1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #ifdef USE_TCP_FASTOPEN
47 #include <netinet/tcp.h>
48 #endif
49 #include "services/listen_dnsport.h"
50 #include "services/outside_network.h"
51 #include "util/netevent.h"
52 #include "util/log.h"
53 #include "util/config_file.h"
54 #include "util/net_help.h"
55 #include "sldns/sbuffer.h"
56 #include "services/mesh.h"
57 #include "util/fptr_wlist.h"
58 #include "util/locks.h"
59 
60 #ifdef HAVE_NETDB_H
61 #include <netdb.h>
62 #endif
63 #include <fcntl.h>
64 
65 #ifdef HAVE_SYS_UN_H
66 #include <sys/un.h>
67 #endif
68 
69 #ifdef HAVE_SYSTEMD
70 #include <systemd/sd-daemon.h>
71 #endif
72 
/** number of queued TCP connections for listen() (the backlog argument) */
#define TCP_BACKLOG 256

/** number of simultaneous requests a client can have */
#define TCP_MAX_REQ_SIMULTANEOUS 32

#ifndef THREADS_DISABLED
/** lock on the counter of stream buffer memory; only declared when
 * threads are not disabled */
static lock_basic_type stream_wait_count_lock;
#endif
/** size (in bytes) of stream wait buffers */
static size_t stream_wait_count = 0;
/** flag: is the lock initialised for stream wait buffers; starts at 0 */
static int stream_wait_lock_inited = 0;
87 
88 /**
89  * Debug print of the getaddrinfo returned address.
90  * @param addr: the address returned.
91  */
92 static void
93 verbose_print_addr(struct addrinfo *addr)
94 {
95 	if(verbosity >= VERB_ALGO) {
96 		char buf[100];
97 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
98 #ifdef INET6
99 		if(addr->ai_family == AF_INET6)
100 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
101 				sin6_addr;
102 #endif /* INET6 */
103 		if(inet_ntop(addr->ai_family, sinaddr, buf,
104 			(socklen_t)sizeof(buf)) == 0) {
105 			(void)strlcpy(buf, "(null)", sizeof(buf));
106 		}
107 		buf[sizeof(buf)-1] = 0;
108 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
109 			addr->ai_socktype==SOCK_DGRAM?"udp":
110 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
111 			addr->ai_family==AF_INET?"4":
112 			addr->ai_family==AF_INET6?"6":
113 			"_otherfam", buf,
114 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
115 	}
116 }
117 
118 #ifdef HAVE_SYSTEMD
119 static int
120 systemd_get_activated(int family, int socktype, int listen,
121 		      struct sockaddr *addr, socklen_t addrlen,
122 		      const char *path)
123 {
124 	int i = 0;
125 	int r = 0;
126 	int s = -1;
127 	const char* listen_pid, *listen_fds;
128 
129 	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */
130 
131 	if((r = sd_booted()) < 1) {
132 		if(r == 0)
133 			log_warn("systemd is not running");
134 		else
135 			log_err("systemd sd_booted(): %s", strerror(-r));
136 		return -1;
137 	}
138 
139 	listen_pid = getenv("LISTEN_PID");
140 	listen_fds = getenv("LISTEN_FDS");
141 
142 	if (!listen_pid) {
143 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
144 		return -1;
145 	}
146 
147 	if (!listen_fds) {
148 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
149 		return -1;
150 	}
151 
152 	if((r = sd_listen_fds(0)) < 1) {
153 		if(r == 0)
154 			log_warn("systemd: did not return socket, check unit configuration");
155 		else
156 			log_err("systemd sd_listen_fds(): %s", strerror(-r));
157 		return -1;
158 	}
159 
160 	for(i = 0; i < r; i++) {
161 		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
162 			s = SD_LISTEN_FDS_START + i;
163 			break;
164 		}
165 	}
166 	if (s == -1) {
167 		if (addr)
168 			log_err_addr("systemd sd_listen_fds()",
169 				     "no such socket",
170 				     (struct sockaddr_storage *)addr, addrlen);
171 		else
172 			log_err("systemd sd_listen_fds(): %s", path);
173 	}
174 	return s;
175 }
176 #endif
177 
178 int
179 create_udp_sock(int family, int socktype, struct sockaddr* addr,
180         socklen_t addrlen, int v6only, int* inuse, int* noproto,
181 	int rcv, int snd, int listen, int* reuseport, int transparent,
182 	int freebind, int use_systemd)
183 {
184 	int s;
185 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
186 	int on=1;
187 #endif
188 #ifdef IPV6_MTU
189 	int mtu = IPV6_MIN_MTU;
190 #endif
191 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
192 	(void)rcv;
193 #endif
194 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
195 	(void)snd;
196 #endif
197 #ifndef IPV6_V6ONLY
198 	(void)v6only;
199 #endif
200 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
201 	(void)transparent;
202 #endif
203 #if !defined(IP_FREEBIND)
204 	(void)freebind;
205 #endif
206 #ifdef HAVE_SYSTEMD
207 	int got_fd_from_systemd = 0;
208 
209 	if (!use_systemd
210 	    || (use_systemd
211 		&& (s = systemd_get_activated(family, socktype, -1, addr,
212 					      addrlen, NULL)) == -1)) {
213 #else
214 	(void)use_systemd;
215 #endif
216 	if((s = socket(family, socktype, 0)) == -1) {
217 		*inuse = 0;
218 #ifndef USE_WINSOCK
219 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
220 			*noproto = 1;
221 			return -1;
222 		}
223 		log_err("can't create socket: %s", strerror(errno));
224 #else
225 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
226 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
227 			*noproto = 1;
228 			return -1;
229 		}
230 		log_err("can't create socket: %s",
231 			wsa_strerror(WSAGetLastError()));
232 #endif
233 		*noproto = 0;
234 		return -1;
235 	}
236 #ifdef HAVE_SYSTEMD
237 	} else {
238 		got_fd_from_systemd = 1;
239 	}
240 #endif
241 	if(listen) {
242 #ifdef SO_REUSEADDR
243 		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
244 			(socklen_t)sizeof(on)) < 0) {
245 #ifndef USE_WINSOCK
246 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
247 				strerror(errno));
248 			if(errno != ENOSYS) {
249 				close(s);
250 				*noproto = 0;
251 				*inuse = 0;
252 				return -1;
253 			}
254 #else
255 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
256 				wsa_strerror(WSAGetLastError()));
257 			closesocket(s);
258 			*noproto = 0;
259 			*inuse = 0;
260 			return -1;
261 #endif
262 		}
263 #endif /* SO_REUSEADDR */
264 #ifdef SO_REUSEPORT
265 #  ifdef SO_REUSEPORT_LB
266 		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
267 		 * like SO_REUSEPORT on Linux.  This is what the users want
268 		 * with the config option in unbound.conf; if we actually
269 		 * need local address and port reuse they'll also need to
270 		 * have SO_REUSEPORT set for them, assume it was _LB they want.
271 		 */
272 		if (reuseport && *reuseport &&
273 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
274 			(socklen_t)sizeof(on)) < 0) {
275 #ifdef ENOPROTOOPT
276 			if(errno != ENOPROTOOPT || verbosity >= 3)
277 				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
278 					strerror(errno));
279 #endif
280 			/* this option is not essential, we can continue */
281 			*reuseport = 0;
282 		}
283 #  else /* no SO_REUSEPORT_LB */
284 
285 		/* try to set SO_REUSEPORT so that incoming
286 		 * queries are distributed evenly among the receiving threads.
287 		 * Each thread must have its own socket bound to the same port,
288 		 * with SO_REUSEPORT set on each socket.
289 		 */
290 		if (reuseport && *reuseport &&
291 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
292 			(socklen_t)sizeof(on)) < 0) {
293 #ifdef ENOPROTOOPT
294 			if(errno != ENOPROTOOPT || verbosity >= 3)
295 				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
296 					strerror(errno));
297 #endif
298 			/* this option is not essential, we can continue */
299 			*reuseport = 0;
300 		}
301 #  endif /* SO_REUSEPORT_LB */
302 #else
303 		(void)reuseport;
304 #endif /* defined(SO_REUSEPORT) */
305 #ifdef IP_TRANSPARENT
306 		if (transparent &&
307 		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
308 		    (socklen_t)sizeof(on)) < 0) {
309 			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
310 			strerror(errno));
311 		}
312 #elif defined(IP_BINDANY)
313 		if (transparent &&
314 		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
315 		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
316 		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
317 			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
318 			(family==AF_INET6?"V6":""), strerror(errno));
319 		}
320 #elif defined(SO_BINDANY)
321 		if (transparent &&
322 		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
323 		    (socklen_t)sizeof(on)) < 0) {
324 			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
325 			strerror(errno));
326 		}
327 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
328 	}
329 #ifdef IP_FREEBIND
330 	if(freebind &&
331 	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
332 	    (socklen_t)sizeof(on)) < 0) {
333 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
334 		strerror(errno));
335 	}
336 #endif /* IP_FREEBIND */
337 	if(rcv) {
338 #ifdef SO_RCVBUF
339 		int got;
340 		socklen_t slen = (socklen_t)sizeof(got);
341 #  ifdef SO_RCVBUFFORCE
342 		/* Linux specific: try to use root permission to override
343 		 * system limits on rcvbuf. The limit is stored in
344 		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
345 		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
346 			(socklen_t)sizeof(rcv)) < 0) {
347 			if(errno != EPERM) {
348 #    ifndef USE_WINSOCK
349 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
350 					"...) failed: %s", strerror(errno));
351 				close(s);
352 #    else
353 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
354 					"...) failed: %s",
355 					wsa_strerror(WSAGetLastError()));
356 				closesocket(s);
357 #    endif
358 				*noproto = 0;
359 				*inuse = 0;
360 				return -1;
361 			}
362 #  endif /* SO_RCVBUFFORCE */
363 			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
364 				(socklen_t)sizeof(rcv)) < 0) {
365 #  ifndef USE_WINSOCK
366 				log_err("setsockopt(..., SO_RCVBUF, "
367 					"...) failed: %s", strerror(errno));
368 				close(s);
369 #  else
370 				log_err("setsockopt(..., SO_RCVBUF, "
371 					"...) failed: %s",
372 					wsa_strerror(WSAGetLastError()));
373 				closesocket(s);
374 #  endif
375 				*noproto = 0;
376 				*inuse = 0;
377 				return -1;
378 			}
379 			/* check if we got the right thing or if system
380 			 * reduced to some system max.  Warn if so */
381 			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
382 				&slen) >= 0 && got < rcv/2) {
383 				log_warn("so-rcvbuf %u was not granted. "
384 					"Got %u. To fix: start with "
385 					"root permissions(linux) or sysctl "
386 					"bigger net.core.rmem_max(linux) or "
387 					"kern.ipc.maxsockbuf(bsd) values.",
388 					(unsigned)rcv, (unsigned)got);
389 			}
390 #  ifdef SO_RCVBUFFORCE
391 		}
392 #  endif
393 #endif /* SO_RCVBUF */
394 	}
395 	/* first do RCVBUF as the receive buffer is more important */
396 	if(snd) {
397 #ifdef SO_SNDBUF
398 		int got;
399 		socklen_t slen = (socklen_t)sizeof(got);
400 #  ifdef SO_SNDBUFFORCE
401 		/* Linux specific: try to use root permission to override
402 		 * system limits on sndbuf. The limit is stored in
403 		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
404 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
405 			(socklen_t)sizeof(snd)) < 0) {
406 			if(errno != EPERM) {
407 #    ifndef USE_WINSOCK
408 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
409 					"...) failed: %s", strerror(errno));
410 				close(s);
411 #    else
412 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
413 					"...) failed: %s",
414 					wsa_strerror(WSAGetLastError()));
415 				closesocket(s);
416 #    endif
417 				*noproto = 0;
418 				*inuse = 0;
419 				return -1;
420 			}
421 #  endif /* SO_SNDBUFFORCE */
422 			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
423 				(socklen_t)sizeof(snd)) < 0) {
424 #  ifndef USE_WINSOCK
425 				log_err("setsockopt(..., SO_SNDBUF, "
426 					"...) failed: %s", strerror(errno));
427 				close(s);
428 #  else
429 				log_err("setsockopt(..., SO_SNDBUF, "
430 					"...) failed: %s",
431 					wsa_strerror(WSAGetLastError()));
432 				closesocket(s);
433 #  endif
434 				*noproto = 0;
435 				*inuse = 0;
436 				return -1;
437 			}
438 			/* check if we got the right thing or if system
439 			 * reduced to some system max.  Warn if so */
440 			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
441 				&slen) >= 0 && got < snd/2) {
442 				log_warn("so-sndbuf %u was not granted. "
443 					"Got %u. To fix: start with "
444 					"root permissions(linux) or sysctl "
445 					"bigger net.core.wmem_max(linux) or "
446 					"kern.ipc.maxsockbuf(bsd) values.",
447 					(unsigned)snd, (unsigned)got);
448 			}
449 #  ifdef SO_SNDBUFFORCE
450 		}
451 #  endif
452 #endif /* SO_SNDBUF */
453 	}
454 	if(family == AF_INET6) {
455 # if defined(IPV6_V6ONLY)
456 		if(v6only) {
457 			int val=(v6only==2)?0:1;
458 			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
459 				(void*)&val, (socklen_t)sizeof(val)) < 0) {
460 #ifndef USE_WINSOCK
461 				log_err("setsockopt(..., IPV6_V6ONLY"
462 					", ...) failed: %s", strerror(errno));
463 				close(s);
464 #else
465 				log_err("setsockopt(..., IPV6_V6ONLY"
466 					", ...) failed: %s",
467 					wsa_strerror(WSAGetLastError()));
468 				closesocket(s);
469 #endif
470 				*noproto = 0;
471 				*inuse = 0;
472 				return -1;
473 			}
474 		}
475 # endif
476 # if defined(IPV6_USE_MIN_MTU)
477 		/*
478 		 * There is no fragmentation of IPv6 datagrams
479 		 * during forwarding in the network. Therefore
480 		 * we do not send UDP datagrams larger than
481 		 * the minimum IPv6 MTU of 1280 octets. The
482 		 * EDNS0 message length can be larger if the
483 		 * network stack supports IPV6_USE_MIN_MTU.
484 		 */
485 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
486 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
487 #  ifndef USE_WINSOCK
488 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
489 				"...) failed: %s", strerror(errno));
490 			close(s);
491 #  else
492 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
493 				"...) failed: %s",
494 				wsa_strerror(WSAGetLastError()));
495 			closesocket(s);
496 #  endif
497 			*noproto = 0;
498 			*inuse = 0;
499 			return -1;
500 		}
501 # elif defined(IPV6_MTU)
502 		/*
503 		 * On Linux, to send no larger than 1280, the PMTUD is
504 		 * disabled by default for datagrams anyway, so we set
505 		 * the MTU to use.
506 		 */
507 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
508 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
509 #  ifndef USE_WINSOCK
510 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
511 				strerror(errno));
512 			close(s);
513 #  else
514 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
515 				wsa_strerror(WSAGetLastError()));
516 			closesocket(s);
517 #  endif
518 			*noproto = 0;
519 			*inuse = 0;
520 			return -1;
521 		}
522 # endif /* IPv6 MTU */
523 	} else if(family == AF_INET) {
524 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
525 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
526  * PMTU information is not accepted, but fragmentation is allowed
527  * if and only if the packet size exceeds the outgoing interface MTU
528  * (and also uses the interface mtu to determine the size of the packets).
529  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
530  * FreeBSD already has same semantics without setting the option. */
531 		int omit_set = 0;
532 		int action;
533 #   if defined(IP_PMTUDISC_OMIT)
534 		action = IP_PMTUDISC_OMIT;
535 		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
536 			&action, (socklen_t)sizeof(action)) < 0) {
537 
538 			if (errno != EINVAL) {
539 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
540 					strerror(errno));
541 
542 #    ifndef USE_WINSOCK
543 				close(s);
544 #    else
545 				closesocket(s);
546 #    endif
547 				*noproto = 0;
548 				*inuse = 0;
549 				return -1;
550 			}
551 		}
552 		else
553 		{
554 		    omit_set = 1;
555 		}
556 #   endif
557 		if (omit_set == 0) {
558    			action = IP_PMTUDISC_DONT;
559 			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
560 				&action, (socklen_t)sizeof(action)) < 0) {
561 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
562 					strerror(errno));
563 #    ifndef USE_WINSOCK
564 				close(s);
565 #    else
566 				closesocket(s);
567 #    endif
568 				*noproto = 0;
569 				*inuse = 0;
570 				return -1;
571 			}
572 		}
573 #  elif defined(IP_DONTFRAG)
574 		int off = 0;
575 		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
576 			&off, (socklen_t)sizeof(off)) < 0) {
577 			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
578 				strerror(errno));
579 #    ifndef USE_WINSOCK
580 			close(s);
581 #    else
582 			closesocket(s);
583 #    endif
584 			*noproto = 0;
585 			*inuse = 0;
586 			return -1;
587 		}
588 #  endif /* IPv4 MTU */
589 	}
590 	if(
591 #ifdef HAVE_SYSTEMD
592 		!got_fd_from_systemd &&
593 #endif
594 		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
595 		*noproto = 0;
596 		*inuse = 0;
597 #ifndef USE_WINSOCK
598 #ifdef EADDRINUSE
599 		*inuse = (errno == EADDRINUSE);
600 		/* detect freebsd jail with no ipv6 permission */
601 		if(family==AF_INET6 && errno==EINVAL)
602 			*noproto = 1;
603 		else if(errno != EADDRINUSE &&
604 			!(errno == EACCES && verbosity < 4 && !listen)
605 #ifdef EADDRNOTAVAIL
606 			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
607 #endif
608 			) {
609 			log_err_addr("can't bind socket", strerror(errno),
610 				(struct sockaddr_storage*)addr, addrlen);
611 		}
612 #endif /* EADDRINUSE */
613 		close(s);
614 #else /* USE_WINSOCK */
615 		if(WSAGetLastError() != WSAEADDRINUSE &&
616 			WSAGetLastError() != WSAEADDRNOTAVAIL &&
617 			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
618 			log_err_addr("can't bind socket",
619 				wsa_strerror(WSAGetLastError()),
620 				(struct sockaddr_storage*)addr, addrlen);
621 		}
622 		closesocket(s);
623 #endif /* USE_WINSOCK */
624 		return -1;
625 	}
626 	if(!fd_set_nonblock(s)) {
627 		*noproto = 0;
628 		*inuse = 0;
629 #ifndef USE_WINSOCK
630 		close(s);
631 #else
632 		closesocket(s);
633 #endif
634 		return -1;
635 	}
636 	return s;
637 }
638 
/**
 * Create a nonblocking, listening stream socket for the given address.
 * @param addr: address info; its ai_family, ai_socktype, ai_addr and
 *	ai_addrlen are used to create and bind the socket.
 * @param v6only: if nonzero and the family is AF_INET6, the IPV6_V6ONLY
 *	option is set (when that option exists).
 * @param noproto: set to false on entry; set to true if socket() fails
 *	with EAFNOSUPPORT or EPROTONOSUPPORT, or (non-winsock) if bind on
 *	an AF_INET6 address fails with EINVAL.
 * @param reuseport: may be NULL.  If it points to a nonzero value,
 *	SO_REUSEPORT is attempted; it is set to 0 when that fails.
 * @param transparent: if nonzero, try IP_TRANSPARENT, IP_BINDANY or
 *	SO_BINDANY, whichever is defined; failure is only a warning.
 * @param mss: if larger than 0, TCP_MAXSEG is set to this value on a
 *	socket that is created here; failure is only logged.
 * @param freebind: if nonzero, try IP_FREEBIND; failure is only a warning.
 * @param use_systemd: if nonzero and compiled with systemd, first try to
 *	take a socket from systemd; such a socket is not bound again here.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
	int* reuseport, int transparent, int mss, int freebind, int use_systemd)
{
	int s;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
	int on = 1;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;
#endif
#ifdef USE_TCP_FASTOPEN
	int qlen;
#endif
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
	verbose_print_addr(addr);
	*noproto = 0;
#ifdef HAVE_SYSTEMD
	/* create our own socket if systemd is not used, or if systemd
	 * did not hand us a fitting descriptor */
	if (!use_systemd ||
	    (use_systemd
	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
					   addr->ai_addr, addr->ai_addrlen,
					   NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s", strerror(errno));
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s",
			wsa_strerror(WSAGetLastError()));
#endif
		return -1;
	}
	/* the MSS is only set on a socket created above, this code is not
	 * reached for a descriptor that came from systemd */
	if (mss > 0) {
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
			(socklen_t)sizeof(mss)) < 0) {
			#ifndef USE_WINSOCK
			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
				strerror(errno));
			#else
			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
				wsa_strerror(WSAGetLastError()));
			#endif
		} else {
			verbose(VERB_ALGO,
				" tcp socket mss set to %d", mss);
		}
#else
		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
    }
#endif
#ifdef SO_REUSEADDR
	/* failure to set SO_REUSEADDR is fatal for the TCP socket */
	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifndef USE_WINSOCK
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			strerror(errno));
		close(s);
#else
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			wsa_strerror(WSAGetLastError()));
		closesocket(s);
#endif
		return -1;
	}
#endif /* SO_REUSEADDR */
#ifdef IP_FREEBIND
	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
#ifdef SO_REUSEPORT
	/* try to set SO_REUSEPORT so that incoming
	 * connections are distributed evenly among the receiving threads.
	 * Each thread must have its own socket bound to the same port,
	 * with SO_REUSEPORT set on each socket.
	 */
	if (reuseport && *reuseport &&
		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
		if(errno != ENOPROTOOPT || verbosity >= 3)
			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
				strerror(errno));
#endif
		/* this option is not essential, we can continue */
		*reuseport = 0;
	}
#else
	(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#if defined(IPV6_V6ONLY)
	if(addr->ai_family == AF_INET6 && v6only) {
		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
#ifndef USE_WINSOCK
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				strerror(errno));
			close(s);
#else
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				wsa_strerror(WSAGetLastError()));
			closesocket(s);
#endif
			return -1;
		}
	}
#else
	(void)v6only;
#endif /* IPV6_V6ONLY */
#ifdef IP_TRANSPARENT
	if (transparent &&
	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
	}
#elif defined(IP_BINDANY)
	if (transparent &&
	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
	}
#elif defined(SO_BINDANY)
	if (transparent &&
	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
	    sizeof(on)) < 0) {
		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	/* a socket that came from systemd is not bound again */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
        bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
#ifndef USE_WINSOCK
		/* detect freebsd jail with no ipv6 permission */
		if(addr->ai_family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr->ai_addr,
				addr->ai_addrlen);
		}
		close(s);
#else
		log_err_addr("can't bind socket",
			wsa_strerror(WSAGetLastError()),
			(struct sockaddr_storage*)addr->ai_addr,
			addr->ai_addrlen);
		closesocket(s);
#endif
		return -1;
	}
	if(!fd_set_nonblock(s)) {
#ifndef USE_WINSOCK
		close(s);
#else
		closesocket(s);
#endif
		return -1;
	}
	if(listen(s, TCP_BACKLOG) == -1) {
#ifndef USE_WINSOCK
		log_err("can't listen: %s", strerror(errno));
		close(s);
#else
		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
		closesocket(s);
#endif
		return -1;
	}
#ifdef USE_TCP_FASTOPEN
	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
	   against IP spoofing attacks as suggested in RFC7413 */
#ifdef __APPLE__
	/* OS X implementation only supports qlen of 1 via this call. Actual
	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
	qlen = 1;
#else
	/* 5 is recommended on linux */
	qlen = 5;
#endif
	/* a TCP Fast Open failure is only logged, the socket is still
	 * returned to the caller */
	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
		  sizeof(qlen))) == -1 ) {
#ifdef ENOPROTOOPT
		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
		   disabled, except when verbosity enabled for debugging */
		if(errno != ENOPROTOOPT || verbosity >= 3) {
#endif
		  if(errno == EPERM) {
		  	log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
		  } else {
		  	log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
		  }
#ifdef ENOPROTOOPT
		}
#endif
	}
#endif
	return s;
}
868 
/**
 * Create a nonblocking, listening unix-domain (AF_LOCAL) stream socket.
 * An existing file at the path is removed before the socket is bound.
 * @param path: filesystem path of the socket.  It has to fit in the
 *	sun_path member of struct sockaddr_un, including the terminating
 *	zero byte, otherwise the call fails.
 * @param noproto: set to true only when local sockets are not supported
 *	by this build (no sys/un.h); not written otherwise.
 * @param use_systemd: if nonzero and compiled with systemd, first try to
 *	take the socket from systemd; if that yields nothing the socket is
 *	created here.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	if (use_systemd) {
		int ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1,
			NULL, 0, path);
		if (ret != -1)
			return ret;
	}
#else
	(void)use_systemd;
#endif
#ifdef HAVE_SYS_UN_H
	(void)noproto; /*unused*/

	verbose(VERB_ALGO, "creating unix socket %s", path);
	/* length is 92-108, 104 on FreeBSD.  Refuse a path that does not
	 * fit: a truncated copy would bind a different name than the one
	 * that is unlinked below. */
	if (strlen(path) >= sizeof(usock.sun_path)) {
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}
	/* the whole structure is passed to bind, no uninitialised bytes */
	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	return s;

err:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else /* no HAVE_SYS_UN_H */
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif /* HAVE_SYS_UN_H */
}
947 
948 
/**
 * Look up the interface name and port with getaddrinfo and create a
 * socket for the first address that is returned.
 * @param stype: socket type; SOCK_DGRAM makes a UDP socket, any other
 *	value makes a TCP accept socket.  Stored in hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, may be NULL.
 * @param port: service (port) string for getaddrinfo.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to true if IPv6 turned out to be unavailable,
 *	false otherwise.
 * @param rcv: receive buffer size for UDP sockets.
 * @param snd: send buffer size for UDP sockets.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: maximum segment size for TCP sockets.
 * @param freebind: passed on to the socket creation routine.
 * @param use_systemd: passed on to the socket creation routine.
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd)
{
	struct addrinfo *ai = NULL;
	int gai, fd, addr_inuse, no_proto;
	const char* syserr = "";

	*noip6 = 0;
	hints->ai_socktype = stype;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || ai == NULL) {
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* for EAI_SYSTEM the real cause is in errno */
		if(gai == EAI_SYSTEM)
			syserr = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype == SOCK_DGRAM) {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &addr_inuse, &no_proto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd);
		if(fd == -1) {
			if(addr_inuse)
				log_err("bind: address already in use");
			else if(no_proto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	} else {
		fd = create_tcp_accept_sock(ai, v6only, &no_proto, reuseport,
			transparent, tcp_mss, freebind, use_systemd);
		if(fd == -1 && no_proto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}
	freeaddrinfo(ai);
	return fd;
}
999 
/**
 * Make a socket; an interface written as ifspec@port overrides the
 * port argument with the part after the '@'.
 * Fails (returns -1 with noip6 set to false) when the interface part does
 * not fit in 128 bytes or the port part does not fit in 16 bytes,
 * terminating zero included.  All other arguments go to make_sock.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no port override present */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, freebind,
			use_systemd);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* both parts were checked to fit, terminator included */
	memcpy(ifbuf, ifname, iflen);
	ifbuf[iflen] = 0;
	memcpy(portbuf, at+1, portlen+1);
	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport, transparent, tcp_mss, freebind,
		use_systemd);
}
1031 
1032 /**
1033  * Add port to open ports list.
1034  * @param list: list head. changed.
1035  * @param s: fd.
1036  * @param ftype: if fd is UDP.
1037  * @return false on failure. list in unchanged then.
1038  */
1039 static int
1040 port_insert(struct listen_port** list, int s, enum listen_type ftype)
1041 {
1042 	struct listen_port* item = (struct listen_port*)malloc(
1043 		sizeof(struct listen_port));
1044 	if(!item)
1045 		return 0;
1046 	item->next = *list;
1047 	item->fd = s;
1048 	item->ftype = ftype;
1049 	*list = item;
1050 	return 1;
1051 }
1052 
/**
 * Set the fd to receive source address packet info.
 * The socket option that is used is picked at compile time from the ones
 * that are defined for the address family.
 * @param s: socket fd.
 * @param family: AF_INET6 or AF_INET. For other values nothing is set.
 * @return false if setsockopt fails, or if no usable option is defined for
 *	the family; an error is logged in both cases. True otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* IPV6_RECVPKTINFO or IPV6_PKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */

	default:
		/* other families: no option is set */
		break;
	}
	return 1;
}
1107 
1108 /** see if interface is ssl, its port number == the ssl port number */
1109 static int
1110 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1111 	struct config_strlist* tls_additional_port)
1112 {
1113 	struct config_strlist* s;
1114 	char* p = strchr(ifname, '@');
1115 	if(!p && atoi(port) == ssl_port)
1116 		return 1;
1117 	if(p && atoi(p+1) == ssl_port)
1118 		return 1;
1119 	for(s = tls_additional_port; s; s = s->next) {
1120 		if(p && atoi(p+1) == atoi(s->str))
1121 			return 1;
1122 		if(!p && atoi(port) == atoi(s->str))
1123 			return 1;
1124 	}
1125 	return 0;
1126 }
1127 
1128 /**
1129  * Helper for ports_open. Creates one interface (or NULL for default).
1130  * @param ifname: The interface ip address.
1131  * @param do_auto: use automatic interface detection.
1132  * 	If enabled, then ifname must be the wildcard name.
1133  * @param do_udp: if udp should be used.
1134  * @param do_tcp: if udp should be used.
1135  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1136  * @param port: Port number to use (as string).
1137  * @param list: list of open ports, appended to, changed to point to list head.
1138  * @param rcv: receive buffer size for UDP
1139  * @param snd: send buffer size for UDP
1140  * @param ssl_port: ssl service port number
1141  * @param tls_additional_port: list of additional ssl service port numbers.
1142  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1143  * 	set to false on exit if reuseport failed due to no kernel support.
1144  * @param transparent: set IP_TRANSPARENT socket option.
1145  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1146  * @param freebind: set IP_FREEBIND socket option.
1147  * @param use_systemd: if true, fetch sockets from systemd.
1148  * @param dnscrypt_port: dnscrypt service port number
1149  * @return: returns false on error.
1150  */
1151 static int
1152 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1153 	struct addrinfo *hints, const char* port, struct listen_port** list,
1154 	size_t rcv, size_t snd, int ssl_port,
1155 	struct config_strlist* tls_additional_port, int* reuseport,
1156 	int transparent, int tcp_mss, int freebind, int use_systemd,
1157 	int dnscrypt_port)
1158 {
1159 	int s, noip6=0;
1160 #ifdef USE_DNSCRYPT
1161 	int is_dnscrypt = ((strchr(ifname, '@') &&
1162 			atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1163 			(!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1164 #else
1165 	int is_dnscrypt = 0;
1166 	(void)dnscrypt_port;
1167 #endif
1168 
1169 	if(!do_udp && !do_tcp)
1170 		return 0;
1171 	if(do_auto) {
1172 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1173 			&noip6, rcv, snd, reuseport, transparent,
1174 			tcp_mss, freebind, use_systemd)) == -1) {
1175 			if(noip6) {
1176 				log_warn("IPv6 protocol not available");
1177 				return 1;
1178 			}
1179 			return 0;
1180 		}
1181 		/* getting source addr packet info is highly non-portable */
1182 		if(!set_recvpktinfo(s, hints->ai_family)) {
1183 #ifndef USE_WINSOCK
1184 			close(s);
1185 #else
1186 			closesocket(s);
1187 #endif
1188 			return 0;
1189 		}
1190 		if(!port_insert(list, s,
1191 		   is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) {
1192 #ifndef USE_WINSOCK
1193 			close(s);
1194 #else
1195 			closesocket(s);
1196 #endif
1197 			return 0;
1198 		}
1199 	} else if(do_udp) {
1200 		/* regular udp socket */
1201 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1202 			&noip6, rcv, snd, reuseport, transparent,
1203 			tcp_mss, freebind, use_systemd)) == -1) {
1204 			if(noip6) {
1205 				log_warn("IPv6 protocol not available");
1206 				return 1;
1207 			}
1208 			return 0;
1209 		}
1210 		if(!port_insert(list, s,
1211 		   is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) {
1212 #ifndef USE_WINSOCK
1213 			close(s);
1214 #else
1215 			closesocket(s);
1216 #endif
1217 			return 0;
1218 		}
1219 	}
1220 	if(do_tcp) {
1221 		int is_ssl = if_is_ssl(ifname, port, ssl_port,
1222 			tls_additional_port);
1223 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1224 			&noip6, 0, 0, reuseport, transparent, tcp_mss,
1225 			freebind, use_systemd)) == -1) {
1226 			if(noip6) {
1227 				/*log_warn("IPv6 protocol not available");*/
1228 				return 1;
1229 			}
1230 			return 0;
1231 		}
1232 		if(is_ssl)
1233 			verbose(VERB_ALGO, "setup TCP for SSL service");
1234 		if(!port_insert(list, s, is_ssl?listen_type_ssl:
1235 			(is_dnscrypt?listen_type_tcp_dnscrypt:listen_type_tcp))) {
1236 #ifndef USE_WINSOCK
1237 			close(s);
1238 #else
1239 			closesocket(s);
1240 #endif
1241 			return 0;
1242 		}
1243 	}
1244 	return 1;
1245 }
1246 
1247 /**
1248  * Add items to commpoint list in front.
1249  * @param c: commpoint to add.
1250  * @param front: listen struct.
1251  * @return: false on failure.
1252  */
1253 static int
1254 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1255 {
1256 	struct listen_list* item = (struct listen_list*)malloc(
1257 		sizeof(struct listen_list));
1258 	if(!item)
1259 		return 0;
1260 	item->com = c;
1261 	item->next = front->cps;
1262 	front->cps = item;
1263 	return 1;
1264 }
1265 
1266 struct listen_dnsport*
1267 listen_create(struct comm_base* base, struct listen_port* ports,
1268 	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1269 	struct tcl_list* tcp_conn_limit, void* sslctx,
1270 	struct dt_env* dtenv, comm_point_callback_type* cb, void *cb_arg)
1271 {
1272 	struct listen_dnsport* front = (struct listen_dnsport*)
1273 		malloc(sizeof(struct listen_dnsport));
1274 	if(!front)
1275 		return NULL;
1276 	front->cps = NULL;
1277 	front->udp_buff = sldns_buffer_new(bufsize);
1278 #ifdef USE_DNSCRYPT
1279 	front->dnscrypt_udp_buff = NULL;
1280 #endif
1281 	if(!front->udp_buff) {
1282 		free(front);
1283 		return NULL;
1284 	}
1285 	if(!stream_wait_lock_inited) {
1286 		lock_basic_init(&stream_wait_count_lock);
1287 		stream_wait_lock_inited = 1;
1288 	}
1289 
1290 	/* create comm points as needed */
1291 	while(ports) {
1292 		struct comm_point* cp = NULL;
1293 		if(ports->ftype == listen_type_udp ||
1294 		   ports->ftype == listen_type_udp_dnscrypt)
1295 			cp = comm_point_create_udp(base, ports->fd,
1296 				front->udp_buff, cb, cb_arg);
1297 		else if(ports->ftype == listen_type_tcp ||
1298 				ports->ftype == listen_type_tcp_dnscrypt)
1299 			cp = comm_point_create_tcp(base, ports->fd,
1300 				tcp_accept_count, tcp_idle_timeout,
1301 				tcp_conn_limit, bufsize, front->udp_buff,
1302 				cb, cb_arg);
1303 		else if(ports->ftype == listen_type_ssl) {
1304 			cp = comm_point_create_tcp(base, ports->fd,
1305 				tcp_accept_count, tcp_idle_timeout,
1306 				tcp_conn_limit, bufsize, front->udp_buff,
1307 				cb, cb_arg);
1308 			cp->ssl = sslctx;
1309 		} else if(ports->ftype == listen_type_udpancil ||
1310 				  ports->ftype == listen_type_udpancil_dnscrypt)
1311 			cp = comm_point_create_udp_ancil(base, ports->fd,
1312 				front->udp_buff, cb, cb_arg);
1313 		if(!cp) {
1314 			log_err("can't create commpoint");
1315 			listen_delete(front);
1316 			return NULL;
1317 		}
1318 		cp->dtenv = dtenv;
1319 		cp->do_not_close = 1;
1320 #ifdef USE_DNSCRYPT
1321 		if (ports->ftype == listen_type_udp_dnscrypt ||
1322 			ports->ftype == listen_type_tcp_dnscrypt ||
1323 			ports->ftype == listen_type_udpancil_dnscrypt) {
1324 			cp->dnscrypt = 1;
1325 			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1326 			if(!cp->dnscrypt_buffer) {
1327 				log_err("can't alloc dnscrypt_buffer");
1328 				comm_point_delete(cp);
1329 				listen_delete(front);
1330 				return NULL;
1331 			}
1332 			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1333 		}
1334 #endif
1335 		if(!listen_cp_insert(cp, front)) {
1336 			log_err("malloc failed");
1337 			comm_point_delete(cp);
1338 			listen_delete(front);
1339 			return NULL;
1340 		}
1341 		ports = ports->next;
1342 	}
1343 	if(!front->cps) {
1344 		log_err("Could not open sockets to accept queries.");
1345 		listen_delete(front);
1346 		return NULL;
1347 	}
1348 
1349 	return front;
1350 }
1351 
1352 void
1353 listen_list_delete(struct listen_list* list)
1354 {
1355 	struct listen_list *p = list, *pn;
1356 	while(p) {
1357 		pn = p->next;
1358 		comm_point_delete(p->com);
1359 		free(p);
1360 		p = pn;
1361 	}
1362 }
1363 
1364 void
1365 listen_delete(struct listen_dnsport* front)
1366 {
1367 	if(!front)
1368 		return;
1369 	listen_list_delete(front->cps);
1370 #ifdef USE_DNSCRYPT
1371 	if(front->dnscrypt_udp_buff &&
1372 		front->udp_buff != front->dnscrypt_udp_buff) {
1373 		sldns_buffer_free(front->dnscrypt_udp_buff);
1374 	}
1375 #endif
1376 	sldns_buffer_free(front->udp_buff);
1377 	free(front);
1378 	if(stream_wait_lock_inited) {
1379 		stream_wait_lock_inited = 0;
1380 		lock_basic_destroy(&stream_wait_count_lock);
1381 	}
1382 }
1383 
1384 struct listen_port*
1385 listening_ports_open(struct config_file* cfg, int* reuseport)
1386 {
1387 	struct listen_port* list = NULL;
1388 	struct addrinfo hints;
1389 	int i, do_ip4, do_ip6;
1390 	int do_tcp, do_auto;
1391 	char portbuf[32];
1392 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1393 	do_ip4 = cfg->do_ip4;
1394 	do_ip6 = cfg->do_ip6;
1395 	do_tcp = cfg->do_tcp;
1396 	do_auto = cfg->if_automatic && cfg->do_udp;
1397 	if(cfg->incoming_num_tcp == 0)
1398 		do_tcp = 0;
1399 
1400 	/* getaddrinfo */
1401 	memset(&hints, 0, sizeof(hints));
1402 	hints.ai_flags = AI_PASSIVE;
1403 	/* no name lookups on our listening ports */
1404 	if(cfg->num_ifs > 0)
1405 		hints.ai_flags |= AI_NUMERICHOST;
1406 	hints.ai_family = AF_UNSPEC;
1407 #ifndef INET6
1408 	do_ip6 = 0;
1409 #endif
1410 	if(!do_ip4 && !do_ip6) {
1411 		return NULL;
1412 	}
1413 	/* create ip4 and ip6 ports so that return addresses are nice. */
1414 	if(do_auto || cfg->num_ifs == 0) {
1415 		if(do_ip6) {
1416 			hints.ai_family = AF_INET6;
1417 			if(!ports_create_if(do_auto?"::0":"::1",
1418 				do_auto, cfg->do_udp, do_tcp,
1419 				&hints, portbuf, &list,
1420 				cfg->so_rcvbuf, cfg->so_sndbuf,
1421 				cfg->ssl_port, cfg->tls_additional_port,
1422 				reuseport, cfg->ip_transparent,
1423 				cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1424 				cfg->dnscrypt_port)) {
1425 				listening_ports_free(list);
1426 				return NULL;
1427 			}
1428 		}
1429 		if(do_ip4) {
1430 			hints.ai_family = AF_INET;
1431 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1432 				do_auto, cfg->do_udp, do_tcp,
1433 				&hints, portbuf, &list,
1434 				cfg->so_rcvbuf, cfg->so_sndbuf,
1435 				cfg->ssl_port, cfg->tls_additional_port,
1436 				reuseport, cfg->ip_transparent,
1437 				cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1438 				cfg->dnscrypt_port)) {
1439 				listening_ports_free(list);
1440 				return NULL;
1441 			}
1442 		}
1443 	} else for(i = 0; i<cfg->num_ifs; i++) {
1444 		if(str_is_ip6(cfg->ifs[i])) {
1445 			if(!do_ip6)
1446 				continue;
1447 			hints.ai_family = AF_INET6;
1448 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1449 				do_tcp, &hints, portbuf, &list,
1450 				cfg->so_rcvbuf, cfg->so_sndbuf,
1451 				cfg->ssl_port, cfg->tls_additional_port,
1452 				reuseport, cfg->ip_transparent,
1453 				cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1454 				cfg->dnscrypt_port)) {
1455 				listening_ports_free(list);
1456 				return NULL;
1457 			}
1458 		} else {
1459 			if(!do_ip4)
1460 				continue;
1461 			hints.ai_family = AF_INET;
1462 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1463 				do_tcp, &hints, portbuf, &list,
1464 				cfg->so_rcvbuf, cfg->so_sndbuf,
1465 				cfg->ssl_port, cfg->tls_additional_port,
1466 				reuseport, cfg->ip_transparent,
1467 				cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1468 				cfg->dnscrypt_port)) {
1469 				listening_ports_free(list);
1470 				return NULL;
1471 			}
1472 		}
1473 	}
1474 	return list;
1475 }
1476 
1477 void listening_ports_free(struct listen_port* list)
1478 {
1479 	struct listen_port* nx;
1480 	while(list) {
1481 		nx = list->next;
1482 		if(list->fd != -1) {
1483 #ifndef USE_WINSOCK
1484 			close(list->fd);
1485 #else
1486 			closesocket(list->fd);
1487 #endif
1488 		}
1489 		free(list);
1490 		list = nx;
1491 	}
1492 }
1493 
1494 size_t listen_get_mem(struct listen_dnsport* listen)
1495 {
1496 	struct listen_list* p;
1497 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1498 		sizeof(*listen->udp_buff) +
1499 		sldns_buffer_capacity(listen->udp_buff);
1500 #ifdef USE_DNSCRYPT
1501 	s += sizeof(*listen->dnscrypt_udp_buff);
1502 	if(listen->udp_buff != listen->dnscrypt_udp_buff){
1503 		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1504 	}
1505 #endif
1506 	for(p = listen->cps; p; p = p->next) {
1507 		s += sizeof(*p);
1508 		s += comm_point_get_mem(p->com);
1509 	}
1510 	return s;
1511 }
1512 
1513 void listen_stop_accept(struct listen_dnsport* listen)
1514 {
1515 	/* do not stop the ones that have no tcp_free list
1516 	 * (they have already stopped listening) */
1517 	struct listen_list* p;
1518 	for(p=listen->cps; p; p=p->next) {
1519 		if(p->com->type == comm_tcp_accept &&
1520 			p->com->tcp_free != NULL) {
1521 			comm_point_stop_listening(p->com);
1522 		}
1523 	}
1524 }
1525 
1526 void listen_start_accept(struct listen_dnsport* listen)
1527 {
1528 	/* do not start the ones that have no tcp_free list, it is no
1529 	 * use to listen to them because they have no free tcp handlers */
1530 	struct listen_list* p;
1531 	for(p=listen->cps; p; p=p->next) {
1532 		if(p->com->type == comm_tcp_accept &&
1533 			p->com->tcp_free != NULL) {
1534 			comm_point_start_listening(p->com, -1, -1);
1535 		}
1536 	}
1537 }
1538 
1539 struct tcp_req_info*
1540 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1541 {
1542 	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1543 	if(!req) {
1544 		log_err("malloc failure for new stream outoforder processing structure");
1545 		return NULL;
1546 	}
1547 	memset(req, 0, sizeof(*req));
1548 	req->spool_buffer = spoolbuf;
1549 	return req;
1550 }
1551 
/** Clear and free the tcp_req_info.  Does nothing for NULL. */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		tcp_req_info_clear(req);
		/* the cp member points back to the commpoint that owns
		 * this struct and called delete on us; the spool_buffer is
		 * the shared udp buffer. Neither is deleted here. */
		free(req);
	}
}
1562 
1563 void tcp_req_info_clear(struct tcp_req_info* req)
1564 {
1565 	struct tcp_req_open_item* open, *nopen;
1566 	struct tcp_req_done_item* item, *nitem;
1567 	if(!req) return;
1568 
1569 	/* free outstanding request mesh reply entries */
1570 	open = req->open_req_list;
1571 	while(open) {
1572 		nopen = open->next;
1573 		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1574 		free(open);
1575 		open = nopen;
1576 	}
1577 	req->open_req_list = NULL;
1578 	req->num_open_req = 0;
1579 
1580 	/* free pending writable result packets */
1581 	item = req->done_req_list;
1582 	while(item) {
1583 		nitem = item->next;
1584 		lock_basic_lock(&stream_wait_count_lock);
1585 		stream_wait_count -= (sizeof(struct tcp_req_done_item)
1586 			+item->len);
1587 		lock_basic_unlock(&stream_wait_count_lock);
1588 		free(item->buf);
1589 		free(item);
1590 		item = nitem;
1591 	}
1592 	req->done_req_list = NULL;
1593 	req->num_done_req = 0;
1594 	req->read_is_closed = 0;
1595 }
1596 
1597 void
1598 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1599 {
1600 	struct tcp_req_open_item* open, *prev = NULL;
1601 	if(!req || !m) return;
1602 	open = req->open_req_list;
1603 	while(open) {
1604 		if(open->mesh_state == m) {
1605 			struct tcp_req_open_item* next;
1606 			if(prev) prev->next = open->next;
1607 			else req->open_req_list = open->next;
1608 			/* caller has to manage the mesh state reply entry */
1609 			next = open->next;
1610 			free(open);
1611 			req->num_open_req --;
1612 
1613 			/* prev = prev; */
1614 			open = next;
1615 			continue;
1616 		}
1617 		prev = open;
1618 		open = open->next;
1619 	}
1620 }
1621 
1622 /** setup listening for read or write */
1623 static void
1624 tcp_req_info_setup_listen(struct tcp_req_info* req)
1625 {
1626 	int wr = 0;
1627 	int rd = 0;
1628 
1629 	if(req->cp->tcp_byte_count != 0) {
1630 		/* cannot change, halfway through */
1631 		return;
1632 	}
1633 
1634 	if(!req->cp->tcp_is_reading)
1635 		wr = 1;
1636 	if(req->num_open_req + req->num_done_req < TCP_MAX_REQ_SIMULTANEOUS &&
1637 		!req->read_is_closed)
1638 		rd = 1;
1639 
1640 	if(wr) {
1641 		req->cp->tcp_is_reading = 0;
1642 		comm_point_stop_listening(req->cp);
1643 		comm_point_start_listening(req->cp, -1,
1644 			req->cp->tcp_timeout_msec);
1645 	} else if(rd) {
1646 		req->cp->tcp_is_reading = 1;
1647 		comm_point_stop_listening(req->cp);
1648 		comm_point_start_listening(req->cp, -1,
1649 			req->cp->tcp_timeout_msec);
1650 		/* and also read it (from SSL stack buffers), so
1651 		 * no event read event is expected since the remainder of
1652 		 * the TLS frame is sitting in the buffers. */
1653 		req->read_again = 1;
1654 	} else {
1655 		comm_point_stop_listening(req->cp);
1656 		comm_point_start_listening(req->cp, -1,
1657 			req->cp->tcp_timeout_msec);
1658 		comm_point_listen_for_rw(req->cp, 0, 0);
1659 	}
1660 }
1661 
1662 /** remove first item from list of pending results */
1663 static struct tcp_req_done_item*
1664 tcp_req_info_pop_done(struct tcp_req_info* req)
1665 {
1666 	struct tcp_req_done_item* item;
1667 	log_assert(req->num_done_req > 0 && req->done_req_list);
1668 	item = req->done_req_list;
1669 	lock_basic_lock(&stream_wait_count_lock);
1670 	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1671 	lock_basic_unlock(&stream_wait_count_lock);
1672 	req->done_req_list = req->done_req_list->next;
1673 	req->num_done_req --;
1674 	return item;
1675 }
1676 
1677 /** Send given buffer and setup to write */
1678 static void
1679 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1680 	size_t len)
1681 {
1682 	sldns_buffer_clear(req->cp->buffer);
1683 	sldns_buffer_write(req->cp->buffer, buf, len);
1684 	sldns_buffer_flip(req->cp->buffer);
1685 
1686 	req->cp->tcp_is_reading = 0; /* we are now writing */
1687 }
1688 
1689 /** pick up the next result and start writing it to the channel */
1690 static void
1691 tcp_req_pickup_next_result(struct tcp_req_info* req)
1692 {
1693 	if(req->num_done_req > 0) {
1694 		/* unlist the done item from the list of pending results */
1695 		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1696 		tcp_req_info_start_write_buf(req, item->buf, item->len);
1697 		free(item->buf);
1698 		free(item);
1699 	}
1700 }
1701 
1702 /** the read channel has closed */
1703 int
1704 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1705 {
1706 	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1707 	/* reset byte count for (potential) partial read */
1708 	req->cp->tcp_byte_count = 0;
1709 	/* if we still have results to write, pick up next and write it */
1710 	if(req->num_done_req != 0) {
1711 		tcp_req_pickup_next_result(req);
1712 		tcp_req_info_setup_listen(req);
1713 		return 1;
1714 	}
1715 	/* if nothing to do, this closes the connection */
1716 	if(req->num_open_req == 0 && req->num_done_req == 0)
1717 		return 0;
1718 	/* otherwise, we must be waiting for dns resolve, wait with timeout */
1719 	req->read_is_closed = 1;
1720 	tcp_req_info_setup_listen(req);
1721 	return 1;
1722 }
1723 
1724 void
1725 tcp_req_info_handle_writedone(struct tcp_req_info* req)
1726 {
1727 	/* back to reading state, we finished this write event */
1728 	sldns_buffer_clear(req->cp->buffer);
1729 	if(req->num_done_req == 0 && req->read_is_closed) {
1730 		/* no more to write and nothing to read, close it */
1731 		comm_point_drop_reply(&req->cp->repinfo);
1732 		return;
1733 	}
1734 	req->cp->tcp_is_reading = 1;
1735 	/* see if another result needs writing */
1736 	tcp_req_pickup_next_result(req);
1737 
1738 	/* see if there is more to write, if not stop_listening for writing */
1739 	/* see if new requests are allowed, if so, start_listening
1740 	 * for reading */
1741 	tcp_req_info_setup_listen(req);
1742 }
1743 
1744 void
1745 tcp_req_info_handle_readdone(struct tcp_req_info* req)
1746 {
1747 	struct comm_point* c = req->cp;
1748 
1749 	/* we want to read up several requests, unless there are
1750 	 * pending answers */
1751 
1752 	req->is_drop = 0;
1753 	req->is_reply = 0;
1754 	req->in_worker_handle = 1;
1755 	sldns_buffer_set_limit(req->spool_buffer, 0);
1756 	/* handle the current request */
1757 	/* this calls the worker handle request routine that could give
1758 	 * a cache response, or localdata response, or drop the reply,
1759 	 * or schedule a mesh entry for later */
1760 	fptr_ok(fptr_whitelist_comm_point(c->callback));
1761 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1762 		req->in_worker_handle = 0;
1763 		/* there is an answer, put it up.  It is already in the
1764 		 * c->buffer, just send it. */
1765 		/* since we were just reading a query, the channel is
1766 		 * clear to write to */
1767 	send_it:
1768 		c->tcp_is_reading = 0;
1769 		comm_point_stop_listening(c);
1770 		comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1771 		return;
1772 	}
1773 	req->in_worker_handle = 0;
1774 	/* it should be waiting in the mesh for recursion.
1775 	 * If mesh failed to add a new entry and called commpoint_drop_reply.
1776 	 * Then the mesh state has been cleared. */
1777 	if(req->is_drop) {
1778 		/* the reply has been dropped, stream has been closed. */
1779 		return;
1780 	}
1781 	/* If mesh failed(mallocfail) and called commpoint_send_reply with
1782 	 * something like servfail then we pick up that reply below. */
1783 	if(req->is_reply) {
1784 		goto send_it;
1785 	}
1786 
1787 	sldns_buffer_clear(c->buffer);
1788 	/* if pending answers, pick up an answer and start sending it */
1789 	tcp_req_pickup_next_result(req);
1790 
1791 	/* if answers pending, start sending answers */
1792 	/* read more requests if we can have more requests */
1793 	tcp_req_info_setup_listen(req);
1794 }
1795 
1796 int
1797 tcp_req_info_add_meshstate(struct tcp_req_info* req,
1798 	struct mesh_area* mesh, struct mesh_state* m)
1799 {
1800 	struct tcp_req_open_item* item;
1801 	log_assert(req && mesh && m);
1802 	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
1803 	if(!item) return 0;
1804 	item->next = req->open_req_list;
1805 	item->mesh = mesh;
1806 	item->mesh_state = m;
1807 	req->open_req_list = item;
1808 	req->num_open_req++;
1809 	return 1;
1810 }
1811 
1812 /** Add a result to the result list.  At the end. */
1813 static int
1814 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
1815 {
1816 	struct tcp_req_done_item* last = NULL;
1817 	struct tcp_req_done_item* item;
1818 	size_t space;
1819 
1820 	/* see if we have space */
1821 	space = sizeof(struct tcp_req_done_item) + len;
1822 	lock_basic_lock(&stream_wait_count_lock);
1823 	if(stream_wait_count + space > stream_wait_max) {
1824 		lock_basic_unlock(&stream_wait_count_lock);
1825 		verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
1826 		return 0;
1827 	}
1828 	stream_wait_count += space;
1829 	lock_basic_unlock(&stream_wait_count_lock);
1830 
1831 	/* find last element */
1832 	last = req->done_req_list;
1833 	while(last && last->next)
1834 		last = last->next;
1835 
1836 	/* create new element */
1837 	item = (struct tcp_req_done_item*)malloc(sizeof(*item));
1838 	if(!item) {
1839 		log_err("malloc failure, for stream result list");
1840 		return 0;
1841 	}
1842 	item->next = NULL;
1843 	item->len = len;
1844 	item->buf = memdup(buf, len);
1845 	if(!item->buf) {
1846 		free(item);
1847 		log_err("malloc failure, adding reply to stream result list");
1848 		return 0;
1849 	}
1850 
1851 	/* link in */
1852 	if(last) last->next = item;
1853 	else req->done_req_list = item;
1854 	req->num_done_req++;
1855 	return 1;
1856 }
1857 
1858 void
1859 tcp_req_info_send_reply(struct tcp_req_info* req)
1860 {
1861 	if(req->in_worker_handle) {
1862 		/* reply from mesh is in the spool_buffer */
1863 		/* copy now, so that the spool buffer is free for other tasks
1864 		 * before the callback is done */
1865 		sldns_buffer_clear(req->cp->buffer);
1866 		sldns_buffer_write(req->cp->buffer,
1867 			sldns_buffer_begin(req->spool_buffer),
1868 			sldns_buffer_limit(req->spool_buffer));
1869 		sldns_buffer_flip(req->cp->buffer);
1870 		req->is_reply = 1;
1871 		return;
1872 	}
1873 	/* now that the query has been handled, that mesh_reply entry
1874 	 * should be removed, from the tcp_req_info list,
1875 	 * the mesh state cleanup removes then with region_cleanup and
1876 	 * replies_sent true. */
1877 	/* see if we can send it straight away (we are not doing
1878 	 * anything else).  If so, copy to buffer and start */
1879 	if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
1880 		/* buffer is free, and was ready to read new query into,
1881 		 * but we are now going to use it to send this answer */
1882 		tcp_req_info_start_write_buf(req,
1883 			sldns_buffer_begin(req->spool_buffer),
1884 			sldns_buffer_limit(req->spool_buffer));
1885 		/* switch to listen to write events */
1886 		comm_point_stop_listening(req->cp);
1887 		comm_point_start_listening(req->cp, -1,
1888 			req->cp->tcp_timeout_msec);
1889 		return;
1890 	}
1891 	/* queue up the answer behind the others already pending */
1892 	if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
1893 		sldns_buffer_limit(req->spool_buffer))) {
1894 		/* drop the connection, we are out of resources */
1895 		comm_point_drop_reply(&req->cp->repinfo);
1896 	}
1897 }
1898 
1899 size_t tcp_req_info_get_stream_buffer_size(void)
1900 {
1901 	size_t s;
1902 	if(!stream_wait_lock_inited)
1903 		return stream_wait_count;
1904 	lock_basic_lock(&stream_wait_count_lock);
1905 	s = stream_wait_count;
1906 	lock_basic_unlock(&stream_wait_count_lock);
1907 	return s;
1908 }
1909