1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include <ctype.h>
51 #include "services/listen_dnsport.h"
52 #include "services/outside_network.h"
53 #include "util/netevent.h"
54 #include "util/log.h"
55 #include "util/config_file.h"
56 #include "util/net_help.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/parseutil.h"
59 #include "services/mesh.h"
60 #include "util/fptr_wlist.h"
61 #include "util/locks.h"
62 
63 #ifdef HAVE_NETDB_H
64 #include <netdb.h>
65 #endif
66 #include <fcntl.h>
67 
68 #ifdef HAVE_SYS_UN_H
69 #include <sys/un.h>
70 #endif
71 
72 #ifdef HAVE_SYSTEMD
73 #include <systemd/sd-daemon.h>
74 #endif
75 
76 #ifdef HAVE_IFADDRS_H
77 #include <ifaddrs.h>
78 #endif
79 #ifdef HAVE_NET_IF_H
80 #include <net/if.h>
81 #endif
82 
/** backlog argument passed to listen() for the stream accept sockets
 * created in this file */
#define TCP_BACKLOG 256

#ifndef THREADS_DISABLED
/* The locks below only exist in builds where threads are enabled
 * (THREADS_DISABLED is not defined). */
/** lock on the counter of stream buffer memory */
static lock_basic_type stream_wait_count_lock;
/** lock on the counter of HTTP2 query buffer memory */
static lock_basic_type http2_query_buffer_count_lock;
/** lock on the counter of HTTP2 response buffer memory */
static lock_basic_type http2_response_buffer_count_lock;
#endif
/** size (in bytes) of stream wait buffers, starts at zero */
static size_t stream_wait_count = 0;
/** is the lock initialised for stream wait buffers (0 means not yet) */
static int stream_wait_lock_inited = 0;
/** size (in bytes) of HTTP2 query buffers, starts at zero */
static size_t http2_query_buffer_count = 0;
/** is the lock initialised for HTTP2 query buffers (0 means not yet) */
static int http2_query_buffer_lock_inited = 0;
/** size (in bytes) of HTTP2 response buffers, starts at zero */
static size_t http2_response_buffer_count = 0;
/** is the lock initialised for HTTP2 response buffers (0 means not yet) */
static int http2_response_buffer_lock_inited = 0;
106 
107 /**
108  * Debug print of the getaddrinfo returned address.
109  * @param addr: the address returned.
110  */
111 static void
112 verbose_print_addr(struct addrinfo *addr)
113 {
114 	if(verbosity >= VERB_ALGO) {
115 		char buf[100];
116 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
117 #ifdef INET6
118 		if(addr->ai_family == AF_INET6)
119 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
120 				sin6_addr;
121 #endif /* INET6 */
122 		if(inet_ntop(addr->ai_family, sinaddr, buf,
123 			(socklen_t)sizeof(buf)) == 0) {
124 			(void)strlcpy(buf, "(null)", sizeof(buf));
125 		}
126 		buf[sizeof(buf)-1] = 0;
127 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
128 			addr->ai_socktype==SOCK_DGRAM?"udp":
129 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
130 			addr->ai_family==AF_INET?"4":
131 			addr->ai_family==AF_INET6?"6":
132 			"_otherfam", buf,
133 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
134 	}
135 }
136 
137 void
138 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
139 {
140 	if(verbosity >= VERB_ALGO) {
141 		log_info("listing of unbound_socket structure:");
142 		verbose_print_addr(ub_sock->addr);
143 		log_info("s is: %d, fam is: %s", ub_sock->s, ub_sock->fam == AF_INET?"AF_INET":"AF_INET6");
144 	}
145 }
146 
147 #ifdef HAVE_SYSTEMD
148 static int
149 systemd_get_activated(int family, int socktype, int listen,
150 		      struct sockaddr *addr, socklen_t addrlen,
151 		      const char *path)
152 {
153 	int i = 0;
154 	int r = 0;
155 	int s = -1;
156 	const char* listen_pid, *listen_fds;
157 
158 	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */
159 
160 	if((r = sd_booted()) < 1) {
161 		if(r == 0)
162 			log_warn("systemd is not running");
163 		else
164 			log_err("systemd sd_booted(): %s", strerror(-r));
165 		return -1;
166 	}
167 
168 	listen_pid = getenv("LISTEN_PID");
169 	listen_fds = getenv("LISTEN_FDS");
170 
171 	if (!listen_pid) {
172 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
173 		return -1;
174 	}
175 
176 	if (!listen_fds) {
177 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
178 		return -1;
179 	}
180 
181 	if((r = sd_listen_fds(0)) < 1) {
182 		if(r == 0)
183 			log_warn("systemd: did not return socket, check unit configuration");
184 		else
185 			log_err("systemd sd_listen_fds(): %s", strerror(-r));
186 		return -1;
187 	}
188 
189 	for(i = 0; i < r; i++) {
190 		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
191 			s = SD_LISTEN_FDS_START + i;
192 			break;
193 		}
194 	}
195 	if (s == -1) {
196 		if (addr)
197 			log_err_addr("systemd sd_listen_fds()",
198 				     "no such socket",
199 				     (struct sockaddr_storage *)addr, addrlen);
200 		else
201 			log_err("systemd sd_listen_fds(): %s", path);
202 	}
203 	return s;
204 }
205 #endif
206 
/**
 * Create a datagram socket, apply the requested socket options, bind it
 * and set it nonblocking.
 *
 * When compiled with HAVE_SYSTEMD and use_systemd is set, a matching
 * descriptor from systemd socket activation is tried first; a new socket
 * is only opened when that lookup returns -1. bind() is skipped for a
 * descriptor that came from systemd.
 *
 * @param family: address family passed to socket(). AF_INET6 and AF_INET
 *	each get their own MTU and path-MTU-discovery options.
 * @param socktype: socket type passed to socket().
 * @param addr: address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: for AF_INET6, if nonzero IPV6_V6ONLY is set; the value 2
 *	sets the option to 0, any other nonzero value sets it to 1.
 * @param inuse: set on every failure path. It is nonzero only when bind()
 *	failed with EADDRINUSE (not on winsock builds), otherwise 0.
 * @param noproto: set on every failure path. It is 1 when socket() failed
 *	because the address family or protocol is not supported, or when
 *	binding an AF_INET6 socket failed with EINVAL; otherwise 0.
 * @param rcv: if nonzero, the receive buffer size to request.
 * @param snd: if nonzero, the send buffer size to request.
 * @param listen: if nonzero, SO_REUSEADDR, SO_REUSEPORT (or
 *	SO_REUSEPORT_LB) and the transparent option are applied. If zero,
 *	EACCES and EADDRNOTAVAIL bind failures are only logged when
 *	verbosity is 4 or higher.
 * @param reuseport: if not NULL and nonzero, the reuseport option is
 *	attempted; it is reset to 0 when setting that option fails.
 * @param transparent: if nonzero (and listen is set), try IP_TRANSPARENT,
 *	IP_BINDANY or SO_BINDANY, whichever is available; failure only warns.
 * @param freebind: if nonzero, try IP_FREEBIND; failure only warns.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @param dscp: passed to set_ip_dscp(); failure only warns.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_udp_sock(int family, int socktype, struct sockaddr* addr,
        socklen_t addrlen, int v6only, int* inuse, int* noproto,
	int rcv, int snd, int listen, int* reuseport, int transparent,
	int freebind, int use_systemd, int dscp)
{
	int s;
	char* err;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
	int on=1;
#endif
#ifdef IPV6_MTU
	int mtu = IPV6_MIN_MTU;
#endif
	/* silence unused parameter warnings for options that are not
	 * available on this platform */
#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
	(void)rcv;
#endif
#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
	(void)snd;
#endif
#ifndef IPV6_V6ONLY
	(void)v6only;
#endif
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;

	/* open a new socket unless systemd is in use and it hands over a
	 * matching descriptor; the opening brace here is closed by the
	 * HAVE_SYSTEMD section after the socket() call */
	if (!use_systemd
	    || (use_systemd
		&& (s = systemd_get_activated(family, socktype, -1, addr,
					      addrlen, NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(family, socktype, 0)) == -1) {
		*inuse = 0;
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#endif
		log_err("can't create socket: %s", sock_strerror(errno));
		*noproto = 0;
		return -1;
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
	}
#endif
	if(listen) {
#ifdef SO_REUSEADDR
		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
				sock_strerror(errno));
#ifndef USE_WINSOCK
			/* ENOSYS is tolerated, continue without the option */
			if(errno != ENOSYS) {
				close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#else
			closesocket(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
#endif
		}
#endif /* SO_REUSEADDR */
#ifdef SO_REUSEPORT
#  ifdef SO_REUSEPORT_LB
		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
		 * like SO_REUSEPORT on Linux.  This is what the users want
		 * with the config option in unbound.conf; if we actually
		 * need local address and port reuse they'll also need to
		 * have SO_REUSEPORT set for them, assume it was _LB they want.
		 */
		if (reuseport && *reuseport &&
		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
			if(errno != ENOPROTOOPT || verbosity >= 3)
				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
					strerror(errno));
#endif
			/* this option is not essential, we can continue */
			*reuseport = 0;
		}
#  else /* no SO_REUSEPORT_LB */

		/* try to set SO_REUSEPORT so that incoming
		 * queries are distributed evenly among the receiving threads.
		 * Each thread must have its own socket bound to the same port,
		 * with SO_REUSEPORT set on each socket.
		 */
		if (reuseport && *reuseport &&
		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
			if(errno != ENOPROTOOPT || verbosity >= 3)
				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
					strerror(errno));
#endif
			/* this option is not essential, we can continue */
			*reuseport = 0;
		}
#  endif /* SO_REUSEPORT_LB */
#else
		(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
		/* transparent option: use the first variant that this
		 * platform defines; a failure is only a warning */
#ifdef IP_TRANSPARENT
		if (transparent &&
		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
		    (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
		}
#elif defined(IP_BINDANY)
		if (transparent &&
		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
			(family==AF_INET6?"V6":""), strerror(errno));
		}
#elif defined(SO_BINDANY)
		if (transparent &&
		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
		    (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
			strerror(errno));
		}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	}
	/* freebind is applied whether or not listen is set */
#ifdef IP_FREEBIND
	if(freebind &&
	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
	if(rcv) {
#ifdef SO_RCVBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_RCVBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on rcvbuf. The limit is stored in
		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
		/* Only when the forced variant fails with EPERM does the
		 * code fall through to plain SO_RCVBUF below; if the forced
		 * variant succeeds the SO_RCVBUF part is skipped. */
		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
			(socklen_t)sizeof(rcv)) < 0) {
			if(errno != EPERM) {
				log_err("setsockopt(..., SO_RCVBUFFORCE, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_RCVBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
				(socklen_t)sizeof(rcv)) < 0) {
				log_err("setsockopt(..., SO_RCVBUF, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
				&slen) >= 0 && got < rcv/2) {
				log_warn("so-rcvbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.rmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)rcv, (unsigned)got);
			}
#  ifdef SO_RCVBUFFORCE
		}
#  endif
#endif /* SO_RCVBUF */
	}
	/* first do RCVBUF as the receive buffer is more important */
	if(snd) {
#ifdef SO_SNDBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_SNDBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on sndbuf. The limit is stored in
		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
		/* Same structure as for the receive buffer: plain SO_SNDBUF
		 * is only tried after the forced variant fails with EPERM. */
		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
			(socklen_t)sizeof(snd)) < 0) {
			if(errno != EPERM) {
				log_err("setsockopt(..., SO_SNDBUFFORCE, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_SNDBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
				(socklen_t)sizeof(snd)) < 0) {
				log_err("setsockopt(..., SO_SNDBUF, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
				&slen) >= 0 && got < snd/2) {
				log_warn("so-sndbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.wmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)snd, (unsigned)got);
			}
#  ifdef SO_SNDBUFFORCE
		}
#  endif
#endif /* SO_SNDBUF */
	}
	/* a failure to set the DSCP value is only a warning */
	err = set_ip_dscp(s, family, dscp);
	if(err != NULL)
		log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
	if(family == AF_INET6) {
# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
		int omit6_set = 0;
		int action;
# endif
# if defined(IPV6_V6ONLY)
		if(v6only) {
			/* v6only value 2 requests the option to be cleared */
			int val=(v6only==2)?0:1;
			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
				(void*)&val, (socklen_t)sizeof(val)) < 0) {
				log_err("setsockopt(..., IPV6_V6ONLY"
					", ...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
# endif
# if defined(IPV6_USE_MIN_MTU)
		/*
		 * There is no fragmentation of IPv6 datagrams
		 * during forwarding in the network. Therefore
		 * we do not send UDP datagrams larger than
		 * the minimum IPv6 MTU of 1280 octets. The
		 * EDNS0 message length can be larger if the
		 * network stack supports IPV6_USE_MIN_MTU.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
				"...) failed: %s", sock_strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
# elif defined(IPV6_MTU)
#   ifndef USE_WINSOCK
		/*
		 * On Linux, to send no larger than 1280, the PMTUD is
		 * disabled by default for datagrams anyway, so we set
		 * the MTU to use.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
				sock_strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#   elif defined(IPV6_USER_MTU)
		/* As later versions of the mingw crosscompiler define
		 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU
		 * instead which is writable; IPV6_MTU is readonly there. */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU,
			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
			log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s",
				wsa_strerror(WSAGetLastError()));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#   endif /* USE_WINSOCK */
# endif /* IPv6 MTU */
# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
#  if defined(IP_PMTUDISC_OMIT)
		/* try IP_PMTUDISC_OMIT first; EINVAL from it is tolerated and
		 * leads to the IP_PMTUDISC_DONT fallback below */
		action = IP_PMTUDISC_OMIT;
		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			&action, (socklen_t)sizeof(action)) < 0) {

			if (errno != EINVAL) {
				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
		else
		{
		    omit6_set = 1;
		}
#  endif
		if (omit6_set == 0) {
			action = IP_PMTUDISC_DONT;
			if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
				&action, (socklen_t)sizeof(action)) < 0) {
				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
# endif /* IPV6_MTU_DISCOVER */
	} else if(family == AF_INET) {
#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
 * PMTU information is not accepted, but fragmentation is allowed
 * if and only if the packet size exceeds the outgoing interface MTU
 * (and also uses the interface mtu to determine the size of the packets).
 * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
 * FreeBSD already has same semantics without setting the option. */
		int omit_set = 0;
		int action;
#   if defined(IP_PMTUDISC_OMIT)
		action = IP_PMTUDISC_OMIT;
		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
			&action, (socklen_t)sizeof(action)) < 0) {

			if (errno != EINVAL) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
		else
		{
		    omit_set = 1;
		}
#   endif
		/* fallback when IP_PMTUDISC_OMIT is not defined or was
		 * rejected with EINVAL */
		if (omit_set == 0) {
   			action = IP_PMTUDISC_DONT;
			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
				&action, (socklen_t)sizeof(action)) < 0) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
#  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
		/* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
		 * but does not work on that version, so we exclude it */
		int off = 0;
		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
			&off, (socklen_t)sizeof(off)) < 0) {
			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
				strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#  endif /* IPv4 MTU */
	}
	/* bind() is skipped for a descriptor obtained from systemd */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
		*noproto = 0;
		*inuse = 0;
#ifndef USE_WINSOCK
#ifdef EADDRINUSE
		*inuse = (errno == EADDRINUSE);
		/* detect freebsd jail with no ipv6 permission */
		if(family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		/* EADDRINUSE is reported through inuse and not logged here;
		 * for non-listening sockets EACCES and EADDRNOTAVAIL are
		 * only logged at verbosity 4 and up */
		else if(errno != EADDRINUSE &&
			!(errno == EACCES && verbosity < 4 && !listen)
#ifdef EADDRNOTAVAIL
			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
#endif
			) {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr, addrlen);
		}
#endif /* EADDRINUSE */
#else /* USE_WINSOCK */
		if(WSAGetLastError() != WSAEADDRINUSE &&
			WSAGetLastError() != WSAEADDRNOTAVAIL &&
			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
			log_err_addr("can't bind socket",
				wsa_strerror(WSAGetLastError()),
				(struct sockaddr_storage*)addr, addrlen);
		}
#endif /* USE_WINSOCK */
		sock_close(s);
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		*noproto = 0;
		*inuse = 0;
		sock_close(s);
		return -1;
	}
	return s;
}
655 
/**
 * Create a stream socket for accepting connections: open it (or take it
 * from systemd socket activation), apply socket options, bind it, set it
 * nonblocking and call listen() with TCP_BACKLOG.
 *
 * TCP_NODELAY and TCP_MAXSEG are set right after socket(); when compiled
 * with HAVE_SYSTEMD that code is inside the branch that opens a new
 * socket, so it is not run for a descriptor obtained from systemd.
 * bind() is skipped for a descriptor obtained from systemd.
 *
 * @param addr: getaddrinfo result giving family, socket type and the
 *	address to bind to.
 * @param v6only: if nonzero and the family is AF_INET6, IPV6_V6ONLY is
 *	set to 1.
 * @param noproto: set to 0 at the start; set to 1 when socket() fails
 *	because the address family or protocol is not supported, or (not on
 *	winsock) when binding an AF_INET6 socket fails with EINVAL.
 * @param reuseport: if not NULL and nonzero, SO_REUSEPORT is attempted;
 *	it is reset to 0 when setting that option fails.
 * @param transparent: if nonzero, try IP_TRANSPARENT, IP_BINDANY or
 *	SO_BINDANY, whichever is available; failure only warns.
 * @param mss: if larger than 0, the value to set with TCP_MAXSEG; failure
 *	is logged but not fatal.
 * @param nodelay: if nonzero, set TCP_NODELAY; failure is logged but not
 *	fatal.
 * @param freebind: if nonzero, try IP_FREEBIND; failure only warns.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @param dscp: passed to set_ip_dscp(); failure only warns.
 * @return the listening socket descriptor, or -1 on failure.
 */
int
create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
	int* reuseport, int transparent, int mss, int nodelay, int freebind,
	int use_systemd, int dscp)
{
	int s;
	char* err;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
	int on = 1;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;
#endif
#ifdef USE_TCP_FASTOPEN
	int qlen;
#endif
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
	verbose_print_addr(addr);
	*noproto = 0;
#ifdef HAVE_SYSTEMD
	/* open a new socket unless systemd is in use and it hands over a
	 * matching descriptor; the brace opened here is closed by the
	 * HAVE_SYSTEMD section after the TCP_MAXSEG code */
	if (!use_systemd ||
	    (use_systemd
	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
					   addr->ai_addr, addr->ai_addrlen,
					   NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#endif
		log_err("can't create socket: %s", sock_strerror(errno));
		return -1;
	}
	/* a failure to set TCP_NODELAY is logged, the socket is kept */
	if(nodelay) {
#if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
			#ifndef USE_WINSOCK
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				strerror(errno));
			#else
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				wsa_strerror(WSAGetLastError()));
			#endif
		}
#else
		log_warn(" setsockopt(TCP_NODELAY) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
	}
	/* a failure to set TCP_MAXSEG is logged, the socket is kept */
	if (mss > 0) {
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
			(socklen_t)sizeof(mss)) < 0) {
			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
				sock_strerror(errno));
		} else {
			verbose(VERB_ALGO,
				" tcp socket mss set to %d", mss);
		}
#else
		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
    }
#endif
	/* unlike the options above, a SO_REUSEADDR failure is fatal */
#ifdef SO_REUSEADDR
	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#endif /* SO_REUSEADDR */
#ifdef IP_FREEBIND
	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
#ifdef SO_REUSEPORT
	/* try to set SO_REUSEPORT so that incoming
	 * connections are distributed evenly among the receiving threads.
	 * Each thread must have its own socket bound to the same port,
	 * with SO_REUSEPORT set on each socket.
	 */
	if (reuseport && *reuseport &&
		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
		if(errno != ENOPROTOOPT || verbosity >= 3)
			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
				strerror(errno));
#endif
		/* this option is not essential, we can continue */
		*reuseport = 0;
	}
#else
	(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#if defined(IPV6_V6ONLY)
	if(addr->ai_family == AF_INET6 && v6only) {
		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				sock_strerror(errno));
			sock_close(s);
			return -1;
		}
	}
#else
	(void)v6only;
#endif /* IPV6_V6ONLY */
	/* transparent option: use the first variant that this platform
	 * defines; a failure is only a warning */
#ifdef IP_TRANSPARENT
	if (transparent &&
	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
	}
#elif defined(IP_BINDANY)
	if (transparent &&
	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
	}
#elif defined(SO_BINDANY)
	if (transparent &&
	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
	    sizeof(on)) < 0) {
		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	/* a failure to set the DSCP value is only a warning */
	err = set_ip_dscp(s, addr->ai_family, dscp);
	if(err != NULL)
		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
	/* bind() is skipped for a descriptor obtained from systemd */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
        bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
#ifndef USE_WINSOCK
		/* detect freebsd jail with no ipv6 permission */
		if(addr->ai_family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr->ai_addr,
				addr->ai_addrlen);
		}
#else
		log_err_addr("can't bind socket",
			wsa_strerror(WSAGetLastError()),
			(struct sockaddr_storage*)addr->ai_addr,
			addr->ai_addrlen);
#endif
		sock_close(s);
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		sock_close(s);
		return -1;
	}
	if(listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#ifdef USE_TCP_FASTOPEN
	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
	   against IP spoofing attacks as suggested in RFC7413 */
#ifdef __APPLE__
	/* OS X implementation only supports qlen of 1 via this call. Actual
	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
	qlen = 1;
#else
	/* 5 is recommended on linux */
	qlen = 5;
#endif
	/* a TCP Fast Open failure is only logged; the listening socket is
	 * still returned */
	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
		  sizeof(qlen))) == -1 ) {
#ifdef ENOPROTOOPT
		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
		   disabled, except when verbosity enabled for debugging */
		if(errno != ENOPROTOOPT || verbosity >= 3) {
#endif
		  if(errno == EPERM) {
		  	log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
		  } else {
		  	log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
		  }
#ifdef ENOPROTOOPT
		}
#endif
	}
#endif
	return s;
}
877 
/**
 * Set the DiffServ codepoint on a socket.
 * The dscp value is shifted left by two bits and written with IPV6_TCLASS
 * for AF_INET6 sockets and with IP_TOS for every other address family.
 * @param socket: the socket descriptor to set the option on.
 * @param addrfamily: address family of the socket.
 * @param dscp: the codepoint; 0 means nothing is set.
 * @return NULL on success (or when dscp is 0), otherwise a string that
 *	describes the error.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int tos;

	/* nothing to do for codepoint zero */
	if(dscp == 0)
		return NULL;
	tos = dscp << 2;

	if(addrfamily == AF_INET6) {
#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&tos,
			sizeof(tos)) < 0)
			return sock_strerror(errno);
		return NULL;
#else
		return "IPV6_TCLASS not defined on this system";
#endif
	}

	if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&tos, sizeof(tos)) < 0)
		return sock_strerror(errno);
	return NULL;
}
903 
/**
 * Create a listening local (unix domain) stream socket bound to a path.
 *
 * When compiled with HAVE_SYSTEMD and use_systemd is set, a matching
 * descriptor from systemd socket activation is returned if there is one.
 * Otherwise a new socket is created: an existing file at path is removed,
 * the socket is bound to path, set nonblocking and put in listen state
 * with TCP_BACKLOG.
 *
 * @param path: filesystem path of the socket. It must fit, including the
 *	terminating zero, in sockaddr_un.sun_path; a longer path is
 *	rejected instead of being bound in truncated form.
 * @param noproto: set to 1 when local sockets are not supported in this
 *	build (no sys/un.h); not written otherwise.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif

#ifdef HAVE_SYSTEMD
	if (use_systemd) {
		int ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1,
			NULL, 0, path);
		if (ret != -1)
			return ret;
		/* no activated socket found, create one below */
	}
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	(void)noproto; /*unused*/

	verbose(VERB_ALGO, "creating unix socket %s", path);

	/* A truncated copy would bind a different path than the one that
	 * is unlinked below, so refuse paths that do not fit. */
	if (strlen(path) >= sizeof(usock.sun_path)) {
		log_err("Cannot create local socket %s (%s)",
			path, "path too long");
		return -1;
	}

	/* do not hand uninitialised stack bytes to bind() */
	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
978 
979 
/**
 * Resolve an interface name and port with getaddrinfo and create a socket
 * for the first result: a UDP socket when stype is SOCK_DGRAM, otherwise
 * a TCP accept socket.
 *
 * Once getaddrinfo has succeeded, the result list, the socket and the
 * hinted address family are stored in ub_sock, also when socket creation
 * failed; the list is not freed here. When getaddrinfo fails, ub_sock is
 * not touched.
 *
 * @param noip6: set to 1 when the failure indicates that IPv6 is not
 *	available (hints ask for AF_INET6 and the protocol is unsupported),
 *	otherwise 0.
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo *ai = NULL;
	int gai, fd, addr_in_use, proto_missing;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* for EAI_SYSTEM the real cause is in errno */
		if(gai == EAI_SYSTEM)
			syserr = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &proto_missing,
			reuseport, transparent, tcp_mss, nodelay, freebind,
			use_systemd, dscp);
		if(fd == -1 && proto_missing &&
			hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &addr_in_use, &proto_missing, (int)rcv,
			(int)snd, 1, reuseport, transparent, freebind,
			use_systemd, dscp);
		if(fd == -1) {
			if(addr_in_use)
				log_err("bind: address already in use");
			else if(proto_missing &&
				hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}

	/* hand the results to the caller, also on failure */
	ub_sock->s = fd;
	ub_sock->addr = ai;
	ub_sock->fam = hints->ai_family;

	return fd;
}
1036 
/**
 * Make a socket; an interface written as "ifspec@port" overrides the
 * default port with the part after the '@'.
 * Fails with -1 (and *noip6 set to 0) if either part does not fit in the
 * local buffers.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char ifpart[128];
	char portpart[16];
	size_t iflen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no override, use the name and port as given */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock);
	}

	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifpart)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portpart)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}

	/* both lengths were checked above, so the copies fit
	 * including the terminating zero */
	memcpy(ifpart, ifname, iflen);
	ifpart[iflen] = 0;
	memcpy(portpart, at+1, portlen+1);

	return make_sock(stype, ifpart, portpart, hints, v6only, noip6, rcv,
		snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock);
}
1071 
1072 /**
1073  * Add port to open ports list.
1074  * @param list: list head. changed.
1075  * @param s: fd.
1076  * @param ftype: if fd is UDP.
1077  * @param ub_sock: socket with address.
1078  * @return false on failure. list in unchanged then.
1079  */
1080 static int
1081 port_insert(struct listen_port** list, int s, enum listen_type ftype, struct unbound_socket* ub_sock)
1082 {
1083 	struct listen_port* item = (struct listen_port*)malloc(
1084 		sizeof(struct listen_port));
1085 	if(!item)
1086 		return 0;
1087 	item->next = *list;
1088 	item->fd = s;
1089 	item->ftype = ftype;
1090 	item->socket = ub_sock;
1091 	*list = item;
1092 	return 1;
1093 }
1094 
/**
 * Enable delivery of packet info (destination address) on the fd for the
 * given address family. Which socket option is used depends on what the
 * platform headers define; without a usable option an error is logged.
 * Families other than AF_INET and AF_INET6 are accepted without action.
 * @return false on failure.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* IPV6_RECVPKTINFO */
		break;
	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */
		break;
	default:
		break;
	}
	return 1;
}
1149 
1150 /** see if interface is ssl, its port number == the ssl port number */
1151 static int
1152 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1153 	struct config_strlist* tls_additional_port)
1154 {
1155 	struct config_strlist* s;
1156 	char* p = strchr(ifname, '@');
1157 	if(!p && atoi(port) == ssl_port)
1158 		return 1;
1159 	if(p && atoi(p+1) == ssl_port)
1160 		return 1;
1161 	for(s = tls_additional_port; s; s = s->next) {
1162 		if(p && atoi(p+1) == atoi(s->str))
1163 			return 1;
1164 		if(!p && atoi(port) == atoi(s->str))
1165 			return 1;
1166 	}
1167 	return 0;
1168 }
1169 
1170 /**
1171  * Helper for ports_open. Creates one interface (or NULL for default).
1172  * @param ifname: The interface ip address.
1173  * @param do_auto: use automatic interface detection.
1174  * 	If enabled, then ifname must be the wildcard name.
1175  * @param do_udp: if udp should be used.
1176  * @param do_tcp: if tcp should be used.
1177  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1178  * @param port: Port number to use (as string).
1179  * @param list: list of open ports, appended to, changed to point to list head.
1180  * @param rcv: receive buffer size for UDP
1181  * @param snd: send buffer size for UDP
1182  * @param ssl_port: ssl service port number
1183  * @param tls_additional_port: list of additional ssl service port numbers.
1184  * @param https_port: DoH service port number
1185  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1186  * 	set to false on exit if reuseport failed due to no kernel support.
1187  * @param transparent: set IP_TRANSPARENT socket option.
1188  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1189  * @param freebind: set IP_FREEBIND socket option.
1190  * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1191  * @param use_systemd: if true, fetch sockets from systemd.
1192  * @param dnscrypt_port: dnscrypt service port number
1193  * @param dscp: DSCP to use.
1194  * @return: returns false on error.
1195  */
1196 static int
1197 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1198 	struct addrinfo *hints, const char* port, struct listen_port** list,
1199 	size_t rcv, size_t snd, int ssl_port,
1200 	struct config_strlist* tls_additional_port, int https_port,
1201 	int* reuseport, int transparent, int tcp_mss, int freebind,
1202 	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1203 {
1204 	int s, noip6=0;
1205 	int is_https = if_is_https(ifname, port, https_port);
1206 	int nodelay = is_https && http2_nodelay;
1207 	struct unbound_socket* ub_sock;
1208 #ifdef USE_DNSCRYPT
1209 	int is_dnscrypt = ((strchr(ifname, '@') &&
1210 			atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1211 			(!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1212 #else
1213 	int is_dnscrypt = 0;
1214 	(void)dnscrypt_port;
1215 #endif
1216 
1217 	if(!do_udp && !do_tcp)
1218 		return 0;
1219 
1220 	if(do_auto) {
1221 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1222 		if(!ub_sock)
1223 			return 0;
1224 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1225 			&noip6, rcv, snd, reuseport, transparent,
1226 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1227 			freeaddrinfo(ub_sock->addr);
1228 			free(ub_sock);
1229 			if(noip6) {
1230 				log_warn("IPv6 protocol not available");
1231 				return 1;
1232 			}
1233 			return 0;
1234 		}
1235 		/* getting source addr packet info is highly non-portable */
1236 		if(!set_recvpktinfo(s, hints->ai_family)) {
1237 			sock_close(s);
1238 			freeaddrinfo(ub_sock->addr);
1239 			free(ub_sock);
1240 			return 0;
1241 		}
1242 		if(!port_insert(list, s,
1243 		   is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil, ub_sock)) {
1244 			sock_close(s);
1245 			freeaddrinfo(ub_sock->addr);
1246 			free(ub_sock);
1247 			return 0;
1248 		}
1249 	} else if(do_udp) {
1250 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1251 		if(!ub_sock)
1252 			return 0;
1253 		/* regular udp socket */
1254 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1255 			&noip6, rcv, snd, reuseport, transparent,
1256 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1257 			freeaddrinfo(ub_sock->addr);
1258 			free(ub_sock);
1259 			if(noip6) {
1260 				log_warn("IPv6 protocol not available");
1261 				return 1;
1262 			}
1263 			return 0;
1264 		}
1265 		if(!port_insert(list, s,
1266 		   is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp, ub_sock)) {
1267 			sock_close(s);
1268 			freeaddrinfo(ub_sock->addr);
1269 			free(ub_sock);
1270 			return 0;
1271 		}
1272 	}
1273 	if(do_tcp) {
1274 		int is_ssl = if_is_ssl(ifname, port, ssl_port,
1275 			tls_additional_port);
1276 		enum listen_type port_type;
1277 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1278 		if(!ub_sock)
1279 			return 0;
1280 		if(is_ssl)
1281 			port_type = listen_type_ssl;
1282 		else if(is_https)
1283 			port_type = listen_type_http;
1284 		else if(is_dnscrypt)
1285 			port_type = listen_type_tcp_dnscrypt;
1286 		else
1287 			port_type = listen_type_tcp;
1288 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1289 			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1290 			freebind, use_systemd, dscp, ub_sock)) == -1) {
1291 			freeaddrinfo(ub_sock->addr);
1292 			free(ub_sock);
1293 			if(noip6) {
1294 				/*log_warn("IPv6 protocol not available");*/
1295 				return 1;
1296 			}
1297 			return 0;
1298 		}
1299 		if(is_ssl)
1300 			verbose(VERB_ALGO, "setup TCP for SSL service");
1301 		if(!port_insert(list, s, port_type, ub_sock)) {
1302 			sock_close(s);
1303 			freeaddrinfo(ub_sock->addr);
1304 			free(ub_sock);
1305 			return 0;
1306 		}
1307 	}
1308 	return 1;
1309 }
1310 
1311 /**
1312  * Add items to commpoint list in front.
1313  * @param c: commpoint to add.
1314  * @param front: listen struct.
1315  * @return: false on failure.
1316  */
1317 static int
1318 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1319 {
1320 	struct listen_list* item = (struct listen_list*)malloc(
1321 		sizeof(struct listen_list));
1322 	if(!item)
1323 		return 0;
1324 	item->com = c;
1325 	item->next = front->cps;
1326 	front->cps = item;
1327 	return 1;
1328 }
1329 
/**
 * Initialise the locks that guard the stream wait counter and the http2
 * query and response buffer counters.
 * Each lock has its own "inited" flag, so a lock that was already set up
 * is skipped when this is called again.
 */
void listen_setup_locks(void)
{
	if(!stream_wait_lock_inited) {
		lock_basic_init(&stream_wait_count_lock);
		stream_wait_lock_inited = 1;
	}
	if(!http2_query_buffer_lock_inited) {
		lock_basic_init(&http2_query_buffer_count_lock);
		http2_query_buffer_lock_inited = 1;
	}
	if(!http2_response_buffer_lock_inited) {
		lock_basic_init(&http2_response_buffer_count_lock);
		http2_response_buffer_lock_inited = 1;
	}
}
1345 
/**
 * Destroy the locks for the stream wait counter and the http2 query and
 * response buffer counters.
 * Only locks whose "inited" flag is set are destroyed; the flag is
 * cleared before the destroy call.
 */
void listen_desetup_locks(void)
{
	if(stream_wait_lock_inited) {
		stream_wait_lock_inited = 0;
		lock_basic_destroy(&stream_wait_count_lock);
	}
	if(http2_query_buffer_lock_inited) {
		http2_query_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_query_buffer_count_lock);
	}
	if(http2_response_buffer_lock_inited) {
		http2_response_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_response_buffer_count_lock);
	}
}
1361 
/**
 * Create the listening structure: allocates the shared udp buffer of
 * bufsize bytes and creates one comm point for every entry in the ports
 * list, of the kind that matches the entry's listen type.
 * On any failure everything created so far is removed with listen_delete
 * and NULL is returned. NULL is also returned when the ports list yields
 * no comm points at all.
 */
struct listen_dnsport*
listen_create(struct comm_base* base, struct listen_port* ports,
	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
	int harden_large_queries, uint32_t http_max_streams,
	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
	void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
	void *cb_arg)
{
	struct listen_dnsport* front = (struct listen_dnsport*)
		malloc(sizeof(struct listen_dnsport));
	if(!front)
		return NULL;
	front->cps = NULL;
	front->udp_buff = sldns_buffer_new(bufsize);
#ifdef USE_DNSCRYPT
	front->dnscrypt_udp_buff = NULL;
#endif
	if(!front->udp_buff) {
		free(front);
		return NULL;
	}

	/* create comm points as needed */
	while(ports) {
		/* stays NULL if the listen type matches none of the branches */
		struct comm_point* cp = NULL;
		if(ports->ftype == listen_type_udp ||
		   ports->ftype == listen_type_udp_dnscrypt) {
			cp = comm_point_create_udp(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket);
		} else if(ports->ftype == listen_type_tcp ||
				ports->ftype == listen_type_tcp_dnscrypt) {
			/* plain tcp: passes 0 and NULL where the ssl/http
			 * branch passes http_max_streams and http_endpoint */
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries, 0, NULL,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, cb, cb_arg, ports->socket);
		} else if(ports->ftype == listen_type_ssl ||
			ports->ftype == listen_type_http) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries,
				http_max_streams, http_endpoint,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, cb, cb_arg, ports->socket);
			/* the warnings below do not stop the setup */
			if(ports->ftype == listen_type_http) {
				if(!sslctx && !http_notls) {
					log_warn("HTTPS port configured, but "
						"no TLS tls-service-key or "
						"tls-service-pem set");
				}
#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
				if(!http_notls) {
					log_warn("Unbound is not compiled "
						"with an OpenSSL version "
						"supporting ALPN "
						"(OpenSSL >= 1.0.2). This "
						"is required to use "
						"DNS-over-HTTPS");
				}
#endif
#ifndef HAVE_NGHTTP2_NGHTTP2_H
				log_warn("Unbound is not compiled with "
					"nghttp2. This is required to use "
					"DNS-over-HTTPS.");
#endif
			}
		} else if(ports->ftype == listen_type_udpancil ||
				  ports->ftype == listen_type_udpancil_dnscrypt) {
			cp = comm_point_create_udp_ancil(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket);
		}
		if(!cp) {
			log_err("can't create commpoint");
			listen_delete(front);
			return NULL;
		}
		/* only ssl ports, and http ports without http_notls, get
		 * the ssl context; all other types get NULL */
		if((http_notls && ports->ftype == listen_type_http) ||
			(ports->ftype == listen_type_tcp) ||
			(ports->ftype == listen_type_udp) ||
			(ports->ftype == listen_type_udpancil) ||
			(ports->ftype == listen_type_tcp_dnscrypt) ||
			(ports->ftype == listen_type_udp_dnscrypt) ||
			(ports->ftype == listen_type_udpancil_dnscrypt))
			cp->ssl = NULL;
		else
			cp->ssl = sslctx;
		cp->dtenv = dtenv;
		/* NOTE(review): presumably the fd remains owned by the ports
		 * list (listening_ports_free closes it) - confirm */
		cp->do_not_close = 1;
#ifdef USE_DNSCRYPT
		if (ports->ftype == listen_type_udp_dnscrypt ||
			ports->ftype == listen_type_tcp_dnscrypt ||
			ports->ftype == listen_type_udpancil_dnscrypt) {
			cp->dnscrypt = 1;
			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
			if(!cp->dnscrypt_buffer) {
				log_err("can't alloc dnscrypt_buffer");
				/* cp is not in the list yet, delete it here */
				comm_point_delete(cp);
				listen_delete(front);
				return NULL;
			}
			/* overwritten for every dnscrypt port, so this ends
			 * up pointing at the buffer of the last one */
			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
		}
#endif
		if(!listen_cp_insert(cp, front)) {
			log_err("malloc failed");
			comm_point_delete(cp);
			listen_delete(front);
			return NULL;
		}
		ports = ports->next;
	}
	if(!front->cps) {
		log_err("Could not open sockets to accept queries.");
		listen_delete(front);
		return NULL;
	}

	return front;
}
1481 
1482 void
1483 listen_list_delete(struct listen_list* list)
1484 {
1485 	struct listen_list *p = list, *pn;
1486 	while(p) {
1487 		pn = p->next;
1488 		comm_point_delete(p->com);
1489 		free(p);
1490 		p = pn;
1491 	}
1492 }
1493 
1494 void
1495 listen_delete(struct listen_dnsport* front)
1496 {
1497 	if(!front)
1498 		return;
1499 	listen_list_delete(front->cps);
1500 #ifdef USE_DNSCRYPT
1501 	if(front->dnscrypt_udp_buff &&
1502 		front->udp_buff != front->dnscrypt_udp_buff) {
1503 		sldns_buffer_free(front->dnscrypt_udp_buff);
1504 	}
1505 #endif
1506 	sldns_buffer_free(front->udp_buff);
1507 	free(front);
1508 }
1509 
#ifdef HAVE_GETIFADDRS
/** grow the string array by one and store a copy of str in the new slot.
 * Logs and returns false on allocation failure; the size is only
 * incremented on success. */
static int
ifa_addr_list_append(char ***ip_addresses, int *ip_addresses_size,
	const char *str)
{
	void *grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Expand an interface name into the textual IP addresses it carries.
 * search_ifa may have an "@port" suffix; the part before the last '@' is
 * compared with the interface names in ifas, and the suffix is appended
 * to every address that is produced. IPv6 addresses get a "%scope"
 * part when the scope id maps to an interface name.
 * If no address was added, search_ifa itself is appended unchanged.
 * @return false on failure (inet_ntop or allocation failure).
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	const char *at = strrchr(search_ifa, '@');
	const char *suffix = at ? at : "";
	size_t namelen = at ? (size_t)(at - search_ifa) : 0;
	int size_at_entry = *ip_addresses_size;
	struct ifaddrs *cur;

	for(cur = ifas; cur != NULL; cur = cur->ifa_next) {
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		/* the interface name has to match exactly */
		if(at) {
			if(strlen(cur->ifa_name) != namelen)
				continue;
			if(strncmp(cur->ifa_name, search_ifa, namelen) != 0)
				continue;
		} else if(strcmp(cur->ifa_name, search_ifa) != 0) {
			continue;
		}

		if(cur->ifa_addr == NULL)
			continue;

		if(cur->ifa_addr->sa_family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				cur->ifa_addr;
			if(!inet_ntop(AF_INET, &in4->sin_addr, a4,
				sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, suffix);
		}
#ifdef INET6
		else if(cur->ifa_addr->sa_family == AF_INET6) {
			char a6[INET6_ADDRSTRLEN + 1];
			char scope_name[IF_NAMESIZE + 1];
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				cur->ifa_addr;
			scope_name[0] = 0;
			if(!inet_ntop(AF_INET6, &in6->sin6_addr, a6,
				sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			/* scope_name stays empty if there is no name */
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)scope_name);
			if(scope_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, scope_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, suffix);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!ifa_addr_list_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	/* nothing was found for this name, pass it on as it is */
	if(*ip_addresses_size == size_at_entry) {
		if(!ifa_addr_list_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1613 
1614 int resolve_interface_names(char** ifs, int num_ifs,
1615 	struct config_strlist* list, char*** resif, int* num_resif)
1616 {
1617 #ifdef HAVE_GETIFADDRS
1618 	struct ifaddrs *addrs = NULL;
1619 	if(num_ifs == 0 && list == NULL) {
1620 		*resif = NULL;
1621 		*num_resif = 0;
1622 		return 1;
1623 	}
1624 	if(getifaddrs(&addrs) == -1) {
1625 		log_err("failed to list interfaces: getifaddrs: %s",
1626 			strerror(errno));
1627 		freeifaddrs(addrs);
1628 		return 0;
1629 	}
1630 	if(ifs) {
1631 		int i;
1632 		for(i=0; i<num_ifs; i++) {
1633 			if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1634 				freeifaddrs(addrs);
1635 				config_del_strarray(*resif, *num_resif);
1636 				*resif = NULL;
1637 				*num_resif = 0;
1638 				return 0;
1639 			}
1640 		}
1641 	}
1642 	if(list) {
1643 		struct config_strlist* p;
1644 		for(p = list; p; p = p->next) {
1645 			if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1646 				freeifaddrs(addrs);
1647 				config_del_strarray(*resif, *num_resif);
1648 				*resif = NULL;
1649 				*num_resif = 0;
1650 				return 0;
1651 			}
1652 }
1653 	}
1654 	freeifaddrs(addrs);
1655 	return 1;
1656 #else
1657 	struct config_strlist* p;
1658 	if(num_ifs == 0 && list == NULL) {
1659 		*resif = NULL;
1660 		*num_resif = 0;
1661 		return 1;
1662 	}
1663 	*num_resif = num_ifs;
1664 	for(p = list; p; p = p->next) {
1665 		(*num_resif)++;
1666 	}
1667 	*resif = calloc(*num_resif, sizeof(**resif));
1668 	if(!*resif) {
1669 		log_err("out of memory");
1670 		return 0;
1671 	}
1672 	if(ifs) {
1673 		int i;
1674 		for(i=0; i<num_ifs; i++) {
1675 			(*resif)[i] = strdup(ifs[i]);
1676 			if(!((*resif)[i])) {
1677 				log_err("out of memory");
1678 				config_del_strarray(*resif, *num_resif);
1679 				*resif = NULL;
1680 				*num_resif = 0;
1681 				return 0;
1682 			}
1683 		}
1684 	}
1685 	if(list) {
1686 		int idx = num_ifs;
1687 		for(p = list; p; p = p->next) {
1688 			(*resif)[idx] = strdup(p->str);
1689 			if(!((*resif)[idx])) {
1690 				log_err("out of memory");
1691 				config_del_strarray(*resif, *num_resif);
1692 				*resif = NULL;
1693 				*num_resif = 0;
1694 				return 0;
1695 			}
1696 			idx++;
1697 		}
1698 	}
1699 	return 1;
1700 #endif /* HAVE_GETIFADDRS */
1701 }
1702 
1703 struct listen_port*
1704 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1705 	int* reuseport)
1706 {
1707 	struct listen_port* list = NULL;
1708 	struct addrinfo hints;
1709 	int i, do_ip4, do_ip6;
1710 	int do_tcp, do_auto;
1711 	char portbuf[32];
1712 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1713 	do_ip4 = cfg->do_ip4;
1714 	do_ip6 = cfg->do_ip6;
1715 	do_tcp = cfg->do_tcp;
1716 	do_auto = cfg->if_automatic && cfg->do_udp;
1717 	if(cfg->incoming_num_tcp == 0)
1718 		do_tcp = 0;
1719 
1720 	/* getaddrinfo */
1721 	memset(&hints, 0, sizeof(hints));
1722 	hints.ai_flags = AI_PASSIVE;
1723 	/* no name lookups on our listening ports */
1724 	if(num_ifs > 0)
1725 		hints.ai_flags |= AI_NUMERICHOST;
1726 	hints.ai_family = AF_UNSPEC;
1727 #ifndef INET6
1728 	do_ip6 = 0;
1729 #endif
1730 	if(!do_ip4 && !do_ip6) {
1731 		return NULL;
1732 	}
1733 	/* create ip4 and ip6 ports so that return addresses are nice. */
1734 	if(do_auto || num_ifs == 0) {
1735 		if(do_auto && cfg->if_automatic_ports &&
1736 			cfg->if_automatic_ports[0]!=0) {
1737 			char* now = cfg->if_automatic_ports;
1738 			while(now && *now) {
1739 				char* after;
1740 				int extraport;
1741 				while(isspace((unsigned char)*now))
1742 					now++;
1743 				if(!*now)
1744 					break;
1745 				after = now;
1746 				extraport = (int)strtol(now, &after, 10);
1747 				if(extraport < 0 || extraport > 65535) {
1748 					log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1749 					listening_ports_free(list);
1750 					return NULL;
1751 				}
1752 				if(extraport == 0 && now == after) {
1753 					log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1754 					listening_ports_free(list);
1755 					return NULL;
1756 				}
1757 				now = after;
1758 				snprintf(portbuf, sizeof(portbuf), "%d", extraport);
1759 				if(do_ip6) {
1760 					hints.ai_family = AF_INET6;
1761 					if(!ports_create_if("::0",
1762 						do_auto, cfg->do_udp, do_tcp,
1763 						&hints, portbuf, &list,
1764 						cfg->so_rcvbuf, cfg->so_sndbuf,
1765 						cfg->ssl_port, cfg->tls_additional_port,
1766 						cfg->https_port, reuseport, cfg->ip_transparent,
1767 						cfg->tcp_mss, cfg->ip_freebind,
1768 						cfg->http_nodelay, cfg->use_systemd,
1769 						cfg->dnscrypt_port, cfg->ip_dscp)) {
1770 						listening_ports_free(list);
1771 						return NULL;
1772 					}
1773 				}
1774 				if(do_ip4) {
1775 					hints.ai_family = AF_INET;
1776 					if(!ports_create_if("0.0.0.0",
1777 						do_auto, cfg->do_udp, do_tcp,
1778 						&hints, portbuf, &list,
1779 						cfg->so_rcvbuf, cfg->so_sndbuf,
1780 						cfg->ssl_port, cfg->tls_additional_port,
1781 						cfg->https_port, reuseport, cfg->ip_transparent,
1782 						cfg->tcp_mss, cfg->ip_freebind,
1783 						cfg->http_nodelay, cfg->use_systemd,
1784 						cfg->dnscrypt_port, cfg->ip_dscp)) {
1785 						listening_ports_free(list);
1786 						return NULL;
1787 					}
1788 				}
1789 			}
1790 			return list;
1791 		}
1792 		if(do_ip6) {
1793 			hints.ai_family = AF_INET6;
1794 			if(!ports_create_if(do_auto?"::0":"::1",
1795 				do_auto, cfg->do_udp, do_tcp,
1796 				&hints, portbuf, &list,
1797 				cfg->so_rcvbuf, cfg->so_sndbuf,
1798 				cfg->ssl_port, cfg->tls_additional_port,
1799 				cfg->https_port, reuseport, cfg->ip_transparent,
1800 				cfg->tcp_mss, cfg->ip_freebind,
1801 				cfg->http_nodelay, cfg->use_systemd,
1802 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1803 				listening_ports_free(list);
1804 				return NULL;
1805 			}
1806 		}
1807 		if(do_ip4) {
1808 			hints.ai_family = AF_INET;
1809 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1810 				do_auto, cfg->do_udp, do_tcp,
1811 				&hints, portbuf, &list,
1812 				cfg->so_rcvbuf, cfg->so_sndbuf,
1813 				cfg->ssl_port, cfg->tls_additional_port,
1814 				cfg->https_port, reuseport, cfg->ip_transparent,
1815 				cfg->tcp_mss, cfg->ip_freebind,
1816 				cfg->http_nodelay, cfg->use_systemd,
1817 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1818 				listening_ports_free(list);
1819 				return NULL;
1820 			}
1821 		}
1822 	} else for(i = 0; i<num_ifs; i++) {
1823 		if(str_is_ip6(ifs[i])) {
1824 			if(!do_ip6)
1825 				continue;
1826 			hints.ai_family = AF_INET6;
1827 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1828 				do_tcp, &hints, portbuf, &list,
1829 				cfg->so_rcvbuf, cfg->so_sndbuf,
1830 				cfg->ssl_port, cfg->tls_additional_port,
1831 				cfg->https_port, reuseport, cfg->ip_transparent,
1832 				cfg->tcp_mss, cfg->ip_freebind,
1833 				cfg->http_nodelay, cfg->use_systemd,
1834 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1835 				listening_ports_free(list);
1836 				return NULL;
1837 			}
1838 		} else {
1839 			if(!do_ip4)
1840 				continue;
1841 			hints.ai_family = AF_INET;
1842 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1843 				do_tcp, &hints, portbuf, &list,
1844 				cfg->so_rcvbuf, cfg->so_sndbuf,
1845 				cfg->ssl_port, cfg->tls_additional_port,
1846 				cfg->https_port, reuseport, cfg->ip_transparent,
1847 				cfg->tcp_mss, cfg->ip_freebind,
1848 				cfg->http_nodelay, cfg->use_systemd,
1849 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1850 				listening_ports_free(list);
1851 				return NULL;
1852 			}
1853 		}
1854 	}
1855 
1856 	return list;
1857 }
1858 
1859 void listening_ports_free(struct listen_port* list)
1860 {
1861 	struct listen_port* nx;
1862 	while(list) {
1863 		nx = list->next;
1864 		if(list->fd != -1) {
1865 			sock_close(list->fd);
1866 		}
1867 		/* rc_ports don't have ub_socket */
1868 		if(list->socket) {
1869 			freeaddrinfo(list->socket->addr);
1870 			free(list->socket);
1871 		}
1872 		free(list);
1873 		list = nx;
1874 	}
1875 }
1876 
1877 size_t listen_get_mem(struct listen_dnsport* listen)
1878 {
1879 	struct listen_list* p;
1880 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1881 		sizeof(*listen->udp_buff) +
1882 		sldns_buffer_capacity(listen->udp_buff);
1883 #ifdef USE_DNSCRYPT
1884 	s += sizeof(*listen->dnscrypt_udp_buff);
1885 	if(listen->udp_buff != listen->dnscrypt_udp_buff){
1886 		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1887 	}
1888 #endif
1889 	for(p = listen->cps; p; p = p->next) {
1890 		s += sizeof(*p);
1891 		s += comm_point_get_mem(p->com);
1892 	}
1893 	return s;
1894 }
1895 
1896 void listen_stop_accept(struct listen_dnsport* listen)
1897 {
1898 	/* do not stop the ones that have no tcp_free list
1899 	 * (they have already stopped listening) */
1900 	struct listen_list* p;
1901 	for(p=listen->cps; p; p=p->next) {
1902 		if(p->com->type == comm_tcp_accept &&
1903 			p->com->tcp_free != NULL) {
1904 			comm_point_stop_listening(p->com);
1905 		}
1906 	}
1907 }
1908 
1909 void listen_start_accept(struct listen_dnsport* listen)
1910 {
1911 	/* do not start the ones that have no tcp_free list, it is no
1912 	 * use to listen to them because they have no free tcp handlers */
1913 	struct listen_list* p;
1914 	for(p=listen->cps; p; p=p->next) {
1915 		if(p->com->type == comm_tcp_accept &&
1916 			p->com->tcp_free != NULL) {
1917 			comm_point_start_listening(p->com, -1, -1);
1918 		}
1919 	}
1920 }
1921 
1922 struct tcp_req_info*
1923 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1924 {
1925 	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1926 	if(!req) {
1927 		log_err("malloc failure for new stream outoforder processing structure");
1928 		return NULL;
1929 	}
1930 	memset(req, 0, sizeof(*req));
1931 	req->spool_buffer = spoolbuf;
1932 	return req;
1933 }
1934 
/** clear and free the tcp_req_info; a NULL argument is ignored */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		tcp_req_info_clear(req);
		/* cp is pointer back to commpoint that owns this struct
		 * and called delete on us */
		/* spool_buffer is shared udp buffer, not deleted here */
		free(req);
	}
}
1945 
1946 void tcp_req_info_clear(struct tcp_req_info* req)
1947 {
1948 	struct tcp_req_open_item* open, *nopen;
1949 	struct tcp_req_done_item* item, *nitem;
1950 	if(!req) return;
1951 
1952 	/* free outstanding request mesh reply entries */
1953 	open = req->open_req_list;
1954 	while(open) {
1955 		nopen = open->next;
1956 		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1957 		free(open);
1958 		open = nopen;
1959 	}
1960 	req->open_req_list = NULL;
1961 	req->num_open_req = 0;
1962 
1963 	/* free pending writable result packets */
1964 	item = req->done_req_list;
1965 	while(item) {
1966 		nitem = item->next;
1967 		lock_basic_lock(&stream_wait_count_lock);
1968 		stream_wait_count -= (sizeof(struct tcp_req_done_item)
1969 			+item->len);
1970 		lock_basic_unlock(&stream_wait_count_lock);
1971 		free(item->buf);
1972 		free(item);
1973 		item = nitem;
1974 	}
1975 	req->done_req_list = NULL;
1976 	req->num_done_req = 0;
1977 	req->read_is_closed = 0;
1978 }
1979 
1980 void
1981 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1982 {
1983 	struct tcp_req_open_item* open, *prev = NULL;
1984 	if(!req || !m) return;
1985 	open = req->open_req_list;
1986 	while(open) {
1987 		if(open->mesh_state == m) {
1988 			struct tcp_req_open_item* next;
1989 			if(prev) prev->next = open->next;
1990 			else req->open_req_list = open->next;
1991 			/* caller has to manage the mesh state reply entry */
1992 			next = open->next;
1993 			free(open);
1994 			req->num_open_req --;
1995 
1996 			/* prev = prev; */
1997 			open = next;
1998 			continue;
1999 		}
2000 		prev = open;
2001 		open = open->next;
2002 	}
2003 }
2004 
2005 /** setup listening for read or write */
2006 static void
2007 tcp_req_info_setup_listen(struct tcp_req_info* req)
2008 {
2009 	int wr = 0;
2010 	int rd = 0;
2011 
2012 	if(req->cp->tcp_byte_count != 0) {
2013 		/* cannot change, halfway through */
2014 		return;
2015 	}
2016 
2017 	if(!req->cp->tcp_is_reading)
2018 		wr = 1;
2019 	if(!req->read_is_closed)
2020 		rd = 1;
2021 
2022 	if(wr) {
2023 		req->cp->tcp_is_reading = 0;
2024 		comm_point_stop_listening(req->cp);
2025 		comm_point_start_listening(req->cp, -1,
2026 			adjusted_tcp_timeout(req->cp));
2027 	} else if(rd) {
2028 		req->cp->tcp_is_reading = 1;
2029 		comm_point_stop_listening(req->cp);
2030 		comm_point_start_listening(req->cp, -1,
2031 			adjusted_tcp_timeout(req->cp));
2032 		/* and also read it (from SSL stack buffers), so
2033 		 * no event read event is expected since the remainder of
2034 		 * the TLS frame is sitting in the buffers. */
2035 		req->read_again = 1;
2036 	} else {
2037 		comm_point_stop_listening(req->cp);
2038 		comm_point_start_listening(req->cp, -1,
2039 			adjusted_tcp_timeout(req->cp));
2040 		comm_point_listen_for_rw(req->cp, 0, 0);
2041 	}
2042 }
2043 
2044 /** remove first item from list of pending results */
2045 static struct tcp_req_done_item*
2046 tcp_req_info_pop_done(struct tcp_req_info* req)
2047 {
2048 	struct tcp_req_done_item* item;
2049 	log_assert(req->num_done_req > 0 && req->done_req_list);
2050 	item = req->done_req_list;
2051 	lock_basic_lock(&stream_wait_count_lock);
2052 	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
2053 	lock_basic_unlock(&stream_wait_count_lock);
2054 	req->done_req_list = req->done_req_list->next;
2055 	req->num_done_req --;
2056 	return item;
2057 }
2058 
2059 /** Send given buffer and setup to write */
2060 static void
2061 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
2062 	size_t len)
2063 {
2064 	sldns_buffer_clear(req->cp->buffer);
2065 	sldns_buffer_write(req->cp->buffer, buf, len);
2066 	sldns_buffer_flip(req->cp->buffer);
2067 
2068 	req->cp->tcp_is_reading = 0; /* we are now writing */
2069 }
2070 
2071 /** pick up the next result and start writing it to the channel */
2072 static void
2073 tcp_req_pickup_next_result(struct tcp_req_info* req)
2074 {
2075 	if(req->num_done_req > 0) {
2076 		/* unlist the done item from the list of pending results */
2077 		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
2078 		tcp_req_info_start_write_buf(req, item->buf, item->len);
2079 		free(item->buf);
2080 		free(item);
2081 	}
2082 }
2083 
2084 /** the read channel has closed */
2085 int
2086 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2087 {
2088 	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2089 	/* reset byte count for (potential) partial read */
2090 	req->cp->tcp_byte_count = 0;
2091 	/* if we still have results to write, pick up next and write it */
2092 	if(req->num_done_req != 0) {
2093 		tcp_req_pickup_next_result(req);
2094 		tcp_req_info_setup_listen(req);
2095 		return 1;
2096 	}
2097 	/* if nothing to do, this closes the connection */
2098 	if(req->num_open_req == 0 && req->num_done_req == 0)
2099 		return 0;
2100 	/* otherwise, we must be waiting for dns resolve, wait with timeout */
2101 	req->read_is_closed = 1;
2102 	tcp_req_info_setup_listen(req);
2103 	return 1;
2104 }
2105 
2106 void
2107 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2108 {
2109 	/* back to reading state, we finished this write event */
2110 	sldns_buffer_clear(req->cp->buffer);
2111 	if(req->num_done_req == 0 && req->read_is_closed) {
2112 		/* no more to write and nothing to read, close it */
2113 		comm_point_drop_reply(&req->cp->repinfo);
2114 		return;
2115 	}
2116 	req->cp->tcp_is_reading = 1;
2117 	/* see if another result needs writing */
2118 	tcp_req_pickup_next_result(req);
2119 
2120 	/* see if there is more to write, if not stop_listening for writing */
2121 	/* see if new requests are allowed, if so, start_listening
2122 	 * for reading */
2123 	tcp_req_info_setup_listen(req);
2124 }
2125 
2126 void
2127 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2128 {
2129 	struct comm_point* c = req->cp;
2130 
2131 	/* we want to read up several requests, unless there are
2132 	 * pending answers */
2133 
2134 	req->is_drop = 0;
2135 	req->is_reply = 0;
2136 	req->in_worker_handle = 1;
2137 	sldns_buffer_set_limit(req->spool_buffer, 0);
2138 	/* handle the current request */
2139 	/* this calls the worker handle request routine that could give
2140 	 * a cache response, or localdata response, or drop the reply,
2141 	 * or schedule a mesh entry for later */
2142 	fptr_ok(fptr_whitelist_comm_point(c->callback));
2143 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2144 		req->in_worker_handle = 0;
2145 		/* there is an answer, put it up.  It is already in the
2146 		 * c->buffer, just send it. */
2147 		/* since we were just reading a query, the channel is
2148 		 * clear to write to */
2149 	send_it:
2150 		c->tcp_is_reading = 0;
2151 		comm_point_stop_listening(c);
2152 		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2153 		return;
2154 	}
2155 	req->in_worker_handle = 0;
2156 	/* it should be waiting in the mesh for recursion.
2157 	 * If mesh failed to add a new entry and called commpoint_drop_reply.
2158 	 * Then the mesh state has been cleared. */
2159 	if(req->is_drop) {
2160 		/* the reply has been dropped, stream has been closed. */
2161 		return;
2162 	}
2163 	/* If mesh failed(mallocfail) and called commpoint_send_reply with
2164 	 * something like servfail then we pick up that reply below. */
2165 	if(req->is_reply) {
2166 		goto send_it;
2167 	}
2168 
2169 	sldns_buffer_clear(c->buffer);
2170 	/* if pending answers, pick up an answer and start sending it */
2171 	tcp_req_pickup_next_result(req);
2172 
2173 	/* if answers pending, start sending answers */
2174 	/* read more requests if we can have more requests */
2175 	tcp_req_info_setup_listen(req);
2176 }
2177 
2178 int
2179 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2180 	struct mesh_area* mesh, struct mesh_state* m)
2181 {
2182 	struct tcp_req_open_item* item;
2183 	log_assert(req && mesh && m);
2184 	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2185 	if(!item) return 0;
2186 	item->next = req->open_req_list;
2187 	item->mesh = mesh;
2188 	item->mesh_state = m;
2189 	req->open_req_list = item;
2190 	req->num_open_req++;
2191 	return 1;
2192 }
2193 
2194 /** Add a result to the result list.  At the end. */
2195 static int
2196 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2197 {
2198 	struct tcp_req_done_item* last = NULL;
2199 	struct tcp_req_done_item* item;
2200 	size_t space;
2201 
2202 	/* see if we have space */
2203 	space = sizeof(struct tcp_req_done_item) + len;
2204 	lock_basic_lock(&stream_wait_count_lock);
2205 	if(stream_wait_count + space > stream_wait_max) {
2206 		lock_basic_unlock(&stream_wait_count_lock);
2207 		verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2208 		return 0;
2209 	}
2210 	stream_wait_count += space;
2211 	lock_basic_unlock(&stream_wait_count_lock);
2212 
2213 	/* find last element */
2214 	last = req->done_req_list;
2215 	while(last && last->next)
2216 		last = last->next;
2217 
2218 	/* create new element */
2219 	item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2220 	if(!item) {
2221 		log_err("malloc failure, for stream result list");
2222 		return 0;
2223 	}
2224 	item->next = NULL;
2225 	item->len = len;
2226 	item->buf = memdup(buf, len);
2227 	if(!item->buf) {
2228 		free(item);
2229 		log_err("malloc failure, adding reply to stream result list");
2230 		return 0;
2231 	}
2232 
2233 	/* link in */
2234 	if(last) last->next = item;
2235 	else req->done_req_list = item;
2236 	req->num_done_req++;
2237 	return 1;
2238 }
2239 
2240 void
2241 tcp_req_info_send_reply(struct tcp_req_info* req)
2242 {
2243 	if(req->in_worker_handle) {
2244 		/* reply from mesh is in the spool_buffer */
2245 		/* copy now, so that the spool buffer is free for other tasks
2246 		 * before the callback is done */
2247 		sldns_buffer_clear(req->cp->buffer);
2248 		sldns_buffer_write(req->cp->buffer,
2249 			sldns_buffer_begin(req->spool_buffer),
2250 			sldns_buffer_limit(req->spool_buffer));
2251 		sldns_buffer_flip(req->cp->buffer);
2252 		req->is_reply = 1;
2253 		return;
2254 	}
2255 	/* now that the query has been handled, that mesh_reply entry
2256 	 * should be removed, from the tcp_req_info list,
2257 	 * the mesh state cleanup removes then with region_cleanup and
2258 	 * replies_sent true. */
2259 	/* see if we can send it straight away (we are not doing
2260 	 * anything else).  If so, copy to buffer and start */
2261 	if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2262 		/* buffer is free, and was ready to read new query into,
2263 		 * but we are now going to use it to send this answer */
2264 		tcp_req_info_start_write_buf(req,
2265 			sldns_buffer_begin(req->spool_buffer),
2266 			sldns_buffer_limit(req->spool_buffer));
2267 		/* switch to listen to write events */
2268 		comm_point_stop_listening(req->cp);
2269 		comm_point_start_listening(req->cp, -1,
2270 			adjusted_tcp_timeout(req->cp));
2271 		return;
2272 	}
2273 	/* queue up the answer behind the others already pending */
2274 	if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2275 		sldns_buffer_limit(req->spool_buffer))) {
2276 		/* drop the connection, we are out of resources */
2277 		comm_point_drop_reply(&req->cp->repinfo);
2278 	}
2279 }
2280 
2281 size_t tcp_req_info_get_stream_buffer_size(void)
2282 {
2283 	size_t s;
2284 	if(!stream_wait_lock_inited)
2285 		return stream_wait_count;
2286 	lock_basic_lock(&stream_wait_count_lock);
2287 	s = stream_wait_count;
2288 	lock_basic_unlock(&stream_wait_count_lock);
2289 	return s;
2290 }
2291 
2292 size_t http2_get_query_buffer_size(void)
2293 {
2294 	size_t s;
2295 	if(!http2_query_buffer_lock_inited)
2296 		return http2_query_buffer_count;
2297 	lock_basic_lock(&http2_query_buffer_count_lock);
2298 	s = http2_query_buffer_count;
2299 	lock_basic_unlock(&http2_query_buffer_count_lock);
2300 	return s;
2301 }
2302 
2303 size_t http2_get_response_buffer_size(void)
2304 {
2305 	size_t s;
2306 	if(!http2_response_buffer_lock_inited)
2307 		return http2_response_buffer_count;
2308 	lock_basic_lock(&http2_response_buffer_count_lock);
2309 	s = http2_response_buffer_count;
2310 	lock_basic_unlock(&http2_response_buffer_count_lock);
2311 	return s;
2312 }
2313 
2314 #ifdef HAVE_NGHTTP2
2315 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
2316 static ssize_t http2_submit_response_read_callback(
2317 	nghttp2_session* ATTR_UNUSED(session),
2318 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2319 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2320 {
2321 	struct http2_stream* h2_stream;
2322 	struct http2_session* h2_session = source->ptr;
2323 	size_t copylen = length;
2324 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2325 		h2_session->session, stream_id))) {
2326 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2327 			"stream");
2328 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2329 	}
2330 	if(!h2_stream->rbuffer ||
2331 		sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2332 		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2333 			"available in rbuffer");
2334 		/* rbuffer will be free'd in frame close cb */
2335 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2336 	}
2337 
2338 	if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2339 		copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2340 	if(copylen > SSIZE_MAX)
2341 		copylen = SSIZE_MAX; /* will probably never happen */
2342 
2343 	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2344 	sldns_buffer_skip(h2_stream->rbuffer, copylen);
2345 
2346 	if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2347 		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2348 		lock_basic_lock(&http2_response_buffer_count_lock);
2349 		http2_response_buffer_count -=
2350 			sldns_buffer_capacity(h2_stream->rbuffer);
2351 		lock_basic_unlock(&http2_response_buffer_count_lock);
2352 		sldns_buffer_free(h2_stream->rbuffer);
2353 		h2_stream->rbuffer = NULL;
2354 	}
2355 
2356 	return copylen;
2357 }
2358 
2359 /**
2360  * Send RST_STREAM frame for stream.
2361  * @param h2_session: http2 session to submit frame to
2362  * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2363  * @return 0 on error, 1 otherwise
2364  */
2365 static int http2_submit_rst_stream(struct http2_session* h2_session,
2366 		struct http2_stream* h2_stream)
2367 {
2368 	int ret = nghttp2_submit_rst_stream(h2_session->session,
2369 		NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2370 		NGHTTP2_INTERNAL_ERROR);
2371 	if(ret) {
2372 		verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2373 			"error: %s", nghttp2_strerror(ret));
2374 		return 0;
2375 	}
2376 	return 1;
2377 }
2378 
2379 /**
2380  * DNS response ready to be submitted to nghttp2, to be prepared for sending
2381  * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2382  * might be used before this will be sent out.
2383  * @param h2_session: http2 session, containing c->buffer which contains answer
2384  * @return 0 on error, 1 otherwise
2385  */
2386 int http2_submit_dns_response(struct http2_session* h2_session)
2387 {
2388 	int ret;
2389 	nghttp2_data_provider data_prd;
2390 	char status[4];
2391 	nghttp2_nv headers[3];
2392 	struct http2_stream* h2_stream = h2_session->c->h2_stream;
2393 	size_t rlen;
2394 	char rlen_str[32];
2395 
2396 	if(h2_stream->rbuffer) {
2397 		log_err("http2 submit response error: rbuffer already "
2398 			"exists");
2399 		return 0;
2400 	}
2401 	if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2402 		log_err("http2 submit response error: c->buffer not complete");
2403 		return 0;
2404 	}
2405 
2406 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2407 		verbose(VERB_QUERY, "http2: submit response error: "
2408 			"invalid status");
2409 		return 0;
2410 	}
2411 
2412 	rlen = sldns_buffer_remaining(h2_session->c->buffer);
2413 	snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2414 
2415 	lock_basic_lock(&http2_response_buffer_count_lock);
2416 	if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2417 		lock_basic_unlock(&http2_response_buffer_count_lock);
2418 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2419 			"in https-response-buffer-size");
2420 		return http2_submit_rst_stream(h2_session, h2_stream);
2421 	}
2422 	http2_response_buffer_count += rlen;
2423 	lock_basic_unlock(&http2_response_buffer_count_lock);
2424 
2425 	if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2426 		lock_basic_lock(&http2_response_buffer_count_lock);
2427 		http2_response_buffer_count -= rlen;
2428 		lock_basic_unlock(&http2_response_buffer_count_lock);
2429 		log_err("http2 submit response error: malloc failure");
2430 		return 0;
2431 	}
2432 
2433 	headers[0].name = (uint8_t*)":status";
2434 	headers[0].namelen = 7;
2435 	headers[0].value = (uint8_t*)status;
2436 	headers[0].valuelen = 3;
2437 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2438 
2439 	headers[1].name = (uint8_t*)"content-type";
2440 	headers[1].namelen = 12;
2441 	headers[1].value = (uint8_t*)"application/dns-message";
2442 	headers[1].valuelen = 23;
2443 	headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2444 
2445 	headers[2].name = (uint8_t*)"content-length";
2446 	headers[2].namelen = 14;
2447 	headers[2].value = (uint8_t*)rlen_str;
2448 	headers[2].valuelen = strlen(rlen_str);
2449 	headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2450 
2451 	sldns_buffer_write(h2_stream->rbuffer,
2452 		sldns_buffer_current(h2_session->c->buffer),
2453 		sldns_buffer_remaining(h2_session->c->buffer));
2454 	sldns_buffer_flip(h2_stream->rbuffer);
2455 
2456 	data_prd.source.ptr = h2_session;
2457 	data_prd.read_callback = http2_submit_response_read_callback;
2458 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2459 		headers, 3, &data_prd);
2460 	if(ret) {
2461 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2462 			"error: %s", nghttp2_strerror(ret));
2463 		return 0;
2464 	}
2465 	return 1;
2466 }
2467 #else
/** Stub for builds without HAVE_NGHTTP2: the argument is unused and the
 * function always returns 0. */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2472 #endif
2473 
2474 #ifdef HAVE_NGHTTP2
2475 /** HTTP status to descriptive string */
2476 static char* http_status_to_str(enum http_status s)
2477 {
2478 	switch(s) {
2479 		case HTTP_STATUS_OK:
2480 			return "OK";
2481 		case HTTP_STATUS_BAD_REQUEST:
2482 			return "Bad Request";
2483 		case HTTP_STATUS_NOT_FOUND:
2484 			return "Not Found";
2485 		case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2486 			return "Payload Too Large";
2487 		case HTTP_STATUS_URI_TOO_LONG:
2488 			return "URI Too Long";
2489 		case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2490 			return "Unsupported Media Type";
2491 		case HTTP_STATUS_NOT_IMPLEMENTED:
2492 			return "Not Implemented";
2493 	}
2494 	return "Status Unknown";
2495 }
2496 
2497 /** nghttp2 callback. Used to copy error message to nghttp2 session */
2498 static ssize_t http2_submit_error_read_callback(
2499 	nghttp2_session* ATTR_UNUSED(session),
2500 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2501 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2502 {
2503 	struct http2_stream* h2_stream;
2504 	struct http2_session* h2_session = source->ptr;
2505 	char* msg;
2506 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2507 		h2_session->session, stream_id))) {
2508 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2509 			"stream");
2510 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2511 	}
2512 	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2513 	msg = http_status_to_str(h2_stream->status);
2514 	if(length < strlen(msg))
2515 		return 0; /* not worth trying over multiple frames */
2516 	memcpy(buf, msg, strlen(msg));
2517 	return strlen(msg);
2518 
2519 }
2520 
2521 /**
2522  * HTTP error response ready to be submitted to nghttp2, to be prepared for
2523  * sending out. Message body will contain descriptive string for HTTP status.
2524  * @param h2_session: http2 session to submit to
2525  * @param h2_stream: http2 stream containing HTTP status to use for error
2526  * @return 0 on error, 1 otherwise
2527  */
2528 static int http2_submit_error(struct http2_session* h2_session,
2529 	struct http2_stream* h2_stream)
2530 {
2531 	int ret;
2532 	char status[4];
2533 	nghttp2_data_provider data_prd;
2534 	nghttp2_nv headers[1]; /* will be copied by nghttp */
2535 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2536 		verbose(VERB_QUERY, "http2: submit error failed, "
2537 			"invalid status");
2538 		return 0;
2539 	}
2540 	headers[0].name = (uint8_t*)":status";
2541 	headers[0].namelen = 7;
2542 	headers[0].value = (uint8_t*)status;
2543 	headers[0].valuelen = 3;
2544 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2545 
2546 	data_prd.source.ptr = h2_session;
2547 	data_prd.read_callback = http2_submit_error_read_callback;
2548 
2549 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2550 		headers, 1, &data_prd);
2551 	if(ret) {
2552 		verbose(VERB_QUERY, "http2: submit error failed, "
2553 			"error: %s", nghttp2_strerror(ret));
2554 		return 0;
2555 	}
2556 	return 1;
2557 }
2558 
2559 /**
2560  * Start query handling. Query is stored in the stream, and will be free'd here.
2561  * @param h2_session: http2 session, containing comm point
2562  * @param h2_stream: stream containing buffered query
2563  * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2564  * reply available (yet).
2565  */
2566 static int http2_query_read_done(struct http2_session* h2_session,
2567 	struct http2_stream* h2_stream)
2568 {
2569 	log_assert(h2_stream->qbuffer);
2570 
2571 	if(h2_session->c->h2_stream) {
2572 		verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2573 			"buffer already assigned to stream");
2574 		return -1;
2575 	}
2576 
2577     /* the c->buffer might be used by mesh_send_reply and no be cleard
2578 	 * need to be cleared before use */
2579 	sldns_buffer_clear(h2_session->c->buffer);
2580 	if(sldns_buffer_remaining(h2_session->c->buffer) <
2581 		sldns_buffer_remaining(h2_stream->qbuffer)) {
2582 		/* qbuffer will be free'd in frame close cb */
2583 		sldns_buffer_clear(h2_session->c->buffer);
2584 		verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2585 			"qbuffer in c->buffer");
2586 		return -1;
2587 	}
2588 
2589 	sldns_buffer_write(h2_session->c->buffer,
2590 		sldns_buffer_current(h2_stream->qbuffer),
2591 		sldns_buffer_remaining(h2_stream->qbuffer));
2592 
2593 	lock_basic_lock(&http2_query_buffer_count_lock);
2594 	http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2595 	lock_basic_unlock(&http2_query_buffer_count_lock);
2596 	sldns_buffer_free(h2_stream->qbuffer);
2597 	h2_stream->qbuffer = NULL;
2598 
2599 	sldns_buffer_flip(h2_session->c->buffer);
2600 	h2_session->c->h2_stream = h2_stream;
2601 	fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2602 	if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2603 		NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2604 		return 1; /* answer in c->buffer */
2605 	}
2606 	sldns_buffer_clear(h2_session->c->buffer);
2607 	h2_session->c->h2_stream = NULL;
2608 	return 0; /* mesh state added, or dropped */
2609 }
2610 
2611 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2612  * stream. Gather collected request data and start query handling. */
2613 static int http2_req_frame_recv_cb(nghttp2_session* session,
2614 	const nghttp2_frame* frame, void* cb_arg)
2615 {
2616 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2617 	struct http2_stream* h2_stream;
2618 	int query_read_done;
2619 
2620 	if((frame->hd.type != NGHTTP2_DATA &&
2621 		frame->hd.type != NGHTTP2_HEADERS) ||
2622 		!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2623 			return 0;
2624 	}
2625 
2626 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2627 		session, frame->hd.stream_id)))
2628 		return 0;
2629 
2630 	if(h2_stream->invalid_endpoint) {
2631 		h2_stream->status = HTTP_STATUS_NOT_FOUND;
2632 		goto submit_http_error;
2633 	}
2634 
2635 	if(h2_stream->invalid_content_type) {
2636 		h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2637 		goto submit_http_error;
2638 	}
2639 
2640 	if(h2_stream->http_method != HTTP_METHOD_GET &&
2641 		h2_stream->http_method != HTTP_METHOD_POST) {
2642 		h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2643 		goto submit_http_error;
2644 	}
2645 
2646 	if(h2_stream->query_too_large) {
2647 		if(h2_stream->http_method == HTTP_METHOD_POST)
2648 			h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2649 		else
2650 			h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2651 		goto submit_http_error;
2652 	}
2653 
2654 	if(!h2_stream->qbuffer) {
2655 		h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2656 		goto submit_http_error;
2657 	}
2658 
2659 	if(h2_stream->status) {
2660 submit_http_error:
2661 		verbose(VERB_QUERY, "http2 request invalid, returning :status="
2662 			"%d", h2_stream->status);
2663 		if(!http2_submit_error(h2_session, h2_stream)) {
2664 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2665 		}
2666 		return 0;
2667 	}
2668 	h2_stream->status = HTTP_STATUS_OK;
2669 
2670 	sldns_buffer_flip(h2_stream->qbuffer);
2671 	h2_session->postpone_drop = 1;
2672 	query_read_done = http2_query_read_done(h2_session, h2_stream);
2673 	if(query_read_done < 0)
2674 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2675 	else if(!query_read_done) {
2676 		if(h2_session->is_drop) {
2677 			/* connection needs to be closed. Return failure to make
2678 			 * sure no other action are taken anymore on comm point.
2679 			 * failure will result in reclaiming (and closing)
2680 			 * of comm point. */
2681 			verbose(VERB_QUERY, "http2 query dropped in worker cb");
2682 			h2_session->postpone_drop = 0;
2683 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2684 		}
2685 		/* nothing to submit right now, query added to mesh. */
2686 		h2_session->postpone_drop = 0;
2687 		return 0;
2688 	}
2689 	if(!http2_submit_dns_response(h2_session)) {
2690 		sldns_buffer_clear(h2_session->c->buffer);
2691 		h2_session->c->h2_stream = NULL;
2692 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2693 	}
2694 	verbose(VERB_QUERY, "http2 query submitted to session");
2695 	sldns_buffer_clear(h2_session->c->buffer);
2696 	h2_session->c->h2_stream = NULL;
2697 	return 0;
2698 }
2699 
2700 /** nghttp2 callback. Used to detect start of new streams. */
2701 static int http2_req_begin_headers_cb(nghttp2_session* session,
2702 	const nghttp2_frame* frame, void* cb_arg)
2703 {
2704 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2705 	struct http2_stream* h2_stream;
2706 	int ret;
2707 	if(frame->hd.type != NGHTTP2_HEADERS ||
2708 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2709 		/* only interested in request headers */
2710 		return 0;
2711 	}
2712 	if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2713 		log_err("malloc failure while creating http2 stream");
2714 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2715 	}
2716 	http2_session_add_stream(h2_session, h2_stream);
2717 	ret = nghttp2_session_set_stream_user_data(session,
2718 		frame->hd.stream_id, h2_stream);
2719 	if(ret) {
2720 		/* stream does not exist */
2721 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2722 			"error: %s", nghttp2_strerror(ret));
2723 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2724 	}
2725 
2726 	return 0;
2727 }
2728 
2729 /**
2730  * base64url decode, store in qbuffer
2731  * @param h2_session: http2 session
2732  * @param h2_stream: http2 stream
2733  * @param start: start of the base64 string
2734  * @param length: length of the base64 string
2735  * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2736  * buffer will be NULL is unparseble.
2737  */
2738 static int http2_buffer_uri_query(struct http2_session* h2_session,
2739 	struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2740 {
2741 	size_t expectb64len;
2742 	int b64len;
2743 	if(h2_stream->http_method == HTTP_METHOD_POST)
2744 		return 1;
2745 	if(length == 0)
2746 		return 1;
2747 	if(h2_stream->qbuffer) {
2748 		verbose(VERB_ALGO, "http2_req_header fail, "
2749 			"qbuffer already set");
2750 		return 0;
2751 	}
2752 
2753 	/* calculate size, might be a bit bigger than the real
2754 	 * decoded buffer size */
2755 	expectb64len = sldns_b64_pton_calculate_size(length);
2756 	log_assert(expectb64len > 0);
2757 	if(expectb64len >
2758 		h2_session->c->http2_stream_max_qbuffer_size) {
2759 		h2_stream->query_too_large = 1;
2760 		return 1;
2761 	}
2762 
2763 	lock_basic_lock(&http2_query_buffer_count_lock);
2764 	if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2765 		lock_basic_unlock(&http2_query_buffer_count_lock);
2766 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2767 			"in http2-query-buffer-size");
2768 		return http2_submit_rst_stream(h2_session, h2_stream);
2769 	}
2770 	http2_query_buffer_count += expectb64len;
2771 	lock_basic_unlock(&http2_query_buffer_count_lock);
2772 	if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2773 		lock_basic_lock(&http2_query_buffer_count_lock);
2774 		http2_query_buffer_count -= expectb64len;
2775 		lock_basic_unlock(&http2_query_buffer_count_lock);
2776 		log_err("http2_req_header fail, qbuffer "
2777 			"malloc failure");
2778 		return 0;
2779 	}
2780 
2781 	if(sldns_b64_contains_nonurl((char const*)start, length)) {
2782 		char buf[65536+4];
2783 		verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2784 		/* copy to the scratch buffer temporarily to terminate the
2785 		 * string with a zero */
2786 		if(length+1 > sizeof(buf)) {
2787 			/* too long */
2788 			lock_basic_lock(&http2_query_buffer_count_lock);
2789 			http2_query_buffer_count -= expectb64len;
2790 			lock_basic_unlock(&http2_query_buffer_count_lock);
2791 			sldns_buffer_free(h2_stream->qbuffer);
2792 			h2_stream->qbuffer = NULL;
2793 			return 1;
2794 		}
2795 		memmove(buf, start, length);
2796 		buf[length] = 0;
2797 		if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2798 			h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2799 			lock_basic_lock(&http2_query_buffer_count_lock);
2800 			http2_query_buffer_count -= expectb64len;
2801 			lock_basic_unlock(&http2_query_buffer_count_lock);
2802 			sldns_buffer_free(h2_stream->qbuffer);
2803 			h2_stream->qbuffer = NULL;
2804 			return 1;
2805 		}
2806 	} else {
2807 		if(!(b64len = sldns_b64url_pton(
2808 			(char const *)start, length,
2809 			sldns_buffer_current(h2_stream->qbuffer),
2810 			expectb64len)) || b64len < 0) {
2811 			lock_basic_lock(&http2_query_buffer_count_lock);
2812 			http2_query_buffer_count -= expectb64len;
2813 			lock_basic_unlock(&http2_query_buffer_count_lock);
2814 			sldns_buffer_free(h2_stream->qbuffer);
2815 			h2_stream->qbuffer = NULL;
2816 			/* return without error, method can be an
2817 			 * unknown POST */
2818 			return 1;
2819 		}
2820 	}
2821 	sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2822 	return 1;
2823 }
2824 
2825 /** nghttp2 callback. Used to parse headers from HEADER frames. */
2826 static int http2_req_header_cb(nghttp2_session* session,
2827 	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2828 	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2829 	void* cb_arg)
2830 {
2831 	struct http2_stream* h2_stream = NULL;
2832 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2833 	/* nghttp2 deals with CONTINUATION frames and provides them as part of
2834 	 * the HEADER */
2835 	if(frame->hd.type != NGHTTP2_HEADERS ||
2836 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2837 		/* only interested in request headers */
2838 		return 0;
2839 	}
2840 	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2841 		frame->hd.stream_id)))
2842 		return 0;
2843 
2844 	/* earlier checks already indicate we can stop handling this query */
2845 	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2846 		h2_stream->invalid_content_type ||
2847 		h2_stream->invalid_endpoint)
2848 		return 0;
2849 
2850 
2851 	/* nghttp2 performs some sanity checks in the headers, including:
2852 	 * name and value are guaranteed to be null terminated
2853 	 * name is guaranteed to be lowercase
2854 	 * content-length value is guaranteed to contain digits
2855 	 */
2856 
2857 	if(!h2_stream->http_method && namelen == 7 &&
2858 		memcmp(":method", name, namelen) == 0) {
2859 		/* Case insensitive check on :method value to be on the safe
2860 		 * side. I failed to find text about case sensitivity in specs.
2861 		 */
2862 		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2863 			h2_stream->http_method = HTTP_METHOD_GET;
2864 		else if(valuelen == 4 &&
2865 			strcasecmp("POST", (const char*)value) == 0) {
2866 			h2_stream->http_method = HTTP_METHOD_POST;
2867 			if(h2_stream->qbuffer) {
2868 				/* POST method uses query from DATA frames */
2869 				lock_basic_lock(&http2_query_buffer_count_lock);
2870 				http2_query_buffer_count -=
2871 					sldns_buffer_capacity(h2_stream->qbuffer);
2872 				lock_basic_unlock(&http2_query_buffer_count_lock);
2873 				sldns_buffer_free(h2_stream->qbuffer);
2874 				h2_stream->qbuffer = NULL;
2875 			}
2876 		} else
2877 			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2878 		return 0;
2879 	}
2880 	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2881 		/* :path may contain DNS query, depending on method. Method might
2882 		 * not be known yet here, so check after finishing receiving
2883 		 * stream. */
2884 #define	HTTP_QUERY_PARAM "?dns="
2885 		size_t el = strlen(h2_session->c->http_endpoint);
2886 		size_t qpl = strlen(HTTP_QUERY_PARAM);
2887 
2888 		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2889 			value, el) != 0) {
2890 			h2_stream->invalid_endpoint = 1;
2891 			return 0;
2892 		}
2893 		/* larger than endpoint only allowed if it is for the query
2894 		 * parameter */
2895 		if(valuelen <= el+qpl ||
2896 			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2897 			if(valuelen != el)
2898 				h2_stream->invalid_endpoint = 1;
2899 			return 0;
2900 		}
2901 
2902 		if(!http2_buffer_uri_query(h2_session, h2_stream,
2903 			value+(el+qpl), valuelen-(el+qpl))) {
2904 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2905 		}
2906 		return 0;
2907 	}
2908 	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2909 	 * and not needed when using GET. Don't enfore.
2910 	 * If set only allow lowercase "application/dns-message".
2911 	 *
2912 	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
2913 	 * be able to handle "application/dns-message". Since that is the only
2914 	 * content-type supported we can ignore the accept header.
2915 	 */
2916 	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
2917 		if(valuelen != 23 || memcmp("application/dns-message", value,
2918 			valuelen) != 0) {
2919 			h2_stream->invalid_content_type = 1;
2920 		}
2921 	}
2922 
2923 	/* Only interested in content-lentg for POST (on not yet known) method.
2924 	 */
2925 	if((!h2_stream->http_method ||
2926 		h2_stream->http_method == HTTP_METHOD_POST) &&
2927 		!h2_stream->content_length && namelen  == 14 &&
2928 		memcmp("content-length", name, namelen) == 0) {
2929 		if(valuelen > 5) {
2930 			h2_stream->query_too_large = 1;
2931 			return 0;
2932 		}
2933 		/* guaranteed to only contain digits and be null terminated */
2934 		h2_stream->content_length = atoi((const char*)value);
2935 		if(h2_stream->content_length >
2936 			h2_session->c->http2_stream_max_qbuffer_size) {
2937 			h2_stream->query_too_large = 1;
2938 			return 0;
2939 		}
2940 	}
2941 	return 0;
2942 }
2943 
2944 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2945  * queries in POST requests. */
2946 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2947 	uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2948 	size_t len, void* cb_arg)
2949 {
2950 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2951 	struct http2_stream* h2_stream;
2952 	size_t qlen = 0;
2953 
2954 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2955 		h2_session->session, stream_id))) {
2956 		return 0;
2957 	}
2958 
2959 	if(h2_stream->query_too_large)
2960 		return 0;
2961 
2962 	if(!h2_stream->qbuffer) {
2963 		if(h2_stream->content_length) {
2964 			if(h2_stream->content_length < len)
2965 				/* getting more data in DATA frame than
2966 				 * advertised in content-length header. */
2967 				return NGHTTP2_ERR_CALLBACK_FAILURE;
2968 			qlen = h2_stream->content_length;
2969 		} else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2970 			/* setting this to msg-buffer-size can result in a lot
2971 			 * of memory consuption. Most queries should fit in a
2972 			 * single DATA frame, and most POST queries will
2973 			 * contain content-length which does not impose this
2974 			 * limit. */
2975 			qlen = len;
2976 		}
2977 	}
2978 	if(!h2_stream->qbuffer && qlen) {
2979 		lock_basic_lock(&http2_query_buffer_count_lock);
2980 		if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2981 			lock_basic_unlock(&http2_query_buffer_count_lock);
2982 			verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2983 				"in http2-query-buffer-size");
2984 			return http2_submit_rst_stream(h2_session, h2_stream);
2985 		}
2986 		http2_query_buffer_count += qlen;
2987 		lock_basic_unlock(&http2_query_buffer_count_lock);
2988 		if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2989 			lock_basic_lock(&http2_query_buffer_count_lock);
2990 			http2_query_buffer_count -= qlen;
2991 			lock_basic_unlock(&http2_query_buffer_count_lock);
2992 		}
2993 	}
2994 
2995 	if(!h2_stream->qbuffer ||
2996 		sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2997 		verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2998 			"buffer space for POST query. Can happen on multi "
2999 			"frame requests without content-length header");
3000 		h2_stream->query_too_large = 1;
3001 		return 0;
3002 	}
3003 
3004 	sldns_buffer_write(h2_stream->qbuffer, data, len);
3005 
3006 	return 0;
3007 }
3008 
3009 void http2_req_stream_clear(struct http2_stream* h2_stream)
3010 {
3011 	if(h2_stream->qbuffer) {
3012 		lock_basic_lock(&http2_query_buffer_count_lock);
3013 		http2_query_buffer_count -=
3014 			sldns_buffer_capacity(h2_stream->qbuffer);
3015 		lock_basic_unlock(&http2_query_buffer_count_lock);
3016 		sldns_buffer_free(h2_stream->qbuffer);
3017 		h2_stream->qbuffer = NULL;
3018 	}
3019 	if(h2_stream->rbuffer) {
3020 		lock_basic_lock(&http2_response_buffer_count_lock);
3021 		http2_response_buffer_count -=
3022 			sldns_buffer_capacity(h2_stream->rbuffer);
3023 		lock_basic_unlock(&http2_response_buffer_count_lock);
3024 		sldns_buffer_free(h2_stream->rbuffer);
3025 		h2_stream->rbuffer = NULL;
3026 	}
3027 }
3028 
3029 nghttp2_session_callbacks* http2_req_callbacks_create(void)
3030 {
3031 	nghttp2_session_callbacks *callbacks;
3032 	if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
3033 		log_err("failed to initialize nghttp2 callback");
3034 		return NULL;
3035 	}
3036 	/* reception of header block started, used to create h2_stream */
3037 	nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
3038 		http2_req_begin_headers_cb);
3039 	/* complete frame received, used to get data from stream if frame
3040 	 * has end stream flag, and start processing query */
3041 	nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
3042 		http2_req_frame_recv_cb);
3043 	/* get request info from headers */
3044 	nghttp2_session_callbacks_set_on_header_callback(callbacks,
3045 		http2_req_header_cb);
3046 	/* get data from DATA frames, containing POST query */
3047 	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
3048 		http2_req_data_chunk_recv_cb);
3049 
3050 	/* generic HTTP2 callbacks */
3051 	nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
3052 	nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
3053 	nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
3054 		http2_stream_close_cb);
3055 
3056 	return callbacks;
3057 }
3058 #endif /* HAVE_NGHTTP2 */
3059