1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include <ctype.h>
51 #include "services/listen_dnsport.h"
52 #include "services/outside_network.h"
53 #include "util/netevent.h"
54 #include "util/log.h"
55 #include "util/config_file.h"
56 #include "util/net_help.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/parseutil.h"
59 #include "services/mesh.h"
60 #include "util/fptr_wlist.h"
61 #include "util/locks.h"
62 
63 #ifdef HAVE_NETDB_H
64 #include <netdb.h>
65 #endif
66 #include <fcntl.h>
67 
68 #ifdef HAVE_SYS_UN_H
69 #include <sys/un.h>
70 #endif
71 
72 #ifdef HAVE_SYSTEMD
73 #include <systemd/sd-daemon.h>
74 #endif
75 
76 #ifdef HAVE_IFADDRS_H
77 #include <ifaddrs.h>
78 #endif
79 #ifdef HAVE_NET_IF_H
80 #include <net/if.h>
81 #endif
82 #ifdef HAVE_LINUX_NET_TSTAMP_H
83 #include <linux/net_tstamp.h>
84 #endif
/** number of queued TCP connections for listen(); used as the backlog
 * argument for the TCP and local (unix) accept sockets in this file */
#define TCP_BACKLOG 256

#ifndef THREADS_DISABLED
/* the locks below only exist when threads are compiled in */
/** lock on the counter of stream buffer memory */
static lock_basic_type stream_wait_count_lock;
/** lock on the counter of HTTP2 query buffer memory */
static lock_basic_type http2_query_buffer_count_lock;
/** lock on the counter of HTTP2 response buffer memory */
static lock_basic_type http2_response_buffer_count_lock;
#endif
/** size (in bytes) of stream wait buffers, starts at zero */
static size_t stream_wait_count = 0;
/** is the lock initialised for stream wait buffers (0 means no) */
static int stream_wait_lock_inited = 0;
/** size (in bytes) of HTTP2 query buffers, starts at zero */
static size_t http2_query_buffer_count = 0;
/** is the lock initialised for HTTP2 query buffers (0 means no) */
static int http2_query_buffer_lock_inited = 0;
/** size (in bytes) of HTTP2 response buffers, starts at zero */
static size_t http2_response_buffer_count = 0;
/** is the lock initialised for HTTP2 response buffers (0 means no) */
static int http2_response_buffer_lock_inited = 0;
108 
109 /**
110  * Debug print of the getaddrinfo returned address.
111  * @param addr: the address returned.
112  */
113 static void
verbose_print_addr(struct addrinfo * addr)114 verbose_print_addr(struct addrinfo *addr)
115 {
116 	if(verbosity >= VERB_ALGO) {
117 		char buf[100];
118 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
119 #ifdef INET6
120 		if(addr->ai_family == AF_INET6)
121 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
122 				sin6_addr;
123 #endif /* INET6 */
124 		if(inet_ntop(addr->ai_family, sinaddr, buf,
125 			(socklen_t)sizeof(buf)) == 0) {
126 			(void)strlcpy(buf, "(null)", sizeof(buf));
127 		}
128 		buf[sizeof(buf)-1] = 0;
129 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
130 			addr->ai_socktype==SOCK_DGRAM?"udp":
131 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
132 			addr->ai_family==AF_INET?"4":
133 			addr->ai_family==AF_INET6?"6":
134 			"_otherfam", buf,
135 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
136 	}
137 }
138 
139 void
verbose_print_unbound_socket(struct unbound_socket * ub_sock)140 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
141 {
142 	if(verbosity >= VERB_ALGO) {
143 		char buf[256];
144 		log_info("listing of unbound_socket structure:");
145 		addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf,
146 			sizeof(buf));
147 		log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s,
148 			ub_sock->fam == AF_INET?"AF_INET":"AF_INET6",
149 			ub_sock->acl?"yes":"no");
150 	}
151 }
152 
#ifdef HAVE_SYSTEMD
/**
 * Find a socket that systemd passed to this process by socket activation.
 * The first passed descriptor that sd_is_socket() confirms for the given
 * family, socket type and listen state is returned.
 * @param family: address family the socket must have.
 * @param socktype: socket type the socket must have.
 * @param listen: listening state to match; use -1 for UDP, the option is
 * 	only meaningful for stream protocols.
 * @param addr: address used in the error message when nothing matches,
 * 	or NULL to print path instead.
 * @param addrlen: length of addr.
 * @param path: printed in the error message when addr is NULL.
 * @return the file descriptor, or -1 if systemd is not running, the
 * 	LISTEN_PID or LISTEN_FDS variables are missing, no descriptors were
 * 	passed or none of them matched.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
		      struct sockaddr *addr, socklen_t addrlen,
		      const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	/* r becomes the number of descriptors that were passed */
	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	for(i = 0; i < r; i++) {
		/* sd_is_socket() gives a positive value for a match, zero
		 * for no match and a negative errno-style value on failure.
		 * Only a positive result may select the descriptor, a
		 * failure must not be mistaken for a match. */
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype,
			listen) > 0) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			log_err("systemd sd_listen_fds(): %s", path);
	}
	return s;
}
#endif
212 
213 int
create_udp_sock(int family,int socktype,struct sockaddr * addr,socklen_t addrlen,int v6only,int * inuse,int * noproto,int rcv,int snd,int listen,int * reuseport,int transparent,int freebind,int use_systemd,int dscp)214 create_udp_sock(int family, int socktype, struct sockaddr* addr,
215         socklen_t addrlen, int v6only, int* inuse, int* noproto,
216 	int rcv, int snd, int listen, int* reuseport, int transparent,
217 	int freebind, int use_systemd, int dscp)
218 {
219 	int s;
220 	char* err;
221 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
222 	int on=1;
223 #endif
224 #ifdef IPV6_MTU
225 	int mtu = IPV6_MIN_MTU;
226 #endif
227 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
228 	(void)rcv;
229 #endif
230 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
231 	(void)snd;
232 #endif
233 #ifndef IPV6_V6ONLY
234 	(void)v6only;
235 #endif
236 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
237 	(void)transparent;
238 #endif
239 #if !defined(IP_FREEBIND)
240 	(void)freebind;
241 #endif
242 #ifdef HAVE_SYSTEMD
243 	int got_fd_from_systemd = 0;
244 
245 	if (!use_systemd
246 	    || (use_systemd
247 		&& (s = systemd_get_activated(family, socktype, -1, addr,
248 					      addrlen, NULL)) == -1)) {
249 #else
250 	(void)use_systemd;
251 #endif
252 	if((s = socket(family, socktype, 0)) == -1) {
253 		*inuse = 0;
254 #ifndef USE_WINSOCK
255 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
256 			*noproto = 1;
257 			return -1;
258 		}
259 #else
260 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
261 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
262 			*noproto = 1;
263 			return -1;
264 		}
265 #endif
266 		log_err("can't create socket: %s", sock_strerror(errno));
267 		*noproto = 0;
268 		return -1;
269 	}
270 #ifdef HAVE_SYSTEMD
271 	} else {
272 		got_fd_from_systemd = 1;
273 	}
274 #endif
275 	if(listen) {
276 #ifdef SO_REUSEADDR
277 		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
278 			(socklen_t)sizeof(on)) < 0) {
279 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
280 				sock_strerror(errno));
281 #ifndef USE_WINSOCK
282 			if(errno != ENOSYS) {
283 				close(s);
284 				*noproto = 0;
285 				*inuse = 0;
286 				return -1;
287 			}
288 #else
289 			closesocket(s);
290 			*noproto = 0;
291 			*inuse = 0;
292 			return -1;
293 #endif
294 		}
295 #endif /* SO_REUSEADDR */
296 #ifdef SO_REUSEPORT
297 #  ifdef SO_REUSEPORT_LB
298 		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
299 		 * like SO_REUSEPORT on Linux.  This is what the users want
300 		 * with the config option in unbound.conf; if we actually
301 		 * need local address and port reuse they'll also need to
302 		 * have SO_REUSEPORT set for them, assume it was _LB they want.
303 		 */
304 		if (reuseport && *reuseport &&
305 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
306 			(socklen_t)sizeof(on)) < 0) {
307 #ifdef ENOPROTOOPT
308 			if(errno != ENOPROTOOPT || verbosity >= 3)
309 				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
310 					strerror(errno));
311 #endif
312 			/* this option is not essential, we can continue */
313 			*reuseport = 0;
314 		}
315 #  else /* no SO_REUSEPORT_LB */
316 
317 		/* try to set SO_REUSEPORT so that incoming
318 		 * queries are distributed evenly among the receiving threads.
319 		 * Each thread must have its own socket bound to the same port,
320 		 * with SO_REUSEPORT set on each socket.
321 		 */
322 		if (reuseport && *reuseport &&
323 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
324 			(socklen_t)sizeof(on)) < 0) {
325 #ifdef ENOPROTOOPT
326 			if(errno != ENOPROTOOPT || verbosity >= 3)
327 				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
328 					strerror(errno));
329 #endif
330 			/* this option is not essential, we can continue */
331 			*reuseport = 0;
332 		}
333 #  endif /* SO_REUSEPORT_LB */
334 #else
335 		(void)reuseport;
336 #endif /* defined(SO_REUSEPORT) */
337 #ifdef IP_TRANSPARENT
338 		if (transparent &&
339 		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
340 		    (socklen_t)sizeof(on)) < 0) {
341 			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
342 			strerror(errno));
343 		}
344 #elif defined(IP_BINDANY)
345 		if (transparent &&
346 		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
347 		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
348 		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
349 			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
350 			(family==AF_INET6?"V6":""), strerror(errno));
351 		}
352 #elif defined(SO_BINDANY)
353 		if (transparent &&
354 		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
355 		    (socklen_t)sizeof(on)) < 0) {
356 			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
357 			strerror(errno));
358 		}
359 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
360 	}
361 #ifdef IP_FREEBIND
362 	if(freebind &&
363 	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
364 	    (socklen_t)sizeof(on)) < 0) {
365 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
366 		strerror(errno));
367 	}
368 #endif /* IP_FREEBIND */
369 	if(rcv) {
370 #ifdef SO_RCVBUF
371 		int got;
372 		socklen_t slen = (socklen_t)sizeof(got);
373 #  ifdef SO_RCVBUFFORCE
374 		/* Linux specific: try to use root permission to override
375 		 * system limits on rcvbuf. The limit is stored in
376 		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
377 		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
378 			(socklen_t)sizeof(rcv)) < 0) {
379 			if(errno != EPERM) {
380 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
381 					"...) failed: %s", sock_strerror(errno));
382 				sock_close(s);
383 				*noproto = 0;
384 				*inuse = 0;
385 				return -1;
386 			}
387 #  endif /* SO_RCVBUFFORCE */
388 			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
389 				(socklen_t)sizeof(rcv)) < 0) {
390 				log_err("setsockopt(..., SO_RCVBUF, "
391 					"...) failed: %s", sock_strerror(errno));
392 				sock_close(s);
393 				*noproto = 0;
394 				*inuse = 0;
395 				return -1;
396 			}
397 			/* check if we got the right thing or if system
398 			 * reduced to some system max.  Warn if so */
399 			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
400 				&slen) >= 0 && got < rcv/2) {
401 				log_warn("so-rcvbuf %u was not granted. "
402 					"Got %u. To fix: start with "
403 					"root permissions(linux) or sysctl "
404 					"bigger net.core.rmem_max(linux) or "
405 					"kern.ipc.maxsockbuf(bsd) values.",
406 					(unsigned)rcv, (unsigned)got);
407 			}
408 #  ifdef SO_RCVBUFFORCE
409 		}
410 #  endif
411 #endif /* SO_RCVBUF */
412 	}
413 	/* first do RCVBUF as the receive buffer is more important */
414 	if(snd) {
415 #ifdef SO_SNDBUF
416 		int got;
417 		socklen_t slen = (socklen_t)sizeof(got);
418 #  ifdef SO_SNDBUFFORCE
419 		/* Linux specific: try to use root permission to override
420 		 * system limits on sndbuf. The limit is stored in
421 		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
422 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
423 			(socklen_t)sizeof(snd)) < 0) {
424 			if(errno != EPERM) {
425 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
426 					"...) failed: %s", sock_strerror(errno));
427 				sock_close(s);
428 				*noproto = 0;
429 				*inuse = 0;
430 				return -1;
431 			}
432 #  endif /* SO_SNDBUFFORCE */
433 			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
434 				(socklen_t)sizeof(snd)) < 0) {
435 				log_err("setsockopt(..., SO_SNDBUF, "
436 					"...) failed: %s", sock_strerror(errno));
437 				sock_close(s);
438 				*noproto = 0;
439 				*inuse = 0;
440 				return -1;
441 			}
442 			/* check if we got the right thing or if system
443 			 * reduced to some system max.  Warn if so */
444 			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
445 				&slen) >= 0 && got < snd/2) {
446 				log_warn("so-sndbuf %u was not granted. "
447 					"Got %u. To fix: start with "
448 					"root permissions(linux) or sysctl "
449 					"bigger net.core.wmem_max(linux) or "
450 					"kern.ipc.maxsockbuf(bsd) values.",
451 					(unsigned)snd, (unsigned)got);
452 			}
453 #  ifdef SO_SNDBUFFORCE
454 		}
455 #  endif
456 #endif /* SO_SNDBUF */
457 	}
458 	err = set_ip_dscp(s, family, dscp);
459 	if(err != NULL)
460 		log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
461 	if(family == AF_INET6) {
462 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
463 		int omit6_set = 0;
464 		int action;
465 # endif
466 # if defined(IPV6_V6ONLY)
467 		if(v6only
468 #   ifdef HAVE_SYSTEMD
469 			/* Systemd wants to control if the socket is v6 only
470 			 * or both, with BindIPv6Only=default, ipv6-only or
471 			 * both in systemd.socket, so it is not set here. */
472 			&& !got_fd_from_systemd
473 #   endif
474 			) {
475 			int val=(v6only==2)?0:1;
476 			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
477 				(void*)&val, (socklen_t)sizeof(val)) < 0) {
478 				log_err("setsockopt(..., IPV6_V6ONLY"
479 					", ...) failed: %s", sock_strerror(errno));
480 				sock_close(s);
481 				*noproto = 0;
482 				*inuse = 0;
483 				return -1;
484 			}
485 		}
486 # endif
487 # if defined(IPV6_USE_MIN_MTU)
488 		/*
489 		 * There is no fragmentation of IPv6 datagrams
490 		 * during forwarding in the network. Therefore
491 		 * we do not send UDP datagrams larger than
492 		 * the minimum IPv6 MTU of 1280 octets. The
493 		 * EDNS0 message length can be larger if the
494 		 * network stack supports IPV6_USE_MIN_MTU.
495 		 */
496 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
497 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
498 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
499 				"...) failed: %s", sock_strerror(errno));
500 			sock_close(s);
501 			*noproto = 0;
502 			*inuse = 0;
503 			return -1;
504 		}
505 # elif defined(IPV6_MTU)
506 #   ifndef USE_WINSOCK
507 		/*
508 		 * On Linux, to send no larger than 1280, the PMTUD is
509 		 * disabled by default for datagrams anyway, so we set
510 		 * the MTU to use.
511 		 */
512 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
513 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
514 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
515 				sock_strerror(errno));
516 			sock_close(s);
517 			*noproto = 0;
518 			*inuse = 0;
519 			return -1;
520 		}
521 #   elif defined(IPV6_USER_MTU)
522 		/* As later versions of the mingw crosscompiler define
523 		 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU
524 		 * instead which is writable; IPV6_MTU is readonly there. */
525 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU,
526 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
527 			if (WSAGetLastError() != WSAENOPROTOOPT) {
528 				log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s",
529 					wsa_strerror(WSAGetLastError()));
530 				sock_close(s);
531 				*noproto = 0;
532 				*inuse = 0;
533 				return -1;
534 			}
535 		}
536 #   endif /* USE_WINSOCK */
537 # endif /* IPv6 MTU */
538 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
539 #  if defined(IP_PMTUDISC_OMIT)
540 		action = IP_PMTUDISC_OMIT;
541 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
542 			&action, (socklen_t)sizeof(action)) < 0) {
543 
544 			if (errno != EINVAL) {
545 				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
546 					strerror(errno));
547 				sock_close(s);
548 				*noproto = 0;
549 				*inuse = 0;
550 				return -1;
551 			}
552 		}
553 		else
554 		{
555 		    omit6_set = 1;
556 		}
557 #  endif
558 		if (omit6_set == 0) {
559 			action = IP_PMTUDISC_DONT;
560 			if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
561 				&action, (socklen_t)sizeof(action)) < 0) {
562 				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
563 					strerror(errno));
564 				sock_close(s);
565 				*noproto = 0;
566 				*inuse = 0;
567 				return -1;
568 			}
569 		}
570 # endif /* IPV6_MTU_DISCOVER */
571 	} else if(family == AF_INET) {
572 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
573 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
574  * PMTU information is not accepted, but fragmentation is allowed
575  * if and only if the packet size exceeds the outgoing interface MTU
576  * (and also uses the interface mtu to determine the size of the packets).
577  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
578  * FreeBSD already has same semantics without setting the option. */
579 		int omit_set = 0;
580 		int action;
581 #   if defined(IP_PMTUDISC_OMIT)
582 		action = IP_PMTUDISC_OMIT;
583 		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
584 			&action, (socklen_t)sizeof(action)) < 0) {
585 
586 			if (errno != EINVAL) {
587 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
588 					strerror(errno));
589 				sock_close(s);
590 				*noproto = 0;
591 				*inuse = 0;
592 				return -1;
593 			}
594 		}
595 		else
596 		{
597 		    omit_set = 1;
598 		}
599 #   endif
600 		if (omit_set == 0) {
601    			action = IP_PMTUDISC_DONT;
602 			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
603 				&action, (socklen_t)sizeof(action)) < 0) {
604 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
605 					strerror(errno));
606 				sock_close(s);
607 				*noproto = 0;
608 				*inuse = 0;
609 				return -1;
610 			}
611 		}
612 #  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
613 		/* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
614 		 * but does not work on that version, so we exclude it */
615 		/* a nonzero value disables fragmentation, according to
616 		 * docs.oracle.com for ip(4). */
617 		int off = 1;
618 		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
619 			&off, (socklen_t)sizeof(off)) < 0) {
620 			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
621 				strerror(errno));
622 			sock_close(s);
623 			*noproto = 0;
624 			*inuse = 0;
625 			return -1;
626 		}
627 #  endif /* IPv4 MTU */
628 	}
629 	if(
630 #ifdef HAVE_SYSTEMD
631 		!got_fd_from_systemd &&
632 #endif
633 		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
634 		*noproto = 0;
635 		*inuse = 0;
636 #ifndef USE_WINSOCK
637 #ifdef EADDRINUSE
638 		*inuse = (errno == EADDRINUSE);
639 		/* detect freebsd jail with no ipv6 permission */
640 		if(family==AF_INET6 && errno==EINVAL)
641 			*noproto = 1;
642 		else if(errno != EADDRINUSE &&
643 			!(errno == EACCES && verbosity < 4 && !listen)
644 #ifdef EADDRNOTAVAIL
645 			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
646 #endif
647 			) {
648 			log_err_addr("can't bind socket", strerror(errno),
649 				(struct sockaddr_storage*)addr, addrlen);
650 		}
651 #endif /* EADDRINUSE */
652 #else /* USE_WINSOCK */
653 		if(WSAGetLastError() != WSAEADDRINUSE &&
654 			WSAGetLastError() != WSAEADDRNOTAVAIL &&
655 			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
656 			log_err_addr("can't bind socket",
657 				wsa_strerror(WSAGetLastError()),
658 				(struct sockaddr_storage*)addr, addrlen);
659 		}
660 #endif /* USE_WINSOCK */
661 		sock_close(s);
662 		return -1;
663 	}
664 	if(!fd_set_nonblock(s)) {
665 		*noproto = 0;
666 		*inuse = 0;
667 		sock_close(s);
668 		return -1;
669 	}
670 	return s;
671 }
672 
673 int
create_tcp_accept_sock(struct addrinfo * addr,int v6only,int * noproto,int * reuseport,int transparent,int mss,int nodelay,int freebind,int use_systemd,int dscp)674 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
675 	int* reuseport, int transparent, int mss, int nodelay, int freebind,
676 	int use_systemd, int dscp)
677 {
678 	int s;
679 	char* err;
680 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
681 	int on = 1;
682 #endif
683 #ifdef HAVE_SYSTEMD
684 	int got_fd_from_systemd = 0;
685 #endif
686 #ifdef USE_TCP_FASTOPEN
687 	int qlen;
688 #endif
689 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
690 	(void)transparent;
691 #endif
692 #if !defined(IP_FREEBIND)
693 	(void)freebind;
694 #endif
695 	verbose_print_addr(addr);
696 	*noproto = 0;
697 #ifdef HAVE_SYSTEMD
698 	if (!use_systemd ||
699 	    (use_systemd
700 	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
701 					   addr->ai_addr, addr->ai_addrlen,
702 					   NULL)) == -1)) {
703 #else
704 	(void)use_systemd;
705 #endif
706 	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
707 #ifndef USE_WINSOCK
708 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
709 			*noproto = 1;
710 			return -1;
711 		}
712 #else
713 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
714 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
715 			*noproto = 1;
716 			return -1;
717 		}
718 #endif
719 		log_err("can't create socket: %s", sock_strerror(errno));
720 		return -1;
721 	}
722 	if(nodelay) {
723 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
724 		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
725 			(socklen_t)sizeof(on)) < 0) {
726 			#ifndef USE_WINSOCK
727 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
728 				strerror(errno));
729 			#else
730 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
731 				wsa_strerror(WSAGetLastError()));
732 			#endif
733 		}
734 #else
735 		log_warn(" setsockopt(TCP_NODELAY) unsupported");
736 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
737 	}
738 	if (mss > 0) {
739 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
740 		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
741 			(socklen_t)sizeof(mss)) < 0) {
742 			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
743 				sock_strerror(errno));
744 		} else {
745 			verbose(VERB_ALGO,
746 				" tcp socket mss set to %d", mss);
747 		}
748 #else
749 		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
750 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
751 	}
752 #ifdef HAVE_SYSTEMD
753 	} else {
754 		got_fd_from_systemd = 1;
755     }
756 #endif
757 #ifdef SO_REUSEADDR
758 	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
759 		(socklen_t)sizeof(on)) < 0) {
760 		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
761 			sock_strerror(errno));
762 		sock_close(s);
763 		return -1;
764 	}
765 #endif /* SO_REUSEADDR */
766 #ifdef IP_FREEBIND
767 	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
768 	    (socklen_t)sizeof(on)) < 0) {
769 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
770 		strerror(errno));
771 	}
772 #endif /* IP_FREEBIND */
773 #ifdef SO_REUSEPORT
774 	/* try to set SO_REUSEPORT so that incoming
775 	 * connections are distributed evenly among the receiving threads.
776 	 * Each thread must have its own socket bound to the same port,
777 	 * with SO_REUSEPORT set on each socket.
778 	 */
779 	if (reuseport && *reuseport &&
780 		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
781 		(socklen_t)sizeof(on)) < 0) {
782 #ifdef ENOPROTOOPT
783 		if(errno != ENOPROTOOPT || verbosity >= 3)
784 			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
785 				strerror(errno));
786 #endif
787 		/* this option is not essential, we can continue */
788 		*reuseport = 0;
789 	}
790 #else
791 	(void)reuseport;
792 #endif /* defined(SO_REUSEPORT) */
793 #if defined(IPV6_V6ONLY)
794 	if(addr->ai_family == AF_INET6 && v6only
795 #  ifdef HAVE_SYSTEMD
796 		/* Systemd wants to control if the socket is v6 only
797 		 * or both, with BindIPv6Only=default, ipv6-only or
798 		 * both in systemd.socket, so it is not set here. */
799 		&& !got_fd_from_systemd
800 #  endif
801 		) {
802 		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
803 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
804 			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
805 				sock_strerror(errno));
806 			sock_close(s);
807 			return -1;
808 		}
809 	}
810 #else
811 	(void)v6only;
812 #endif /* IPV6_V6ONLY */
813 #ifdef IP_TRANSPARENT
814 	if (transparent &&
815 	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
816 	    (socklen_t)sizeof(on)) < 0) {
817 		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
818 			strerror(errno));
819 	}
820 #elif defined(IP_BINDANY)
821 	if (transparent &&
822 	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
823 	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
824 	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
825 		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
826 		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
827 	}
828 #elif defined(SO_BINDANY)
829 	if (transparent &&
830 	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
831 	    sizeof(on)) < 0) {
832 		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
833 		strerror(errno));
834 	}
835 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
836 	err = set_ip_dscp(s, addr->ai_family, dscp);
837 	if(err != NULL)
838 		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
839 	if(
840 #ifdef HAVE_SYSTEMD
841 		!got_fd_from_systemd &&
842 #endif
843         bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
844 #ifndef USE_WINSOCK
845 		/* detect freebsd jail with no ipv6 permission */
846 		if(addr->ai_family==AF_INET6 && errno==EINVAL)
847 			*noproto = 1;
848 		else {
849 			log_err_addr("can't bind socket", strerror(errno),
850 				(struct sockaddr_storage*)addr->ai_addr,
851 				addr->ai_addrlen);
852 		}
853 #else
854 		log_err_addr("can't bind socket",
855 			wsa_strerror(WSAGetLastError()),
856 			(struct sockaddr_storage*)addr->ai_addr,
857 			addr->ai_addrlen);
858 #endif
859 		sock_close(s);
860 		return -1;
861 	}
862 	if(!fd_set_nonblock(s)) {
863 		sock_close(s);
864 		return -1;
865 	}
866 	if(listen(s, TCP_BACKLOG) == -1) {
867 		log_err("can't listen: %s", sock_strerror(errno));
868 		sock_close(s);
869 		return -1;
870 	}
871 #ifdef USE_TCP_FASTOPEN
872 	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
873 	   against IP spoofing attacks as suggested in RFC7413 */
874 #ifdef __APPLE__
875 	/* OS X implementation only supports qlen of 1 via this call. Actual
876 	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
877 	qlen = 1;
878 #else
879 	/* 5 is recommended on linux */
880 	qlen = 5;
881 #endif
882 	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
883 		  sizeof(qlen))) == -1 ) {
884 #ifdef ENOPROTOOPT
885 		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
886 		   disabled, except when verbosity enabled for debugging */
887 		if(errno != ENOPROTOOPT || verbosity >= 3) {
888 #endif
889 		  if(errno == EPERM) {
890 		  	log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
891 		  } else {
892 		  	log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
893 		  }
894 #ifdef ENOPROTOOPT
895 		}
896 #endif
897 	}
898 #endif
899 	return s;
900 }
901 
/**
 * Set the DiffServ codepoint on a socket.
 * The codepoint is shifted left by two bits and set with IPV6_TCLASS for
 * AF_INET6 sockets and with IP_TOS for every other address family.
 * @param socket: the socket to set the option on.
 * @param addrfamily: address family of the socket.
 * @param dscp: the codepoint; 0 means nothing is set.
 * @return NULL on success or when dscp is 0, otherwise an error string.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int tosval;

	/* nothing requested, leave the socket as it is */
	if(dscp == 0)
		return NULL;
	tosval = dscp << 2;

	if(addrfamily == AF_INET6) {
#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS,
			(void*)&tosval, sizeof(tosval)) < 0)
			return sock_strerror(errno);
		return NULL;
#else
		return "IPV6_TCLASS not defined on this system";
#endif
	}

	/* all other address families use the IPv4 option */
	if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&tosval,
		sizeof(tosval)) < 0)
		return sock_strerror(errno);
	return NULL;
}
927 
/**
 * Create a local (unix domain) stream socket that accepts connections.
 * When compiled with systemd support and use_systemd is set, a socket
 * handed over by systemd is returned if one is found.  Otherwise an
 * existing file at path is removed, and a new socket is bound there, set
 * nonblocking with fd_set_nonblock() and put in listening state.
 * @param path: filesystem path of the socket.
 * @param noproto: set to 1 if local sockets are not supported by the
 * 	build (no sys/un.h); not touched otherwise.
 * @param use_systemd: if nonzero, look for a systemd activated socket.
 * @return the socket, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int fd;
	struct sockaddr_un local;
#endif
#ifdef HAVE_SYSTEMD
	int activated;

	if(use_systemd) {
		activated = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1,
			NULL, 0, path);
		if(activated != -1)
			return activated;
	}
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
#  ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	local.sun_len = (unsigned)sizeof(local);
#  endif
	local.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(local.sun_path, path, sizeof(local.sun_path));

	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
	if(fd == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* remove a leftover socket file; a missing file is fine */
	if(unlink(path) != 0 && errno != ENOENT) {
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		sock_close(fd);
		return -1;
	}

	if(bind(fd, (struct sockaddr *)&local,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		sock_close(fd);
		return -1;
	}

	if(!fd_set_nonblock(fd)) {
		log_err("Cannot set non-blocking mode");
		sock_close(fd);
		return -1;
	}

	if(listen(fd, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		sock_close(fd);
		return -1;
	}

	(void)noproto; /*unused*/
	return fd;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
1002 
1003 
/**
 * Create socket from getaddrinfo results.
 * Resolves ifname and port with getaddrinfo, creates a UDP socket for
 * SOCK_DGRAM or a TCP accept socket otherwise, and stores a copy of the
 * resolved address in ub_sock.
 * @param stype: socket type, SOCK_DGRAM or SOCK_STREAM. Stored in hints.
 * @param ifname: interface address to bind, NULL is logged as "default".
 * @param port: port number as a string.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten with stype.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 1 if creation failed for lack of protocol support
 * 	while hints asked for AF_INET6, otherwise set to 0.
 * @param rcv: receive buffer size, used for UDP.
 * @param snd: send buffer size, used for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: passed on for TCP sockets.
 * @param nodelay: passed on for TCP sockets.
 * @param freebind: passed on to the socket creation routine.
 * @param use_systemd: passed on to the socket creation routine.
 * @param dscp: passed on to the socket creation routine.
 * @param ub_sock: on success filled with the fd, address family and a
 * 	malloced copy of the bound address. Left untouched on failure.
 * @return the socket file descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo *res = NULL;
	/* initialised so the failure checks below never read garbage */
	int r, s, inuse = 0, noproto = 0;
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* the address is used to create the socket, so check it first */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else	{
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}
	if(s == -1) {
		/* no socket: nothing to record in ub_sock and nothing to close */
		freeaddrinfo(res);
		return -1;
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	freeaddrinfo(res);
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		sock_close(s);
		return -1;
	}

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}
1076 
/**
 * Make a socket, honouring a port override in the interface name.
 * An ifname of the form "address@port" is split at the first '@': the part
 * before it becomes the interface and the part after it replaces port.
 * Without an '@' the arguments are handed to make_sock unchanged.
 * On a too long interface or port part, *noip6 is set to 0 and -1 returned.
 * @return the result of make_sock, or -1 on failure.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* plain interface name, no override */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	(void)strlcpy(ifbuf, ifname, sizeof(ifbuf));
	ifbuf[iflen] = 0; /* cut off the "@port" part */
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));
	portbuf[portlen] = 0;
	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6, rcv,
		snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock);
}
1111 
1112 /**
1113  * Add port to open ports list.
1114  * @param list: list head. changed.
1115  * @param s: fd.
1116  * @param ftype: if fd is UDP.
1117  * @param pp2_enabled: if PROXYv2 is enabled for this port.
1118  * @param ub_sock: socket with address.
1119  * @return false on failure. list in unchanged then.
1120  */
1121 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype,int pp2_enabled,struct unbound_socket * ub_sock)1122 port_insert(struct listen_port** list, int s, enum listen_type ftype,
1123 	int pp2_enabled, struct unbound_socket* ub_sock)
1124 {
1125 	struct listen_port* item = (struct listen_port*)malloc(
1126 		sizeof(struct listen_port));
1127 	if(!item)
1128 		return 0;
1129 	item->next = *list;
1130 	item->fd = s;
1131 	item->ftype = ftype;
1132 	item->pp2_enabled = pp2_enabled;
1133 	item->socket = ub_sock;
1134 	*list = item;
1135 	return 1;
1136 }
1137 
/**
 * Set fd to receive software timestamps.
 * Only available when built with linux/net_tstamp.h; otherwise an error
 * is logged and the call fails.
 * @param s: socket file descriptor.
 * @return false on failure.
 */
static int
set_recvtimestamp(int s)
{
#ifndef HAVE_LINUX_NET_TSTAMP_H
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#else
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if(setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&flags,
		(socklen_t)sizeof(flags)) >= 0)
		return 1;
	log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
		strerror(errno));
	return 0;
#endif
}
1156 
/**
 * Set fd to receive source address packet info.
 * The socket option is chosen at compile time: IPV6_RECVPKTINFO or
 * IPV6_PKTINFO for AF_INET6, IP_PKTINFO or IP_RECVDSTADDR for AF_INET.
 * When no suitable option exists for the family an error is logged.
 * Families other than AF_INET6 and AF_INET are accepted without action.
 * @param s: socket file descriptor.
 * @param family: address family of the socket.
 * @return false on failure.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */

	default:
		/* other families: nothing to set */
		break;
	}
	return 1;
}
1211 
1212 /** see if interface is ssl, its port number == the ssl port number */
1213 static int
if_is_ssl(const char * ifname,const char * port,int ssl_port,struct config_strlist * tls_additional_port)1214 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1215 	struct config_strlist* tls_additional_port)
1216 {
1217 	struct config_strlist* s;
1218 	char* p = strchr(ifname, '@');
1219 	if(!p && atoi(port) == ssl_port)
1220 		return 1;
1221 	if(p && atoi(p+1) == ssl_port)
1222 		return 1;
1223 	for(s = tls_additional_port; s; s = s->next) {
1224 		if(p && atoi(p+1) == atoi(s->str))
1225 			return 1;
1226 		if(!p && atoi(port) == atoi(s->str))
1227 			return 1;
1228 	}
1229 	return 0;
1230 }
1231 
1232 /**
1233  * Helper for ports_open. Creates one interface (or NULL for default).
1234  * @param ifname: The interface ip address.
1235  * @param do_auto: use automatic interface detection.
1236  * 	If enabled, then ifname must be the wildcard name.
1237  * @param do_udp: if udp should be used.
1238  * @param do_tcp: if tcp should be used.
1239  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1240  * @param port: Port number to use (as string).
1241  * @param list: list of open ports, appended to, changed to point to list head.
1242  * @param rcv: receive buffer size for UDP
1243  * @param snd: send buffer size for UDP
1244  * @param ssl_port: ssl service port number
1245  * @param tls_additional_port: list of additional ssl service port numbers.
1246  * @param https_port: DoH service port number
1247  * @param proxy_protocol_port: list of PROXYv2 port numbers.
1248  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1249  * 	set to false on exit if reuseport failed due to no kernel support.
1250  * @param transparent: set IP_TRANSPARENT socket option.
1251  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1252  * @param freebind: set IP_FREEBIND socket option.
1253  * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1254  * @param use_systemd: if true, fetch sockets from systemd.
1255  * @param dnscrypt_port: dnscrypt service port number
1256  * @param dscp: DSCP to use.
1257  * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
1258  * 	wait to discard if UDP packets have waited for long in the socket
1259  * 	buffer.
1260  * @return: returns false on error.
1261  */
1262 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,const char * port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,struct config_strlist * proxy_protocol_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp,int sock_queue_timeout)1263 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1264 	struct addrinfo *hints, const char* port, struct listen_port** list,
1265 	size_t rcv, size_t snd, int ssl_port,
1266 	struct config_strlist* tls_additional_port, int https_port,
1267 	struct config_strlist* proxy_protocol_port,
1268 	int* reuseport, int transparent, int tcp_mss, int freebind,
1269 	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
1270 	int sock_queue_timeout)
1271 {
1272 	int s, noip6=0;
1273 	int is_https = if_is_https(ifname, port, https_port);
1274 	int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
1275 	int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
1276 	int nodelay = is_https && http2_nodelay;
1277 	struct unbound_socket* ub_sock;
1278 
1279 	if(!do_udp && !do_tcp)
1280 		return 0;
1281 
1282 	if(is_pp2) {
1283 		if(is_dnscrypt) {
1284 			fatal_exit("PROXYv2 and DNSCrypt combination not "
1285 				"supported!");
1286 		} else if(is_https) {
1287 			fatal_exit("PROXYv2 and DoH combination not "
1288 				"supported!");
1289 		}
1290 	}
1291 
1292 	if(do_auto) {
1293 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1294 		if(!ub_sock)
1295 			return 0;
1296 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1297 			&noip6, rcv, snd, reuseport, transparent,
1298 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1299 			free(ub_sock->addr);
1300 			free(ub_sock);
1301 			if(noip6) {
1302 				log_warn("IPv6 protocol not available");
1303 				return 1;
1304 			}
1305 			return 0;
1306 		}
1307 		/* getting source addr packet info is highly non-portable */
1308 		if(!set_recvpktinfo(s, hints->ai_family)) {
1309 			sock_close(s);
1310 			free(ub_sock->addr);
1311 			free(ub_sock);
1312 			return 0;
1313 		}
1314 		if (sock_queue_timeout && !set_recvtimestamp(s)) {
1315 			log_warn("socket timestamping is not available");
1316 		}
1317 		if(!port_insert(list, s, is_dnscrypt
1318 			?listen_type_udpancil_dnscrypt:listen_type_udpancil,
1319 			is_pp2, ub_sock)) {
1320 			sock_close(s);
1321 			free(ub_sock->addr);
1322 			free(ub_sock);
1323 			return 0;
1324 		}
1325 	} else if(do_udp) {
1326 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1327 		if(!ub_sock)
1328 			return 0;
1329 		/* regular udp socket */
1330 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1331 			&noip6, rcv, snd, reuseport, transparent,
1332 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1333 			free(ub_sock->addr);
1334 			free(ub_sock);
1335 			if(noip6) {
1336 				log_warn("IPv6 protocol not available");
1337 				return 1;
1338 			}
1339 			return 0;
1340 		}
1341 		if (sock_queue_timeout && !set_recvtimestamp(s)) {
1342 			log_warn("socket timestamping is not available");
1343 		}
1344 		if(!port_insert(list, s, is_dnscrypt
1345 			?listen_type_udp_dnscrypt :
1346 			(sock_queue_timeout ?
1347 				listen_type_udpancil:listen_type_udp),
1348 			is_pp2, ub_sock)) {
1349 			sock_close(s);
1350 			free(ub_sock->addr);
1351 			free(ub_sock);
1352 			return 0;
1353 		}
1354 	}
1355 	if(do_tcp) {
1356 		int is_ssl = if_is_ssl(ifname, port, ssl_port,
1357 			tls_additional_port);
1358 		enum listen_type port_type;
1359 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1360 		if(!ub_sock)
1361 			return 0;
1362 		if(is_ssl)
1363 			port_type = listen_type_ssl;
1364 		else if(is_https)
1365 			port_type = listen_type_http;
1366 		else if(is_dnscrypt)
1367 			port_type = listen_type_tcp_dnscrypt;
1368 		else
1369 			port_type = listen_type_tcp;
1370 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1371 			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1372 			freebind, use_systemd, dscp, ub_sock)) == -1) {
1373 			free(ub_sock->addr);
1374 			free(ub_sock);
1375 			if(noip6) {
1376 				/*log_warn("IPv6 protocol not available");*/
1377 				return 1;
1378 			}
1379 			return 0;
1380 		}
1381 		if(is_ssl)
1382 			verbose(VERB_ALGO, "setup TCP for SSL service");
1383 		if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
1384 			sock_close(s);
1385 			free(ub_sock->addr);
1386 			free(ub_sock);
1387 			return 0;
1388 		}
1389 	}
1390 	return 1;
1391 }
1392 
1393 /**
1394  * Add items to commpoint list in front.
1395  * @param c: commpoint to add.
1396  * @param front: listen struct.
1397  * @return: false on failure.
1398  */
1399 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1400 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1401 {
1402 	struct listen_list* item = (struct listen_list*)malloc(
1403 		sizeof(struct listen_list));
1404 	if(!item)
1405 		return 0;
1406 	item->com = c;
1407 	item->next = front->cps;
1408 	front->cps = item;
1409 	return 1;
1410 }
1411 
listen_setup_locks(void)1412 void listen_setup_locks(void)
1413 {
1414 	if(!stream_wait_lock_inited) {
1415 		lock_basic_init(&stream_wait_count_lock);
1416 		stream_wait_lock_inited = 1;
1417 	}
1418 	if(!http2_query_buffer_lock_inited) {
1419 		lock_basic_init(&http2_query_buffer_count_lock);
1420 		http2_query_buffer_lock_inited = 1;
1421 	}
1422 	if(!http2_response_buffer_lock_inited) {
1423 		lock_basic_init(&http2_response_buffer_count_lock);
1424 		http2_response_buffer_lock_inited = 1;
1425 	}
1426 }
1427 
listen_desetup_locks(void)1428 void listen_desetup_locks(void)
1429 {
1430 	if(stream_wait_lock_inited) {
1431 		stream_wait_lock_inited = 0;
1432 		lock_basic_destroy(&stream_wait_count_lock);
1433 	}
1434 	if(http2_query_buffer_lock_inited) {
1435 		http2_query_buffer_lock_inited = 0;
1436 		lock_basic_destroy(&http2_query_buffer_count_lock);
1437 	}
1438 	if(http2_response_buffer_lock_inited) {
1439 		http2_response_buffer_lock_inited = 0;
1440 		lock_basic_destroy(&http2_response_buffer_count_lock);
1441 	}
1442 }
1443 
1444 struct listen_dnsport*
listen_create(struct comm_base * base,struct listen_port * ports,size_t bufsize,int tcp_accept_count,int tcp_idle_timeout,int harden_large_queries,uint32_t http_max_streams,char * http_endpoint,int http_notls,struct tcl_list * tcp_conn_limit,void * sslctx,struct dt_env * dtenv,comm_point_callback_type * cb,void * cb_arg)1445 listen_create(struct comm_base* base, struct listen_port* ports,
1446 	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1447 	int harden_large_queries, uint32_t http_max_streams,
1448 	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1449 	void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
1450 	void *cb_arg)
1451 {
1452 	struct listen_dnsport* front = (struct listen_dnsport*)
1453 		malloc(sizeof(struct listen_dnsport));
1454 	if(!front)
1455 		return NULL;
1456 	front->cps = NULL;
1457 	front->udp_buff = sldns_buffer_new(bufsize);
1458 #ifdef USE_DNSCRYPT
1459 	front->dnscrypt_udp_buff = NULL;
1460 #endif
1461 	if(!front->udp_buff) {
1462 		free(front);
1463 		return NULL;
1464 	}
1465 
1466 	/* create comm points as needed */
1467 	while(ports) {
1468 		struct comm_point* cp = NULL;
1469 		if(ports->ftype == listen_type_udp ||
1470 		   ports->ftype == listen_type_udp_dnscrypt) {
1471 			cp = comm_point_create_udp(base, ports->fd,
1472 				front->udp_buff, ports->pp2_enabled, cb,
1473 				cb_arg, ports->socket);
1474 		} else if(ports->ftype == listen_type_tcp ||
1475 				ports->ftype == listen_type_tcp_dnscrypt) {
1476 			cp = comm_point_create_tcp(base, ports->fd,
1477 				tcp_accept_count, tcp_idle_timeout,
1478 				harden_large_queries, 0, NULL,
1479 				tcp_conn_limit, bufsize, front->udp_buff,
1480 				ports->ftype, ports->pp2_enabled, cb, cb_arg,
1481 				ports->socket);
1482 		} else if(ports->ftype == listen_type_ssl ||
1483 			ports->ftype == listen_type_http) {
1484 			cp = comm_point_create_tcp(base, ports->fd,
1485 				tcp_accept_count, tcp_idle_timeout,
1486 				harden_large_queries,
1487 				http_max_streams, http_endpoint,
1488 				tcp_conn_limit, bufsize, front->udp_buff,
1489 				ports->ftype, ports->pp2_enabled, cb, cb_arg,
1490 				ports->socket);
1491 			if(ports->ftype == listen_type_http) {
1492 				if(!sslctx && !http_notls) {
1493 					log_warn("HTTPS port configured, but "
1494 						"no TLS tls-service-key or "
1495 						"tls-service-pem set");
1496 				}
1497 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1498 				if(!http_notls) {
1499 					log_warn("Unbound is not compiled "
1500 						"with an OpenSSL version "
1501 						"supporting ALPN "
1502 						"(OpenSSL >= 1.0.2). This "
1503 						"is required to use "
1504 						"DNS-over-HTTPS");
1505 				}
1506 #endif
1507 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1508 				log_warn("Unbound is not compiled with "
1509 					"nghttp2. This is required to use "
1510 					"DNS-over-HTTPS.");
1511 #endif
1512 			}
1513 		} else if(ports->ftype == listen_type_udpancil ||
1514 				  ports->ftype == listen_type_udpancil_dnscrypt) {
1515 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
1516 			cp = comm_point_create_udp_ancil(base, ports->fd,
1517 				front->udp_buff, ports->pp2_enabled, cb,
1518 				cb_arg, ports->socket);
1519 #else
1520 			log_warn("This system does not support UDP ancilliary data.");
1521 #endif
1522 		}
1523 		if(!cp) {
1524 			log_err("can't create commpoint");
1525 			listen_delete(front);
1526 			return NULL;
1527 		}
1528 		if((http_notls && ports->ftype == listen_type_http) ||
1529 			(ports->ftype == listen_type_tcp) ||
1530 			(ports->ftype == listen_type_udp) ||
1531 			(ports->ftype == listen_type_udpancil) ||
1532 			(ports->ftype == listen_type_tcp_dnscrypt) ||
1533 			(ports->ftype == listen_type_udp_dnscrypt) ||
1534 			(ports->ftype == listen_type_udpancil_dnscrypt))
1535 			cp->ssl = NULL;
1536 		else
1537 			cp->ssl = sslctx;
1538 		cp->dtenv = dtenv;
1539 		cp->do_not_close = 1;
1540 #ifdef USE_DNSCRYPT
1541 		if (ports->ftype == listen_type_udp_dnscrypt ||
1542 			ports->ftype == listen_type_tcp_dnscrypt ||
1543 			ports->ftype == listen_type_udpancil_dnscrypt) {
1544 			cp->dnscrypt = 1;
1545 			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1546 			if(!cp->dnscrypt_buffer) {
1547 				log_err("can't alloc dnscrypt_buffer");
1548 				comm_point_delete(cp);
1549 				listen_delete(front);
1550 				return NULL;
1551 			}
1552 			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1553 		}
1554 #endif
1555 		if(!listen_cp_insert(cp, front)) {
1556 			log_err("malloc failed");
1557 			comm_point_delete(cp);
1558 			listen_delete(front);
1559 			return NULL;
1560 		}
1561 		ports = ports->next;
1562 	}
1563 	if(!front->cps) {
1564 		log_err("Could not open sockets to accept queries.");
1565 		listen_delete(front);
1566 		return NULL;
1567 	}
1568 
1569 	return front;
1570 }
1571 
1572 void
listen_list_delete(struct listen_list * list)1573 listen_list_delete(struct listen_list* list)
1574 {
1575 	struct listen_list *p = list, *pn;
1576 	while(p) {
1577 		pn = p->next;
1578 		comm_point_delete(p->com);
1579 		free(p);
1580 		p = pn;
1581 	}
1582 }
1583 
1584 void
listen_delete(struct listen_dnsport * front)1585 listen_delete(struct listen_dnsport* front)
1586 {
1587 	if(!front)
1588 		return;
1589 	listen_list_delete(front->cps);
1590 #ifdef USE_DNSCRYPT
1591 	if(front->dnscrypt_udp_buff &&
1592 		front->udp_buff != front->dnscrypt_udp_buff) {
1593 		sldns_buffer_free(front->dnscrypt_udp_buff);
1594 	}
1595 #endif
1596 	sldns_buffer_free(front->udp_buff);
1597 	free(front);
1598 }
1599 
#ifdef HAVE_GETIFADDRS
/**
 * Append a copy of str to the array of address strings.
 * @param ip_addresses: the array, grown by one element with realloc.
 * @param ip_addresses_size: number of elements, incremented on success.
 * @param str: string to duplicate into the new element.
 * @return false on allocation failure, an error is logged.
 */
static int
ifa_addr_list_append(char ***ip_addresses, int *ip_addresses_size,
	const char *str)
{
	void *grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Resolve an interface name to the addresses configured on it.
 * search_ifa may carry an "@port" suffix (split at the last '@'); the
 * suffix is appended again to every address that is found. IPv6 addresses
 * get "%name" appended when if_indextoname gives a name for the scope id.
 * If no interface entry produced an address, search_ifa itself is
 * appended unchanged.
 * @param ifas: list from getifaddrs.
 * @param search_ifa: interface name to look for, optionally with "@port".
 * @param ip_addresses: array of malloced strings that is appended to.
 * @param ip_addresses_size: number of elements in the array, updated.
 * @return false on failure (inet_ntop or allocation failed).
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *ifa;
	int size_before = *ip_addresses_size;
	/* search_ifa is the same for every entry, so split it only once */
	const char *at = strrchr(search_ifa, '@');
	const char *suffix = at ? at : "";
	size_t namelen = at ? (size_t)(at - search_ifa) : strlen(search_ifa);

	for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		/* the interface name has to equal the part before the '@' */
		if(strlen(ifa->ifa_name) != namelen ||
			strncmp(ifa->ifa_name, search_ifa, namelen) != 0)
			continue;
		if(!ifa->ifa_addr)
			continue;

		switch(ifa->ifa_addr->sa_family) {
		case AF_INET: {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				ifa->ifa_addr;
			if(!inet_ntop(AF_INET, &in4->sin_addr, a4,
				sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, suffix);
			break;
		}
#ifdef INET6
		case AF_INET6: {
			char a6[INET6_ADDRSTRLEN + 1];
			char if_index_name[IF_NAMESIZE + 1];
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				ifa->ifa_addr;
			if_index_name[0] = 0;
			if(!inet_ntop(AF_INET6, &in6->sin6_addr, a6,
				sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			/* on failure the name stays the empty string */
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)if_index_name);
			if(if_index_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, if_index_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, suffix);
			}
			break;
		}
#endif
		default:
			/* other address families are skipped */
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!ifa_addr_list_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	if(*ip_addresses_size == size_before) {
		/* nothing found, pass the configured string on as it is */
		if(!ifa_addr_list_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1703 
resolve_interface_names(char ** ifs,int num_ifs,struct config_strlist * list,char *** resif,int * num_resif)1704 int resolve_interface_names(char** ifs, int num_ifs,
1705 	struct config_strlist* list, char*** resif, int* num_resif)
1706 {
1707 #ifdef HAVE_GETIFADDRS
1708 	struct ifaddrs *addrs = NULL;
1709 	if(num_ifs == 0 && list == NULL) {
1710 		*resif = NULL;
1711 		*num_resif = 0;
1712 		return 1;
1713 	}
1714 	if(getifaddrs(&addrs) == -1) {
1715 		log_err("failed to list interfaces: getifaddrs: %s",
1716 			strerror(errno));
1717 		freeifaddrs(addrs);
1718 		return 0;
1719 	}
1720 	if(ifs) {
1721 		int i;
1722 		for(i=0; i<num_ifs; i++) {
1723 			if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1724 				freeifaddrs(addrs);
1725 				config_del_strarray(*resif, *num_resif);
1726 				*resif = NULL;
1727 				*num_resif = 0;
1728 				return 0;
1729 			}
1730 		}
1731 	}
1732 	if(list) {
1733 		struct config_strlist* p;
1734 		for(p = list; p; p = p->next) {
1735 			if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1736 				freeifaddrs(addrs);
1737 				config_del_strarray(*resif, *num_resif);
1738 				*resif = NULL;
1739 				*num_resif = 0;
1740 				return 0;
1741 			}
1742 }
1743 	}
1744 	freeifaddrs(addrs);
1745 	return 1;
1746 #else
1747 	struct config_strlist* p;
1748 	if(num_ifs == 0 && list == NULL) {
1749 		*resif = NULL;
1750 		*num_resif = 0;
1751 		return 1;
1752 	}
1753 	*num_resif = num_ifs;
1754 	for(p = list; p; p = p->next) {
1755 		(*num_resif)++;
1756 	}
1757 	*resif = calloc(*num_resif, sizeof(**resif));
1758 	if(!*resif) {
1759 		log_err("out of memory");
1760 		return 0;
1761 	}
1762 	if(ifs) {
1763 		int i;
1764 		for(i=0; i<num_ifs; i++) {
1765 			(*resif)[i] = strdup(ifs[i]);
1766 			if(!((*resif)[i])) {
1767 				log_err("out of memory");
1768 				config_del_strarray(*resif, *num_resif);
1769 				*resif = NULL;
1770 				*num_resif = 0;
1771 				return 0;
1772 			}
1773 		}
1774 	}
1775 	if(list) {
1776 		int idx = num_ifs;
1777 		for(p = list; p; p = p->next) {
1778 			(*resif)[idx] = strdup(p->str);
1779 			if(!((*resif)[idx])) {
1780 				log_err("out of memory");
1781 				config_del_strarray(*resif, *num_resif);
1782 				*resif = NULL;
1783 				*num_resif = 0;
1784 				return 0;
1785 			}
1786 			idx++;
1787 		}
1788 	}
1789 	return 1;
1790 #endif /* HAVE_GETIFADDRS */
1791 }
1792 
1793 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1794 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1795 	int* reuseport)
1796 {
1797 	struct listen_port* list = NULL;
1798 	struct addrinfo hints;
1799 	int i, do_ip4, do_ip6;
1800 	int do_tcp, do_auto;
1801 	char portbuf[32];
1802 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1803 	do_ip4 = cfg->do_ip4;
1804 	do_ip6 = cfg->do_ip6;
1805 	do_tcp = cfg->do_tcp;
1806 	do_auto = cfg->if_automatic && cfg->do_udp;
1807 	if(cfg->incoming_num_tcp == 0)
1808 		do_tcp = 0;
1809 
1810 	/* getaddrinfo */
1811 	memset(&hints, 0, sizeof(hints));
1812 	hints.ai_flags = AI_PASSIVE;
1813 	/* no name lookups on our listening ports */
1814 	if(num_ifs > 0)
1815 		hints.ai_flags |= AI_NUMERICHOST;
1816 	hints.ai_family = AF_UNSPEC;
1817 #ifndef INET6
1818 	do_ip6 = 0;
1819 #endif
1820 	if(!do_ip4 && !do_ip6) {
1821 		return NULL;
1822 	}
1823 	/* create ip4 and ip6 ports so that return addresses are nice. */
1824 	if(do_auto || num_ifs == 0) {
1825 		if(do_auto && cfg->if_automatic_ports &&
1826 			cfg->if_automatic_ports[0]!=0) {
1827 			char* now = cfg->if_automatic_ports;
1828 			while(now && *now) {
1829 				char* after;
1830 				int extraport;
1831 				while(isspace((unsigned char)*now))
1832 					now++;
1833 				if(!*now)
1834 					break;
1835 				after = now;
1836 				extraport = (int)strtol(now, &after, 10);
1837 				if(extraport < 0 || extraport > 65535) {
1838 					log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1839 					listening_ports_free(list);
1840 					return NULL;
1841 				}
1842 				if(extraport == 0 && now == after) {
1843 					log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1844 					listening_ports_free(list);
1845 					return NULL;
1846 				}
1847 				now = after;
1848 				snprintf(portbuf, sizeof(portbuf), "%d", extraport);
1849 				if(do_ip6) {
1850 					hints.ai_family = AF_INET6;
1851 					if(!ports_create_if("::0",
1852 						do_auto, cfg->do_udp, do_tcp,
1853 						&hints, portbuf, &list,
1854 						cfg->so_rcvbuf, cfg->so_sndbuf,
1855 						cfg->ssl_port, cfg->tls_additional_port,
1856 						cfg->https_port,
1857 						cfg->proxy_protocol_port,
1858 						reuseport, cfg->ip_transparent,
1859 						cfg->tcp_mss, cfg->ip_freebind,
1860 						cfg->http_nodelay, cfg->use_systemd,
1861 						cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1862 						listening_ports_free(list);
1863 						return NULL;
1864 					}
1865 				}
1866 				if(do_ip4) {
1867 					hints.ai_family = AF_INET;
1868 					if(!ports_create_if("0.0.0.0",
1869 						do_auto, cfg->do_udp, do_tcp,
1870 						&hints, portbuf, &list,
1871 						cfg->so_rcvbuf, cfg->so_sndbuf,
1872 						cfg->ssl_port, cfg->tls_additional_port,
1873 						cfg->https_port,
1874 						cfg->proxy_protocol_port,
1875 						reuseport, cfg->ip_transparent,
1876 						cfg->tcp_mss, cfg->ip_freebind,
1877 						cfg->http_nodelay, cfg->use_systemd,
1878 						cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1879 						listening_ports_free(list);
1880 						return NULL;
1881 					}
1882 				}
1883 			}
1884 			return list;
1885 		}
1886 		if(do_ip6) {
1887 			hints.ai_family = AF_INET6;
1888 			if(!ports_create_if(do_auto?"::0":"::1",
1889 				do_auto, cfg->do_udp, do_tcp,
1890 				&hints, portbuf, &list,
1891 				cfg->so_rcvbuf, cfg->so_sndbuf,
1892 				cfg->ssl_port, cfg->tls_additional_port,
1893 				cfg->https_port, cfg->proxy_protocol_port,
1894 				reuseport, cfg->ip_transparent,
1895 				cfg->tcp_mss, cfg->ip_freebind,
1896 				cfg->http_nodelay, cfg->use_systemd,
1897 				cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1898 				listening_ports_free(list);
1899 				return NULL;
1900 			}
1901 		}
1902 		if(do_ip4) {
1903 			hints.ai_family = AF_INET;
1904 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1905 				do_auto, cfg->do_udp, do_tcp,
1906 				&hints, portbuf, &list,
1907 				cfg->so_rcvbuf, cfg->so_sndbuf,
1908 				cfg->ssl_port, cfg->tls_additional_port,
1909 				cfg->https_port, cfg->proxy_protocol_port,
1910 				reuseport, cfg->ip_transparent,
1911 				cfg->tcp_mss, cfg->ip_freebind,
1912 				cfg->http_nodelay, cfg->use_systemd,
1913 				cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1914 				listening_ports_free(list);
1915 				return NULL;
1916 			}
1917 		}
1918 	} else for(i = 0; i<num_ifs; i++) {
1919 		if(str_is_ip6(ifs[i])) {
1920 			if(!do_ip6)
1921 				continue;
1922 			hints.ai_family = AF_INET6;
1923 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1924 				do_tcp, &hints, portbuf, &list,
1925 				cfg->so_rcvbuf, cfg->so_sndbuf,
1926 				cfg->ssl_port, cfg->tls_additional_port,
1927 				cfg->https_port, cfg->proxy_protocol_port,
1928 				reuseport, cfg->ip_transparent,
1929 				cfg->tcp_mss, cfg->ip_freebind,
1930 				cfg->http_nodelay, cfg->use_systemd,
1931 				cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1932 				listening_ports_free(list);
1933 				return NULL;
1934 			}
1935 		} else {
1936 			if(!do_ip4)
1937 				continue;
1938 			hints.ai_family = AF_INET;
1939 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1940 				do_tcp, &hints, portbuf, &list,
1941 				cfg->so_rcvbuf, cfg->so_sndbuf,
1942 				cfg->ssl_port, cfg->tls_additional_port,
1943 				cfg->https_port, cfg->proxy_protocol_port,
1944 				reuseport, cfg->ip_transparent,
1945 				cfg->tcp_mss, cfg->ip_freebind,
1946 				cfg->http_nodelay, cfg->use_systemd,
1947 				cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1948 				listening_ports_free(list);
1949 				return NULL;
1950 			}
1951 		}
1952 	}
1953 
1954 	return list;
1955 }
1956 
listening_ports_free(struct listen_port * list)1957 void listening_ports_free(struct listen_port* list)
1958 {
1959 	struct listen_port* nx;
1960 	while(list) {
1961 		nx = list->next;
1962 		if(list->fd != -1) {
1963 			sock_close(list->fd);
1964 		}
1965 		/* rc_ports don't have ub_socket */
1966 		if(list->socket) {
1967 			free(list->socket->addr);
1968 			free(list->socket);
1969 		}
1970 		free(list);
1971 		list = nx;
1972 	}
1973 }
1974 
listen_get_mem(struct listen_dnsport * listen)1975 size_t listen_get_mem(struct listen_dnsport* listen)
1976 {
1977 	struct listen_list* p;
1978 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1979 		sizeof(*listen->udp_buff) +
1980 		sldns_buffer_capacity(listen->udp_buff);
1981 #ifdef USE_DNSCRYPT
1982 	s += sizeof(*listen->dnscrypt_udp_buff);
1983 	if(listen->udp_buff != listen->dnscrypt_udp_buff){
1984 		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1985 	}
1986 #endif
1987 	for(p = listen->cps; p; p = p->next) {
1988 		s += sizeof(*p);
1989 		s += comm_point_get_mem(p->com);
1990 	}
1991 	return s;
1992 }
1993 
listen_stop_accept(struct listen_dnsport * listen)1994 void listen_stop_accept(struct listen_dnsport* listen)
1995 {
1996 	/* do not stop the ones that have no tcp_free list
1997 	 * (they have already stopped listening) */
1998 	struct listen_list* p;
1999 	for(p=listen->cps; p; p=p->next) {
2000 		if(p->com->type == comm_tcp_accept &&
2001 			p->com->tcp_free != NULL) {
2002 			comm_point_stop_listening(p->com);
2003 		}
2004 	}
2005 }
2006 
listen_start_accept(struct listen_dnsport * listen)2007 void listen_start_accept(struct listen_dnsport* listen)
2008 {
2009 	/* do not start the ones that have no tcp_free list, it is no
2010 	 * use to listen to them because they have no free tcp handlers */
2011 	struct listen_list* p;
2012 	for(p=listen->cps; p; p=p->next) {
2013 		if(p->com->type == comm_tcp_accept &&
2014 			p->com->tcp_free != NULL) {
2015 			comm_point_start_listening(p->com, -1, -1);
2016 		}
2017 	}
2018 }
2019 
2020 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)2021 tcp_req_info_create(struct sldns_buffer* spoolbuf)
2022 {
2023 	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
2024 	if(!req) {
2025 		log_err("malloc failure for new stream outoforder processing structure");
2026 		return NULL;
2027 	}
2028 	memset(req, 0, sizeof(*req));
2029 	req->spool_buffer = spoolbuf;
2030 	return req;
2031 }
2032 
/**
 * Clear and free a tcp_req_info.
 * @param req: structure to delete; NULL is accepted and ignored.
 */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		/* release the open and done request lists */
		tcp_req_info_clear(req);
		/* req->cp points back at the comm point that owns this
		 * structure and asked for the delete, and the spool_buffer
		 * is the shared udp buffer: neither is freed here */
		free(req);
	}
}
2043 
tcp_req_info_clear(struct tcp_req_info * req)2044 void tcp_req_info_clear(struct tcp_req_info* req)
2045 {
2046 	struct tcp_req_open_item* open, *nopen;
2047 	struct tcp_req_done_item* item, *nitem;
2048 	if(!req) return;
2049 
2050 	/* free outstanding request mesh reply entries */
2051 	open = req->open_req_list;
2052 	while(open) {
2053 		nopen = open->next;
2054 		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
2055 		free(open);
2056 		open = nopen;
2057 	}
2058 	req->open_req_list = NULL;
2059 	req->num_open_req = 0;
2060 
2061 	/* free pending writable result packets */
2062 	item = req->done_req_list;
2063 	while(item) {
2064 		nitem = item->next;
2065 		lock_basic_lock(&stream_wait_count_lock);
2066 		stream_wait_count -= (sizeof(struct tcp_req_done_item)
2067 			+item->len);
2068 		lock_basic_unlock(&stream_wait_count_lock);
2069 		free(item->buf);
2070 		free(item);
2071 		item = nitem;
2072 	}
2073 	req->done_req_list = NULL;
2074 	req->num_done_req = 0;
2075 	req->read_is_closed = 0;
2076 }
2077 
2078 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)2079 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
2080 {
2081 	struct tcp_req_open_item* open, *prev = NULL;
2082 	if(!req || !m) return;
2083 	open = req->open_req_list;
2084 	while(open) {
2085 		if(open->mesh_state == m) {
2086 			struct tcp_req_open_item* next;
2087 			if(prev) prev->next = open->next;
2088 			else req->open_req_list = open->next;
2089 			/* caller has to manage the mesh state reply entry */
2090 			next = open->next;
2091 			free(open);
2092 			req->num_open_req --;
2093 
2094 			/* prev = prev; */
2095 			open = next;
2096 			continue;
2097 		}
2098 		prev = open;
2099 		open = open->next;
2100 	}
2101 }
2102 
2103 /** setup listening for read or write */
2104 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)2105 tcp_req_info_setup_listen(struct tcp_req_info* req)
2106 {
2107 	int wr = 0;
2108 	int rd = 0;
2109 
2110 	if(req->cp->tcp_byte_count != 0) {
2111 		/* cannot change, halfway through */
2112 		return;
2113 	}
2114 
2115 	if(!req->cp->tcp_is_reading)
2116 		wr = 1;
2117 	if(!req->read_is_closed)
2118 		rd = 1;
2119 
2120 	if(wr) {
2121 		req->cp->tcp_is_reading = 0;
2122 		comm_point_stop_listening(req->cp);
2123 		comm_point_start_listening(req->cp, -1,
2124 			adjusted_tcp_timeout(req->cp));
2125 	} else if(rd) {
2126 		req->cp->tcp_is_reading = 1;
2127 		comm_point_stop_listening(req->cp);
2128 		comm_point_start_listening(req->cp, -1,
2129 			adjusted_tcp_timeout(req->cp));
2130 		/* and also read it (from SSL stack buffers), so
2131 		 * no event read event is expected since the remainder of
2132 		 * the TLS frame is sitting in the buffers. */
2133 		req->read_again = 1;
2134 	} else {
2135 		comm_point_stop_listening(req->cp);
2136 		comm_point_start_listening(req->cp, -1,
2137 			adjusted_tcp_timeout(req->cp));
2138 		comm_point_listen_for_rw(req->cp, 0, 0);
2139 	}
2140 }
2141 
2142 /** remove first item from list of pending results */
2143 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)2144 tcp_req_info_pop_done(struct tcp_req_info* req)
2145 {
2146 	struct tcp_req_done_item* item;
2147 	log_assert(req->num_done_req > 0 && req->done_req_list);
2148 	item = req->done_req_list;
2149 	lock_basic_lock(&stream_wait_count_lock);
2150 	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
2151 	lock_basic_unlock(&stream_wait_count_lock);
2152 	req->done_req_list = req->done_req_list->next;
2153 	req->num_done_req --;
2154 	return item;
2155 }
2156 
2157 /** Send given buffer and setup to write */
2158 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)2159 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
2160 	size_t len)
2161 {
2162 	sldns_buffer_clear(req->cp->buffer);
2163 	sldns_buffer_write(req->cp->buffer, buf, len);
2164 	sldns_buffer_flip(req->cp->buffer);
2165 
2166 	req->cp->tcp_is_reading = 0; /* we are now writing */
2167 }
2168 
2169 /** pick up the next result and start writing it to the channel */
2170 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)2171 tcp_req_pickup_next_result(struct tcp_req_info* req)
2172 {
2173 	if(req->num_done_req > 0) {
2174 		/* unlist the done item from the list of pending results */
2175 		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
2176 		tcp_req_info_start_write_buf(req, item->buf, item->len);
2177 		free(item->buf);
2178 		free(item);
2179 	}
2180 }
2181 
2182 /** the read channel has closed */
2183 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)2184 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2185 {
2186 	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2187 	/* reset byte count for (potential) partial read */
2188 	req->cp->tcp_byte_count = 0;
2189 	/* if we still have results to write, pick up next and write it */
2190 	if(req->num_done_req != 0) {
2191 		tcp_req_pickup_next_result(req);
2192 		tcp_req_info_setup_listen(req);
2193 		return 1;
2194 	}
2195 	/* if nothing to do, this closes the connection */
2196 	if(req->num_open_req == 0 && req->num_done_req == 0)
2197 		return 0;
2198 	/* otherwise, we must be waiting for dns resolve, wait with timeout */
2199 	req->read_is_closed = 1;
2200 	tcp_req_info_setup_listen(req);
2201 	return 1;
2202 }
2203 
2204 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)2205 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2206 {
2207 	/* back to reading state, we finished this write event */
2208 	sldns_buffer_clear(req->cp->buffer);
2209 	if(req->num_done_req == 0 && req->read_is_closed) {
2210 		/* no more to write and nothing to read, close it */
2211 		comm_point_drop_reply(&req->cp->repinfo);
2212 		return;
2213 	}
2214 	req->cp->tcp_is_reading = 1;
2215 	/* see if another result needs writing */
2216 	tcp_req_pickup_next_result(req);
2217 
2218 	/* see if there is more to write, if not stop_listening for writing */
2219 	/* see if new requests are allowed, if so, start_listening
2220 	 * for reading */
2221 	tcp_req_info_setup_listen(req);
2222 }
2223 
2224 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)2225 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2226 {
2227 	struct comm_point* c = req->cp;
2228 
2229 	/* we want to read up several requests, unless there are
2230 	 * pending answers */
2231 
2232 	req->is_drop = 0;
2233 	req->is_reply = 0;
2234 	req->in_worker_handle = 1;
2235 	sldns_buffer_set_limit(req->spool_buffer, 0);
2236 	/* handle the current request */
2237 	/* this calls the worker handle request routine that could give
2238 	 * a cache response, or localdata response, or drop the reply,
2239 	 * or schedule a mesh entry for later */
2240 	fptr_ok(fptr_whitelist_comm_point(c->callback));
2241 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2242 		req->in_worker_handle = 0;
2243 		/* there is an answer, put it up.  It is already in the
2244 		 * c->buffer, just send it. */
2245 		/* since we were just reading a query, the channel is
2246 		 * clear to write to */
2247 	send_it:
2248 		c->tcp_is_reading = 0;
2249 		comm_point_stop_listening(c);
2250 		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2251 		return;
2252 	}
2253 	req->in_worker_handle = 0;
2254 	/* it should be waiting in the mesh for recursion.
2255 	 * If mesh failed to add a new entry and called commpoint_drop_reply.
2256 	 * Then the mesh state has been cleared. */
2257 	if(req->is_drop) {
2258 		/* the reply has been dropped, stream has been closed. */
2259 		return;
2260 	}
2261 	/* If mesh failed(mallocfail) and called commpoint_send_reply with
2262 	 * something like servfail then we pick up that reply below. */
2263 	if(req->is_reply) {
2264 		goto send_it;
2265 	}
2266 
2267 	sldns_buffer_clear(c->buffer);
2268 	/* if pending answers, pick up an answer and start sending it */
2269 	tcp_req_pickup_next_result(req);
2270 
2271 	/* if answers pending, start sending answers */
2272 	/* read more requests if we can have more requests */
2273 	tcp_req_info_setup_listen(req);
2274 }
2275 
2276 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)2277 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2278 	struct mesh_area* mesh, struct mesh_state* m)
2279 {
2280 	struct tcp_req_open_item* item;
2281 	log_assert(req && mesh && m);
2282 	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2283 	if(!item) return 0;
2284 	item->next = req->open_req_list;
2285 	item->mesh = mesh;
2286 	item->mesh_state = m;
2287 	req->open_req_list = item;
2288 	req->num_open_req++;
2289 	return 1;
2290 }
2291 
2292 /** Add a result to the result list.  At the end. */
2293 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)2294 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2295 {
2296 	struct tcp_req_done_item* last = NULL;
2297 	struct tcp_req_done_item* item;
2298 	size_t space;
2299 
2300 	/* see if we have space */
2301 	space = sizeof(struct tcp_req_done_item) + len;
2302 	lock_basic_lock(&stream_wait_count_lock);
2303 	if(stream_wait_count + space > stream_wait_max) {
2304 		lock_basic_unlock(&stream_wait_count_lock);
2305 		verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2306 		return 0;
2307 	}
2308 	stream_wait_count += space;
2309 	lock_basic_unlock(&stream_wait_count_lock);
2310 
2311 	/* find last element */
2312 	last = req->done_req_list;
2313 	while(last && last->next)
2314 		last = last->next;
2315 
2316 	/* create new element */
2317 	item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2318 	if(!item) {
2319 		log_err("malloc failure, for stream result list");
2320 		return 0;
2321 	}
2322 	item->next = NULL;
2323 	item->len = len;
2324 	item->buf = memdup(buf, len);
2325 	if(!item->buf) {
2326 		free(item);
2327 		log_err("malloc failure, adding reply to stream result list");
2328 		return 0;
2329 	}
2330 
2331 	/* link in */
2332 	if(last) last->next = item;
2333 	else req->done_req_list = item;
2334 	req->num_done_req++;
2335 	return 1;
2336 }
2337 
2338 void
tcp_req_info_send_reply(struct tcp_req_info * req)2339 tcp_req_info_send_reply(struct tcp_req_info* req)
2340 {
2341 	if(req->in_worker_handle) {
2342 		/* reply from mesh is in the spool_buffer */
2343 		/* copy now, so that the spool buffer is free for other tasks
2344 		 * before the callback is done */
2345 		sldns_buffer_clear(req->cp->buffer);
2346 		sldns_buffer_write(req->cp->buffer,
2347 			sldns_buffer_begin(req->spool_buffer),
2348 			sldns_buffer_limit(req->spool_buffer));
2349 		sldns_buffer_flip(req->cp->buffer);
2350 		req->is_reply = 1;
2351 		return;
2352 	}
2353 	/* now that the query has been handled, that mesh_reply entry
2354 	 * should be removed, from the tcp_req_info list,
2355 	 * the mesh state cleanup removes then with region_cleanup and
2356 	 * replies_sent true. */
2357 	/* see if we can send it straight away (we are not doing
2358 	 * anything else).  If so, copy to buffer and start */
2359 	if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2360 		/* buffer is free, and was ready to read new query into,
2361 		 * but we are now going to use it to send this answer */
2362 		tcp_req_info_start_write_buf(req,
2363 			sldns_buffer_begin(req->spool_buffer),
2364 			sldns_buffer_limit(req->spool_buffer));
2365 		/* switch to listen to write events */
2366 		comm_point_stop_listening(req->cp);
2367 		comm_point_start_listening(req->cp, -1,
2368 			adjusted_tcp_timeout(req->cp));
2369 		return;
2370 	}
2371 	/* queue up the answer behind the others already pending */
2372 	if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2373 		sldns_buffer_limit(req->spool_buffer))) {
2374 		/* drop the connection, we are out of resources */
2375 		comm_point_drop_reply(&req->cp->repinfo);
2376 	}
2377 }
2378 
tcp_req_info_get_stream_buffer_size(void)2379 size_t tcp_req_info_get_stream_buffer_size(void)
2380 {
2381 	size_t s;
2382 	if(!stream_wait_lock_inited)
2383 		return stream_wait_count;
2384 	lock_basic_lock(&stream_wait_count_lock);
2385 	s = stream_wait_count;
2386 	lock_basic_unlock(&stream_wait_count_lock);
2387 	return s;
2388 }
2389 
http2_get_query_buffer_size(void)2390 size_t http2_get_query_buffer_size(void)
2391 {
2392 	size_t s;
2393 	if(!http2_query_buffer_lock_inited)
2394 		return http2_query_buffer_count;
2395 	lock_basic_lock(&http2_query_buffer_count_lock);
2396 	s = http2_query_buffer_count;
2397 	lock_basic_unlock(&http2_query_buffer_count_lock);
2398 	return s;
2399 }
2400 
http2_get_response_buffer_size(void)2401 size_t http2_get_response_buffer_size(void)
2402 {
2403 	size_t s;
2404 	if(!http2_response_buffer_lock_inited)
2405 		return http2_response_buffer_count;
2406 	lock_basic_lock(&http2_response_buffer_count_lock);
2407 	s = http2_response_buffer_count;
2408 	lock_basic_unlock(&http2_response_buffer_count_lock);
2409 	return s;
2410 }
2411 
2412 #ifdef HAVE_NGHTTP2
2413 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2414 static ssize_t http2_submit_response_read_callback(
2415 	nghttp2_session* ATTR_UNUSED(session),
2416 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2417 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2418 {
2419 	struct http2_stream* h2_stream;
2420 	struct http2_session* h2_session = source->ptr;
2421 	size_t copylen = length;
2422 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2423 		h2_session->session, stream_id))) {
2424 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2425 			"stream");
2426 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2427 	}
2428 	if(!h2_stream->rbuffer ||
2429 		sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2430 		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2431 			"available in rbuffer");
2432 		/* rbuffer will be free'd in frame close cb */
2433 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2434 	}
2435 
2436 	if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2437 		copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2438 	if(copylen > SSIZE_MAX)
2439 		copylen = SSIZE_MAX; /* will probably never happen */
2440 
2441 	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2442 	sldns_buffer_skip(h2_stream->rbuffer, copylen);
2443 
2444 	if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2445 		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2446 		lock_basic_lock(&http2_response_buffer_count_lock);
2447 		http2_response_buffer_count -=
2448 			sldns_buffer_capacity(h2_stream->rbuffer);
2449 		lock_basic_unlock(&http2_response_buffer_count_lock);
2450 		sldns_buffer_free(h2_stream->rbuffer);
2451 		h2_stream->rbuffer = NULL;
2452 	}
2453 
2454 	return copylen;
2455 }
2456 
2457 /**
2458  * Send RST_STREAM frame for stream.
2459  * @param h2_session: http2 session to submit frame to
2460  * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2461  * @return 0 on error, 1 otherwise
2462  */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2463 static int http2_submit_rst_stream(struct http2_session* h2_session,
2464 		struct http2_stream* h2_stream)
2465 {
2466 	int ret = nghttp2_submit_rst_stream(h2_session->session,
2467 		NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2468 		NGHTTP2_INTERNAL_ERROR);
2469 	if(ret) {
2470 		verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2471 			"error: %s", nghttp2_strerror(ret));
2472 		return 0;
2473 	}
2474 	return 1;
2475 }
2476 
2477 /**
2478  * DNS response ready to be submitted to nghttp2, to be prepared for sending
2479  * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2480  * might be used before this will be sent out.
2481  * @param h2_session: http2 session, containing c->buffer which contains answer
2482  * @return 0 on error, 1 otherwise
2483  */
http2_submit_dns_response(struct http2_session * h2_session)2484 int http2_submit_dns_response(struct http2_session* h2_session)
2485 {
2486 	int ret;
2487 	nghttp2_data_provider data_prd;
2488 	char status[4];
2489 	nghttp2_nv headers[3];
2490 	struct http2_stream* h2_stream = h2_session->c->h2_stream;
2491 	size_t rlen;
2492 	char rlen_str[32];
2493 
2494 	if(h2_stream->rbuffer) {
2495 		log_err("http2 submit response error: rbuffer already "
2496 			"exists");
2497 		return 0;
2498 	}
2499 	if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2500 		log_err("http2 submit response error: c->buffer not complete");
2501 		return 0;
2502 	}
2503 
2504 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2505 		verbose(VERB_QUERY, "http2: submit response error: "
2506 			"invalid status");
2507 		return 0;
2508 	}
2509 
2510 	rlen = sldns_buffer_remaining(h2_session->c->buffer);
2511 	snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2512 
2513 	lock_basic_lock(&http2_response_buffer_count_lock);
2514 	if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2515 		lock_basic_unlock(&http2_response_buffer_count_lock);
2516 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2517 			"in https-response-buffer-size");
2518 		return http2_submit_rst_stream(h2_session, h2_stream);
2519 	}
2520 	http2_response_buffer_count += rlen;
2521 	lock_basic_unlock(&http2_response_buffer_count_lock);
2522 
2523 	if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2524 		lock_basic_lock(&http2_response_buffer_count_lock);
2525 		http2_response_buffer_count -= rlen;
2526 		lock_basic_unlock(&http2_response_buffer_count_lock);
2527 		log_err("http2 submit response error: malloc failure");
2528 		return 0;
2529 	}
2530 
2531 	headers[0].name = (uint8_t*)":status";
2532 	headers[0].namelen = 7;
2533 	headers[0].value = (uint8_t*)status;
2534 	headers[0].valuelen = 3;
2535 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2536 
2537 	headers[1].name = (uint8_t*)"content-type";
2538 	headers[1].namelen = 12;
2539 	headers[1].value = (uint8_t*)"application/dns-message";
2540 	headers[1].valuelen = 23;
2541 	headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2542 
2543 	headers[2].name = (uint8_t*)"content-length";
2544 	headers[2].namelen = 14;
2545 	headers[2].value = (uint8_t*)rlen_str;
2546 	headers[2].valuelen = strlen(rlen_str);
2547 	headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2548 
2549 	sldns_buffer_write(h2_stream->rbuffer,
2550 		sldns_buffer_current(h2_session->c->buffer),
2551 		sldns_buffer_remaining(h2_session->c->buffer));
2552 	sldns_buffer_flip(h2_stream->rbuffer);
2553 
2554 	data_prd.source.ptr = h2_session;
2555 	data_prd.read_callback = http2_submit_response_read_callback;
2556 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2557 		headers, 3, &data_prd);
2558 	if(ret) {
2559 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2560 			"error: %s", nghttp2_strerror(ret));
2561 		return 0;
2562 	}
2563 	return 1;
2564 }
2565 #else
/** Variant compiled when HAVE_NGHTTP2 is not defined: no response can be
 * submitted, the argument is ignored and 0 (error) is always returned. */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2570 #endif
2571 
2572 #ifdef HAVE_NGHTTP2
2573 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2574 static char* http_status_to_str(enum http_status s)
2575 {
2576 	switch(s) {
2577 		case HTTP_STATUS_OK:
2578 			return "OK";
2579 		case HTTP_STATUS_BAD_REQUEST:
2580 			return "Bad Request";
2581 		case HTTP_STATUS_NOT_FOUND:
2582 			return "Not Found";
2583 		case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2584 			return "Payload Too Large";
2585 		case HTTP_STATUS_URI_TOO_LONG:
2586 			return "URI Too Long";
2587 		case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2588 			return "Unsupported Media Type";
2589 		case HTTP_STATUS_NOT_IMPLEMENTED:
2590 			return "Not Implemented";
2591 	}
2592 	return "Status Unknown";
2593 }
2594 
2595 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2596 static ssize_t http2_submit_error_read_callback(
2597 	nghttp2_session* ATTR_UNUSED(session),
2598 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2599 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2600 {
2601 	struct http2_stream* h2_stream;
2602 	struct http2_session* h2_session = source->ptr;
2603 	char* msg;
2604 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2605 		h2_session->session, stream_id))) {
2606 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2607 			"stream");
2608 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2609 	}
2610 	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2611 	msg = http_status_to_str(h2_stream->status);
2612 	if(length < strlen(msg))
2613 		return 0; /* not worth trying over multiple frames */
2614 	memcpy(buf, msg, strlen(msg));
2615 	return strlen(msg);
2616 
2617 }
2618 
2619 /**
2620  * HTTP error response ready to be submitted to nghttp2, to be prepared for
2621  * sending out. Message body will contain descriptive string for HTTP status.
2622  * @param h2_session: http2 session to submit to
2623  * @param h2_stream: http2 stream containing HTTP status to use for error
2624  * @return 0 on error, 1 otherwise
2625  */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2626 static int http2_submit_error(struct http2_session* h2_session,
2627 	struct http2_stream* h2_stream)
2628 {
2629 	int ret;
2630 	char status[4];
2631 	nghttp2_data_provider data_prd;
2632 	nghttp2_nv headers[1]; /* will be copied by nghttp */
2633 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2634 		verbose(VERB_QUERY, "http2: submit error failed, "
2635 			"invalid status");
2636 		return 0;
2637 	}
2638 	headers[0].name = (uint8_t*)":status";
2639 	headers[0].namelen = 7;
2640 	headers[0].value = (uint8_t*)status;
2641 	headers[0].valuelen = 3;
2642 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2643 
2644 	data_prd.source.ptr = h2_session;
2645 	data_prd.read_callback = http2_submit_error_read_callback;
2646 
2647 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2648 		headers, 1, &data_prd);
2649 	if(ret) {
2650 		verbose(VERB_QUERY, "http2: submit error failed, "
2651 			"error: %s", nghttp2_strerror(ret));
2652 		return 0;
2653 	}
2654 	return 1;
2655 }
2656 
2657 /**
2658  * Start query handling. Query is stored in the stream, and will be free'd here.
2659  * @param h2_session: http2 session, containing comm point
2660  * @param h2_stream: stream containing buffered query
2661  * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2662  * reply available (yet).
2663  */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2664 static int http2_query_read_done(struct http2_session* h2_session,
2665 	struct http2_stream* h2_stream)
2666 {
2667 	log_assert(h2_stream->qbuffer);
2668 
2669 	if(h2_session->c->h2_stream) {
2670 		verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2671 			"buffer already assigned to stream");
2672 		return -1;
2673 	}
2674 
2675     /* the c->buffer might be used by mesh_send_reply and no be cleard
2676 	 * need to be cleared before use */
2677 	sldns_buffer_clear(h2_session->c->buffer);
2678 	if(sldns_buffer_remaining(h2_session->c->buffer) <
2679 		sldns_buffer_remaining(h2_stream->qbuffer)) {
2680 		/* qbuffer will be free'd in frame close cb */
2681 		sldns_buffer_clear(h2_session->c->buffer);
2682 		verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2683 			"qbuffer in c->buffer");
2684 		return -1;
2685 	}
2686 
2687 	sldns_buffer_write(h2_session->c->buffer,
2688 		sldns_buffer_current(h2_stream->qbuffer),
2689 		sldns_buffer_remaining(h2_stream->qbuffer));
2690 
2691 	lock_basic_lock(&http2_query_buffer_count_lock);
2692 	http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2693 	lock_basic_unlock(&http2_query_buffer_count_lock);
2694 	sldns_buffer_free(h2_stream->qbuffer);
2695 	h2_stream->qbuffer = NULL;
2696 
2697 	sldns_buffer_flip(h2_session->c->buffer);
2698 	h2_session->c->h2_stream = h2_stream;
2699 	fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2700 	if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2701 		NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2702 		return 1; /* answer in c->buffer */
2703 	}
2704 	sldns_buffer_clear(h2_session->c->buffer);
2705 	h2_session->c->h2_stream = NULL;
2706 	return 0; /* mesh state added, or dropped */
2707 }
2708 
/**
 * nghttp2 callback. Used to check if the received frame indicates the end of a
 * stream. Gather collected request data and start query handling.
 * Frames other than DATA or HEADERS, frames without the END_STREAM flag and
 * frames for which no stream user data is found are ignored (return 0).
 * A request that was flagged as invalid on the stream gets an HTTP error
 * response; a valid request is passed to http2_query_read_done and, when
 * an answer is available right away, submitted as DNS response.
 * @param session: nghttp2 session, used to look up the stream user data.
 * @param frame: the received frame.
 * @param cb_arg: the http2 session.
 * @return 0 on success or when there is nothing to do,
 *	NGHTTP2_ERR_CALLBACK_FAILURE on failure or when the query was dropped.
 */
static int http2_req_frame_recv_cb(nghttp2_session* session,
	const nghttp2_frame* frame, void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream;
	int query_read_done;

	/* only the DATA or HEADERS frame that ends a stream is of interest */
	if((frame->hd.type != NGHTTP2_DATA &&
		frame->hd.type != NGHTTP2_HEADERS) ||
		!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
			return 0;
	}

	if(!(h2_stream = nghttp2_session_get_stream_user_data(
		session, frame->hd.stream_id)))
		return 0;

	/* map the flags that are set on the stream to an HTTP status; the
	 * first check that matches decides the status */
	if(h2_stream->invalid_endpoint) {
		h2_stream->status = HTTP_STATUS_NOT_FOUND;
		goto submit_http_error;
	}

	if(h2_stream->invalid_content_type) {
		h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
		goto submit_http_error;
	}

	if(h2_stream->http_method != HTTP_METHOD_GET &&
		h2_stream->http_method != HTTP_METHOD_POST) {
		h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
		goto submit_http_error;
	}

	if(h2_stream->query_too_large) {
		if(h2_stream->http_method == HTTP_METHOD_POST)
			h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
		else
			h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
		goto submit_http_error;
	}

	/* without a query buffer there is no query to handle */
	if(!h2_stream->qbuffer) {
		h2_stream->status = HTTP_STATUS_BAD_REQUEST;
		goto submit_http_error;
	}

	/* a status that was already set on the stream is handled as an error
	 * as well; the checks above jump into this block */
	if(h2_stream->status) {
submit_http_error:
		verbose(VERB_QUERY, "http2 request invalid, returning :status="
			"%d", h2_stream->status);
		if(!http2_submit_error(h2_session, h2_stream)) {
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return 0;
	}
	h2_stream->status = HTTP_STATUS_OK;

	/* make the collected query readable from its start */
	sldns_buffer_flip(h2_stream->qbuffer);
	/* NOTE(review): postpone_drop presumably makes a drop that is
	 * requested during the worker callback get recorded in is_drop
	 * instead of being executed at once - confirm. It is only reset on
	 * the paths where no answer is available. */
	h2_session->postpone_drop = 1;
	query_read_done = http2_query_read_done(h2_session, h2_stream);
	if(query_read_done < 0)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	else if(!query_read_done) {
		if(h2_session->is_drop) {
			/* connection needs to be closed. Return failure to make
			 * sure no other action are taken anymore on comm point.
			 * failure will result in reclaiming (and closing)
			 * of comm point. */
			verbose(VERB_QUERY, "http2 query dropped in worker cb");
			h2_session->postpone_drop = 0;
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		/* nothing to submit right now, query added to mesh. */
		h2_session->postpone_drop = 0;
		return 0;
	}
	/* the answer is in c->buffer; after the submit attempt the shared
	 * buffer is released from this stream, whether it worked or not */
	if(!http2_submit_dns_response(h2_session)) {
		sldns_buffer_clear(h2_session->c->buffer);
		h2_session->c->h2_stream = NULL;
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
	verbose(VERB_QUERY, "http2 query submitted to session");
	sldns_buffer_clear(h2_session->c->buffer);
	h2_session->c->h2_stream = NULL;
	return 0;
}
2797 
2798 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2799 static int http2_req_begin_headers_cb(nghttp2_session* session,
2800 	const nghttp2_frame* frame, void* cb_arg)
2801 {
2802 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2803 	struct http2_stream* h2_stream;
2804 	int ret;
2805 	if(frame->hd.type != NGHTTP2_HEADERS ||
2806 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2807 		/* only interested in request headers */
2808 		return 0;
2809 	}
2810 	if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2811 		log_err("malloc failure while creating http2 stream");
2812 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2813 	}
2814 	http2_session_add_stream(h2_session, h2_stream);
2815 	ret = nghttp2_session_set_stream_user_data(session,
2816 		frame->hd.stream_id, h2_stream);
2817 	if(ret) {
2818 		/* stream does not exist */
2819 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2820 			"error: %s", nghttp2_strerror(ret));
2821 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2822 	}
2823 
2824 	return 0;
2825 }
2826 
2827 /**
2828  * base64url decode, store in qbuffer
2829  * @param h2_session: http2 session
2830  * @param h2_stream: http2 stream
2831  * @param start: start of the base64 string
2832  * @param length: length of the base64 string
2833  * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2834  * buffer will be NULL is unparseble.
2835  */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2836 static int http2_buffer_uri_query(struct http2_session* h2_session,
2837 	struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2838 {
2839 	size_t expectb64len;
2840 	int b64len;
2841 	if(h2_stream->http_method == HTTP_METHOD_POST)
2842 		return 1;
2843 	if(length == 0)
2844 		return 1;
2845 	if(h2_stream->qbuffer) {
2846 		verbose(VERB_ALGO, "http2_req_header fail, "
2847 			"qbuffer already set");
2848 		return 0;
2849 	}
2850 
2851 	/* calculate size, might be a bit bigger than the real
2852 	 * decoded buffer size */
2853 	expectb64len = sldns_b64_pton_calculate_size(length);
2854 	log_assert(expectb64len > 0);
2855 	if(expectb64len >
2856 		h2_session->c->http2_stream_max_qbuffer_size) {
2857 		h2_stream->query_too_large = 1;
2858 		return 1;
2859 	}
2860 
2861 	lock_basic_lock(&http2_query_buffer_count_lock);
2862 	if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2863 		lock_basic_unlock(&http2_query_buffer_count_lock);
2864 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2865 			"in http2-query-buffer-size");
2866 		return http2_submit_rst_stream(h2_session, h2_stream);
2867 	}
2868 	http2_query_buffer_count += expectb64len;
2869 	lock_basic_unlock(&http2_query_buffer_count_lock);
2870 	if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2871 		lock_basic_lock(&http2_query_buffer_count_lock);
2872 		http2_query_buffer_count -= expectb64len;
2873 		lock_basic_unlock(&http2_query_buffer_count_lock);
2874 		log_err("http2_req_header fail, qbuffer "
2875 			"malloc failure");
2876 		return 0;
2877 	}
2878 
2879 	if(sldns_b64_contains_nonurl((char const*)start, length)) {
2880 		char buf[65536+4];
2881 		verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2882 		/* copy to the scratch buffer temporarily to terminate the
2883 		 * string with a zero */
2884 		if(length+1 > sizeof(buf)) {
2885 			/* too long */
2886 			lock_basic_lock(&http2_query_buffer_count_lock);
2887 			http2_query_buffer_count -= expectb64len;
2888 			lock_basic_unlock(&http2_query_buffer_count_lock);
2889 			sldns_buffer_free(h2_stream->qbuffer);
2890 			h2_stream->qbuffer = NULL;
2891 			return 1;
2892 		}
2893 		memmove(buf, start, length);
2894 		buf[length] = 0;
2895 		if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2896 			h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2897 			lock_basic_lock(&http2_query_buffer_count_lock);
2898 			http2_query_buffer_count -= expectb64len;
2899 			lock_basic_unlock(&http2_query_buffer_count_lock);
2900 			sldns_buffer_free(h2_stream->qbuffer);
2901 			h2_stream->qbuffer = NULL;
2902 			return 1;
2903 		}
2904 	} else {
2905 		if(!(b64len = sldns_b64url_pton(
2906 			(char const *)start, length,
2907 			sldns_buffer_current(h2_stream->qbuffer),
2908 			expectb64len)) || b64len < 0) {
2909 			lock_basic_lock(&http2_query_buffer_count_lock);
2910 			http2_query_buffer_count -= expectb64len;
2911 			lock_basic_unlock(&http2_query_buffer_count_lock);
2912 			sldns_buffer_free(h2_stream->qbuffer);
2913 			h2_stream->qbuffer = NULL;
2914 			/* return without error, method can be an
2915 			 * unknown POST */
2916 			return 1;
2917 		}
2918 	}
2919 	sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2920 	return 1;
2921 }
2922 
2923 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)2924 static int http2_req_header_cb(nghttp2_session* session,
2925 	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2926 	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2927 	void* cb_arg)
2928 {
2929 	struct http2_stream* h2_stream = NULL;
2930 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2931 	/* nghttp2 deals with CONTINUATION frames and provides them as part of
2932 	 * the HEADER */
2933 	if(frame->hd.type != NGHTTP2_HEADERS ||
2934 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2935 		/* only interested in request headers */
2936 		return 0;
2937 	}
2938 	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2939 		frame->hd.stream_id)))
2940 		return 0;
2941 
2942 	/* earlier checks already indicate we can stop handling this query */
2943 	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2944 		h2_stream->invalid_content_type ||
2945 		h2_stream->invalid_endpoint)
2946 		return 0;
2947 
2948 
2949 	/* nghttp2 performs some sanity checks in the headers, including:
2950 	 * name and value are guaranteed to be null terminated
2951 	 * name is guaranteed to be lowercase
2952 	 * content-length value is guaranteed to contain digits
2953 	 */
2954 
2955 	if(!h2_stream->http_method && namelen == 7 &&
2956 		memcmp(":method", name, namelen) == 0) {
2957 		/* Case insensitive check on :method value to be on the safe
2958 		 * side. I failed to find text about case sensitivity in specs.
2959 		 */
2960 		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2961 			h2_stream->http_method = HTTP_METHOD_GET;
2962 		else if(valuelen == 4 &&
2963 			strcasecmp("POST", (const char*)value) == 0) {
2964 			h2_stream->http_method = HTTP_METHOD_POST;
2965 			if(h2_stream->qbuffer) {
2966 				/* POST method uses query from DATA frames */
2967 				lock_basic_lock(&http2_query_buffer_count_lock);
2968 				http2_query_buffer_count -=
2969 					sldns_buffer_capacity(h2_stream->qbuffer);
2970 				lock_basic_unlock(&http2_query_buffer_count_lock);
2971 				sldns_buffer_free(h2_stream->qbuffer);
2972 				h2_stream->qbuffer = NULL;
2973 			}
2974 		} else
2975 			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2976 		return 0;
2977 	}
2978 	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2979 		/* :path may contain DNS query, depending on method. Method might
2980 		 * not be known yet here, so check after finishing receiving
2981 		 * stream. */
2982 #define	HTTP_QUERY_PARAM "?dns="
2983 		size_t el = strlen(h2_session->c->http_endpoint);
2984 		size_t qpl = strlen(HTTP_QUERY_PARAM);
2985 
2986 		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2987 			value, el) != 0) {
2988 			h2_stream->invalid_endpoint = 1;
2989 			return 0;
2990 		}
2991 		/* larger than endpoint only allowed if it is for the query
2992 		 * parameter */
2993 		if(valuelen <= el+qpl ||
2994 			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2995 			if(valuelen != el)
2996 				h2_stream->invalid_endpoint = 1;
2997 			return 0;
2998 		}
2999 
3000 		if(!http2_buffer_uri_query(h2_session, h2_stream,
3001 			value+(el+qpl), valuelen-(el+qpl))) {
3002 			return NGHTTP2_ERR_CALLBACK_FAILURE;
3003 		}
3004 		return 0;
3005 	}
3006 	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
3007 	 * and not needed when using GET. Don't enfore.
3008 	 * If set only allow lowercase "application/dns-message".
3009 	 *
3010 	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
3011 	 * be able to handle "application/dns-message". Since that is the only
3012 	 * content-type supported we can ignore the accept header.
3013 	 */
3014 	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
3015 		if(valuelen != 23 || memcmp("application/dns-message", value,
3016 			valuelen) != 0) {
3017 			h2_stream->invalid_content_type = 1;
3018 		}
3019 	}
3020 
3021 	/* Only interested in content-lentg for POST (on not yet known) method.
3022 	 */
3023 	if((!h2_stream->http_method ||
3024 		h2_stream->http_method == HTTP_METHOD_POST) &&
3025 		!h2_stream->content_length && namelen  == 14 &&
3026 		memcmp("content-length", name, namelen) == 0) {
3027 		if(valuelen > 5) {
3028 			h2_stream->query_too_large = 1;
3029 			return 0;
3030 		}
3031 		/* guaranteed to only contain digits and be null terminated */
3032 		h2_stream->content_length = atoi((const char*)value);
3033 		if(h2_stream->content_length >
3034 			h2_session->c->http2_stream_max_qbuffer_size) {
3035 			h2_stream->query_too_large = 1;
3036 			return 0;
3037 		}
3038 	}
3039 	return 0;
3040 }
3041 
3042 /** nghttp2 callback. Used to get data from DATA frames, which can contain
3043  * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)3044 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
3045 	uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
3046 	size_t len, void* cb_arg)
3047 {
3048 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
3049 	struct http2_stream* h2_stream;
3050 	size_t qlen = 0;
3051 
3052 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
3053 		h2_session->session, stream_id))) {
3054 		return 0;
3055 	}
3056 
3057 	if(h2_stream->query_too_large)
3058 		return 0;
3059 
3060 	if(!h2_stream->qbuffer) {
3061 		if(h2_stream->content_length) {
3062 			if(h2_stream->content_length < len)
3063 				/* getting more data in DATA frame than
3064 				 * advertised in content-length header. */
3065 				return NGHTTP2_ERR_CALLBACK_FAILURE;
3066 			qlen = h2_stream->content_length;
3067 		} else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
3068 			/* setting this to msg-buffer-size can result in a lot
3069 			 * of memory consuption. Most queries should fit in a
3070 			 * single DATA frame, and most POST queries will
3071 			 * contain content-length which does not impose this
3072 			 * limit. */
3073 			qlen = len;
3074 		}
3075 	}
3076 	if(!h2_stream->qbuffer && qlen) {
3077 		lock_basic_lock(&http2_query_buffer_count_lock);
3078 		if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
3079 			lock_basic_unlock(&http2_query_buffer_count_lock);
3080 			verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
3081 				"in http2-query-buffer-size");
3082 			return http2_submit_rst_stream(h2_session, h2_stream);
3083 		}
3084 		http2_query_buffer_count += qlen;
3085 		lock_basic_unlock(&http2_query_buffer_count_lock);
3086 		if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
3087 			lock_basic_lock(&http2_query_buffer_count_lock);
3088 			http2_query_buffer_count -= qlen;
3089 			lock_basic_unlock(&http2_query_buffer_count_lock);
3090 		}
3091 	}
3092 
3093 	if(!h2_stream->qbuffer ||
3094 		sldns_buffer_remaining(h2_stream->qbuffer) < len) {
3095 		verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
3096 			"buffer space for POST query. Can happen on multi "
3097 			"frame requests without content-length header");
3098 		h2_stream->query_too_large = 1;
3099 		return 0;
3100 	}
3101 
3102 	sldns_buffer_write(h2_stream->qbuffer, data, len);
3103 
3104 	return 0;
3105 }
3106 
http2_req_stream_clear(struct http2_stream * h2_stream)3107 void http2_req_stream_clear(struct http2_stream* h2_stream)
3108 {
3109 	if(h2_stream->qbuffer) {
3110 		lock_basic_lock(&http2_query_buffer_count_lock);
3111 		http2_query_buffer_count -=
3112 			sldns_buffer_capacity(h2_stream->qbuffer);
3113 		lock_basic_unlock(&http2_query_buffer_count_lock);
3114 		sldns_buffer_free(h2_stream->qbuffer);
3115 		h2_stream->qbuffer = NULL;
3116 	}
3117 	if(h2_stream->rbuffer) {
3118 		lock_basic_lock(&http2_response_buffer_count_lock);
3119 		http2_response_buffer_count -=
3120 			sldns_buffer_capacity(h2_stream->rbuffer);
3121 		lock_basic_unlock(&http2_response_buffer_count_lock);
3122 		sldns_buffer_free(h2_stream->rbuffer);
3123 		h2_stream->rbuffer = NULL;
3124 	}
3125 }
3126 
http2_req_callbacks_create(void)3127 nghttp2_session_callbacks* http2_req_callbacks_create(void)
3128 {
3129 	nghttp2_session_callbacks *callbacks;
3130 	if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
3131 		log_err("failed to initialize nghttp2 callback");
3132 		return NULL;
3133 	}
3134 	/* reception of header block started, used to create h2_stream */
3135 	nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
3136 		http2_req_begin_headers_cb);
3137 	/* complete frame received, used to get data from stream if frame
3138 	 * has end stream flag, and start processing query */
3139 	nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
3140 		http2_req_frame_recv_cb);
3141 	/* get request info from headers */
3142 	nghttp2_session_callbacks_set_on_header_callback(callbacks,
3143 		http2_req_header_cb);
3144 	/* get data from DATA frames, containing POST query */
3145 	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
3146 		http2_req_data_chunk_recv_cb);
3147 
3148 	/* generic HTTP2 callbacks */
3149 	nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
3150 	nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
3151 	nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
3152 		http2_stream_close_cb);
3153 
3154 	return callbacks;
3155 }
3156 #endif /* HAVE_NGHTTP2 */
3157