1 /* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41 #include "config.h" 42 #ifdef HAVE_SYS_TYPES_H 43 # include <sys/types.h> 44 #endif 45 #include <sys/time.h> 46 #include <limits.h> 47 #ifdef USE_TCP_FASTOPEN 48 #include <netinet/tcp.h> 49 #endif 50 #include <ctype.h> 51 #include "services/listen_dnsport.h" 52 #include "services/outside_network.h" 53 #include "util/netevent.h" 54 #include "util/log.h" 55 #include "util/config_file.h" 56 #include "util/net_help.h" 57 #include "sldns/sbuffer.h" 58 #include "sldns/parseutil.h" 59 #include "services/mesh.h" 60 #include "util/fptr_wlist.h" 61 #include "util/locks.h" 62 63 #ifdef HAVE_NETDB_H 64 #include <netdb.h> 65 #endif 66 #include <fcntl.h> 67 68 #ifdef HAVE_SYS_UN_H 69 #include <sys/un.h> 70 #endif 71 72 #ifdef HAVE_SYSTEMD 73 #include <systemd/sd-daemon.h> 74 #endif 75 76 #ifdef HAVE_IFADDRS_H 77 #include <ifaddrs.h> 78 #endif 79 #ifdef HAVE_NET_IF_H 80 #include <net/if.h> 81 #endif 82 #ifdef HAVE_LINUX_NET_TSTAMP_H 83 #include <linux/net_tstamp.h> 84 #endif 85 /** number of queued TCP connections for listen() */ 86 #define TCP_BACKLOG 256 87 88 #ifndef THREADS_DISABLED 89 /** lock on the counter of stream buffer memory */ 90 static lock_basic_type stream_wait_count_lock; 91 /** lock on the counter of HTTP2 query buffer memory */ 92 static lock_basic_type http2_query_buffer_count_lock; 93 /** lock on the counter of HTTP2 response buffer memory */ 94 static lock_basic_type http2_response_buffer_count_lock; 95 #endif 96 /** size (in bytes) of stream wait buffers */ 97 static size_t stream_wait_count = 0; 98 /** is the lock initialised for stream wait buffers */ 99 static int stream_wait_lock_inited = 0; 100 /** size (in bytes) of HTTP2 query buffers */ 101 static size_t http2_query_buffer_count = 0; 102 /** is the lock initialised for HTTP2 query buffers */ 103 static int http2_query_buffer_lock_inited = 0; 104 /** size (in bytes) of HTTP2 response buffers */ 105 static size_t http2_response_buffer_count = 0; 106 /** is the lock initialised for HTTP2 
response buffers */ 107 static int http2_response_buffer_lock_inited = 0; 108 109 /** 110 * Debug print of the getaddrinfo returned address. 111 * @param addr: the address returned. 112 */ 113 static void 114 verbose_print_addr(struct addrinfo *addr) 115 { 116 if(verbosity >= VERB_ALGO) { 117 char buf[100]; 118 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 119 #ifdef INET6 120 if(addr->ai_family == AF_INET6) 121 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 122 sin6_addr; 123 #endif /* INET6 */ 124 if(inet_ntop(addr->ai_family, sinaddr, buf, 125 (socklen_t)sizeof(buf)) == 0) { 126 (void)strlcpy(buf, "(null)", sizeof(buf)); 127 } 128 buf[sizeof(buf)-1] = 0; 129 verbose(VERB_ALGO, "creating %s%s socket %s %d", 130 addr->ai_socktype==SOCK_DGRAM?"udp": 131 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 132 addr->ai_family==AF_INET?"4": 133 addr->ai_family==AF_INET6?"6": 134 "_otherfam", buf, 135 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); 136 } 137 } 138 139 void 140 verbose_print_unbound_socket(struct unbound_socket* ub_sock) 141 { 142 if(verbosity >= VERB_ALGO) { 143 log_info("listing of unbound_socket structure:"); 144 verbose_print_addr(ub_sock->addr); 145 log_info("s is: %d, fam is: %s, acl: %s", ub_sock->s, 146 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 147 ub_sock->acl?"yes":"no"); 148 } 149 } 150 151 #ifdef HAVE_SYSTEMD 152 static int 153 systemd_get_activated(int family, int socktype, int listen, 154 struct sockaddr *addr, socklen_t addrlen, 155 const char *path) 156 { 157 int i = 0; 158 int r = 0; 159 int s = -1; 160 const char* listen_pid, *listen_fds; 161 162 /* We should use "listen" option only for stream protocols. 
For UDP it should be -1 */ 163 164 if((r = sd_booted()) < 1) { 165 if(r == 0) 166 log_warn("systemd is not running"); 167 else 168 log_err("systemd sd_booted(): %s", strerror(-r)); 169 return -1; 170 } 171 172 listen_pid = getenv("LISTEN_PID"); 173 listen_fds = getenv("LISTEN_FDS"); 174 175 if (!listen_pid) { 176 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 177 return -1; 178 } 179 180 if (!listen_fds) { 181 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 182 return -1; 183 } 184 185 if((r = sd_listen_fds(0)) < 1) { 186 if(r == 0) 187 log_warn("systemd: did not return socket, check unit configuration"); 188 else 189 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 190 return -1; 191 } 192 193 for(i = 0; i < r; i++) { 194 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 195 s = SD_LISTEN_FDS_START + i; 196 break; 197 } 198 } 199 if (s == -1) { 200 if (addr) 201 log_err_addr("systemd sd_listen_fds()", 202 "no such socket", 203 (struct sockaddr_storage *)addr, addrlen); 204 else 205 log_err("systemd sd_listen_fds(): %s", path); 206 } 207 return s; 208 } 209 #endif 210 211 int 212 create_udp_sock(int family, int socktype, struct sockaddr* addr, 213 socklen_t addrlen, int v6only, int* inuse, int* noproto, 214 int rcv, int snd, int listen, int* reuseport, int transparent, 215 int freebind, int use_systemd, int dscp) 216 { 217 int s; 218 char* err; 219 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 220 int on=1; 221 #endif 222 #ifdef IPV6_MTU 223 int mtu = IPV6_MIN_MTU; 224 #endif 225 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 226 (void)rcv; 227 #endif 228 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 229 (void)snd; 230 #endif 231 #ifndef IPV6_V6ONLY 232 (void)v6only; 233 #endif 234 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && 
!defined(SO_BINDANY) 235 (void)transparent; 236 #endif 237 #if !defined(IP_FREEBIND) 238 (void)freebind; 239 #endif 240 #ifdef HAVE_SYSTEMD 241 int got_fd_from_systemd = 0; 242 243 if (!use_systemd 244 || (use_systemd 245 && (s = systemd_get_activated(family, socktype, -1, addr, 246 addrlen, NULL)) == -1)) { 247 #else 248 (void)use_systemd; 249 #endif 250 if((s = socket(family, socktype, 0)) == -1) { 251 *inuse = 0; 252 #ifndef USE_WINSOCK 253 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 254 *noproto = 1; 255 return -1; 256 } 257 #else 258 if(WSAGetLastError() == WSAEAFNOSUPPORT || 259 WSAGetLastError() == WSAEPROTONOSUPPORT) { 260 *noproto = 1; 261 return -1; 262 } 263 #endif 264 log_err("can't create socket: %s", sock_strerror(errno)); 265 *noproto = 0; 266 return -1; 267 } 268 #ifdef HAVE_SYSTEMD 269 } else { 270 got_fd_from_systemd = 1; 271 } 272 #endif 273 if(listen) { 274 #ifdef SO_REUSEADDR 275 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 276 (socklen_t)sizeof(on)) < 0) { 277 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 278 sock_strerror(errno)); 279 #ifndef USE_WINSOCK 280 if(errno != ENOSYS) { 281 close(s); 282 *noproto = 0; 283 *inuse = 0; 284 return -1; 285 } 286 #else 287 closesocket(s); 288 *noproto = 0; 289 *inuse = 0; 290 return -1; 291 #endif 292 } 293 #endif /* SO_REUSEADDR */ 294 #ifdef SO_REUSEPORT 295 # ifdef SO_REUSEPORT_LB 296 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 297 * like SO_REUSEPORT on Linux. This is what the users want 298 * with the config option in unbound.conf; if we actually 299 * need local address and port reuse they'll also need to 300 * have SO_REUSEPORT set for them, assume it was _LB they want. 301 */ 302 if (reuseport && *reuseport && 303 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 304 (socklen_t)sizeof(on)) < 0) { 305 #ifdef ENOPROTOOPT 306 if(errno != ENOPROTOOPT || verbosity >= 3) 307 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) 
failed: %s", 308 strerror(errno)); 309 #endif 310 /* this option is not essential, we can continue */ 311 *reuseport = 0; 312 } 313 # else /* no SO_REUSEPORT_LB */ 314 315 /* try to set SO_REUSEPORT so that incoming 316 * queries are distributed evenly among the receiving threads. 317 * Each thread must have its own socket bound to the same port, 318 * with SO_REUSEPORT set on each socket. 319 */ 320 if (reuseport && *reuseport && 321 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 322 (socklen_t)sizeof(on)) < 0) { 323 #ifdef ENOPROTOOPT 324 if(errno != ENOPROTOOPT || verbosity >= 3) 325 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 326 strerror(errno)); 327 #endif 328 /* this option is not essential, we can continue */ 329 *reuseport = 0; 330 } 331 # endif /* SO_REUSEPORT_LB */ 332 #else 333 (void)reuseport; 334 #endif /* defined(SO_REUSEPORT) */ 335 #ifdef IP_TRANSPARENT 336 if (transparent && 337 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 338 (socklen_t)sizeof(on)) < 0) { 339 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 340 strerror(errno)); 341 } 342 #elif defined(IP_BINDANY) 343 if (transparent && 344 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 345 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 346 (void*)&on, (socklen_t)sizeof(on)) < 0) { 347 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 348 (family==AF_INET6?"V6":""), strerror(errno)); 349 } 350 #elif defined(SO_BINDANY) 351 if (transparent && 352 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 353 (socklen_t)sizeof(on)) < 0) { 354 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 355 strerror(errno)); 356 } 357 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 358 } 359 #ifdef IP_FREEBIND 360 if(freebind && 361 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 362 (socklen_t)sizeof(on)) < 0) { 363 log_warn("setsockopt(.. IP_FREEBIND ..) 
failed: %s", 364 strerror(errno)); 365 } 366 #endif /* IP_FREEBIND */ 367 if(rcv) { 368 #ifdef SO_RCVBUF 369 int got; 370 socklen_t slen = (socklen_t)sizeof(got); 371 # ifdef SO_RCVBUFFORCE 372 /* Linux specific: try to use root permission to override 373 * system limits on rcvbuf. The limit is stored in 374 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 375 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 376 (socklen_t)sizeof(rcv)) < 0) { 377 if(errno != EPERM) { 378 log_err("setsockopt(..., SO_RCVBUFFORCE, " 379 "...) failed: %s", sock_strerror(errno)); 380 sock_close(s); 381 *noproto = 0; 382 *inuse = 0; 383 return -1; 384 } 385 # endif /* SO_RCVBUFFORCE */ 386 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 387 (socklen_t)sizeof(rcv)) < 0) { 388 log_err("setsockopt(..., SO_RCVBUF, " 389 "...) failed: %s", sock_strerror(errno)); 390 sock_close(s); 391 *noproto = 0; 392 *inuse = 0; 393 return -1; 394 } 395 /* check if we got the right thing or if system 396 * reduced to some system max. Warn if so */ 397 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 398 &slen) >= 0 && got < rcv/2) { 399 log_warn("so-rcvbuf %u was not granted. " 400 "Got %u. To fix: start with " 401 "root permissions(linux) or sysctl " 402 "bigger net.core.rmem_max(linux) or " 403 "kern.ipc.maxsockbuf(bsd) values.", 404 (unsigned)rcv, (unsigned)got); 405 } 406 # ifdef SO_RCVBUFFORCE 407 } 408 # endif 409 #endif /* SO_RCVBUF */ 410 } 411 /* first do RCVBUF as the receive buffer is more important */ 412 if(snd) { 413 #ifdef SO_SNDBUF 414 int got; 415 socklen_t slen = (socklen_t)sizeof(got); 416 # ifdef SO_SNDBUFFORCE 417 /* Linux specific: try to use root permission to override 418 * system limits on sndbuf. 
The limit is stored in 419 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 420 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 421 (socklen_t)sizeof(snd)) < 0) { 422 if(errno != EPERM) { 423 log_err("setsockopt(..., SO_SNDBUFFORCE, " 424 "...) failed: %s", sock_strerror(errno)); 425 sock_close(s); 426 *noproto = 0; 427 *inuse = 0; 428 return -1; 429 } 430 # endif /* SO_SNDBUFFORCE */ 431 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 432 (socklen_t)sizeof(snd)) < 0) { 433 log_err("setsockopt(..., SO_SNDBUF, " 434 "...) failed: %s", sock_strerror(errno)); 435 sock_close(s); 436 *noproto = 0; 437 *inuse = 0; 438 return -1; 439 } 440 /* check if we got the right thing or if system 441 * reduced to some system max. Warn if so */ 442 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 443 &slen) >= 0 && got < snd/2) { 444 log_warn("so-sndbuf %u was not granted. " 445 "Got %u. To fix: start with " 446 "root permissions(linux) or sysctl " 447 "bigger net.core.wmem_max(linux) or " 448 "kern.ipc.maxsockbuf(bsd) values.", 449 (unsigned)snd, (unsigned)got); 450 } 451 # ifdef SO_SNDBUFFORCE 452 } 453 # endif 454 #endif /* SO_SNDBUF */ 455 } 456 err = set_ip_dscp(s, family, dscp); 457 if(err != NULL) 458 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 459 if(family == AF_INET6) { 460 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 461 int omit6_set = 0; 462 int action; 463 # endif 464 # if defined(IPV6_V6ONLY) 465 if(v6only 466 # ifdef HAVE_SYSTEMD 467 /* Systemd wants to control if the socket is v6 only 468 * or both, with BindIPv6Only=default, ipv6-only or 469 * both in systemd.socket, so it is not set here. */ 470 && !got_fd_from_systemd 471 # endif 472 ) { 473 int val=(v6only==2)?0:1; 474 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 475 (void*)&val, (socklen_t)sizeof(val)) < 0) { 476 log_err("setsockopt(..., IPV6_V6ONLY" 477 ", ...) 
failed: %s", sock_strerror(errno)); 478 sock_close(s); 479 *noproto = 0; 480 *inuse = 0; 481 return -1; 482 } 483 } 484 # endif 485 # if defined(IPV6_USE_MIN_MTU) 486 /* 487 * There is no fragmentation of IPv6 datagrams 488 * during forwarding in the network. Therefore 489 * we do not send UDP datagrams larger than 490 * the minimum IPv6 MTU of 1280 octets. The 491 * EDNS0 message length can be larger if the 492 * network stack supports IPV6_USE_MIN_MTU. 493 */ 494 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 495 (void*)&on, (socklen_t)sizeof(on)) < 0) { 496 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 497 "...) failed: %s", sock_strerror(errno)); 498 sock_close(s); 499 *noproto = 0; 500 *inuse = 0; 501 return -1; 502 } 503 # elif defined(IPV6_MTU) 504 # ifndef USE_WINSOCK 505 /* 506 * On Linux, to send no larger than 1280, the PMTUD is 507 * disabled by default for datagrams anyway, so we set 508 * the MTU to use. 509 */ 510 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 511 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 512 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 513 sock_strerror(errno)); 514 sock_close(s); 515 *noproto = 0; 516 *inuse = 0; 517 return -1; 518 } 519 # elif defined(IPV6_USER_MTU) 520 /* As later versions of the mingw crosscompiler define 521 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 522 * instead which is writable; IPV6_MTU is readonly there. */ 523 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 524 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 525 if (WSAGetLastError() != WSAENOPROTOOPT) { 526 log_err("setsockopt(..., IPV6_USER_MTU, ...) 
failed: %s", 527 wsa_strerror(WSAGetLastError())); 528 sock_close(s); 529 *noproto = 0; 530 *inuse = 0; 531 return -1; 532 } 533 } 534 # endif /* USE_WINSOCK */ 535 # endif /* IPv6 MTU */ 536 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 537 # if defined(IP_PMTUDISC_OMIT) 538 action = IP_PMTUDISC_OMIT; 539 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 540 &action, (socklen_t)sizeof(action)) < 0) { 541 542 if (errno != EINVAL) { 543 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 544 strerror(errno)); 545 sock_close(s); 546 *noproto = 0; 547 *inuse = 0; 548 return -1; 549 } 550 } 551 else 552 { 553 omit6_set = 1; 554 } 555 # endif 556 if (omit6_set == 0) { 557 action = IP_PMTUDISC_DONT; 558 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 559 &action, (socklen_t)sizeof(action)) < 0) { 560 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 561 strerror(errno)); 562 sock_close(s); 563 *noproto = 0; 564 *inuse = 0; 565 return -1; 566 } 567 } 568 # endif /* IPV6_MTU_DISCOVER */ 569 } else if(family == AF_INET) { 570 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 571 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 572 * PMTU information is not accepted, but fragmentation is allowed 573 * if and only if the packet size exceeds the outgoing interface MTU 574 * (and also uses the interface mtu to determine the size of the packets). 575 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 576 * FreeBSD already has same semantics without setting the option. */ 577 int omit_set = 0; 578 int action; 579 # if defined(IP_PMTUDISC_OMIT) 580 action = IP_PMTUDISC_OMIT; 581 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 582 &action, (socklen_t)sizeof(action)) < 0) { 583 584 if (errno != EINVAL) { 585 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) 
failed: %s", 586 strerror(errno)); 587 sock_close(s); 588 *noproto = 0; 589 *inuse = 0; 590 return -1; 591 } 592 } 593 else 594 { 595 omit_set = 1; 596 } 597 # endif 598 if (omit_set == 0) { 599 action = IP_PMTUDISC_DONT; 600 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 601 &action, (socklen_t)sizeof(action)) < 0) { 602 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 603 strerror(errno)); 604 sock_close(s); 605 *noproto = 0; 606 *inuse = 0; 607 return -1; 608 } 609 } 610 # elif defined(IP_DONTFRAG) && !defined(__APPLE__) 611 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 612 * but does not work on that version, so we exclude it */ 613 int off = 0; 614 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 615 &off, (socklen_t)sizeof(off)) < 0) { 616 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s", 617 strerror(errno)); 618 sock_close(s); 619 *noproto = 0; 620 *inuse = 0; 621 return -1; 622 } 623 # endif /* IPv4 MTU */ 624 } 625 if( 626 #ifdef HAVE_SYSTEMD 627 !got_fd_from_systemd && 628 #endif 629 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 630 *noproto = 0; 631 *inuse = 0; 632 #ifndef USE_WINSOCK 633 #ifdef EADDRINUSE 634 *inuse = (errno == EADDRINUSE); 635 /* detect freebsd jail with no ipv6 permission */ 636 if(family==AF_INET6 && errno==EINVAL) 637 *noproto = 1; 638 else if(errno != EADDRINUSE && 639 !(errno == EACCES && verbosity < 4 && !listen) 640 #ifdef EADDRNOTAVAIL 641 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 642 #endif 643 ) { 644 log_err_addr("can't bind socket", strerror(errno), 645 (struct sockaddr_storage*)addr, addrlen); 646 } 647 #endif /* EADDRINUSE */ 648 #else /* USE_WINSOCK */ 649 if(WSAGetLastError() != WSAEADDRINUSE && 650 WSAGetLastError() != WSAEADDRNOTAVAIL && 651 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 652 log_err_addr("can't bind socket", 653 wsa_strerror(WSAGetLastError()), 654 (struct sockaddr_storage*)addr, addrlen); 655 } 656 #endif /* USE_WINSOCK 
*/ 657 sock_close(s); 658 return -1; 659 } 660 if(!fd_set_nonblock(s)) { 661 *noproto = 0; 662 *inuse = 0; 663 sock_close(s); 664 return -1; 665 } 666 return s; 667 } 668 669 int 670 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 671 int* reuseport, int transparent, int mss, int nodelay, int freebind, 672 int use_systemd, int dscp) 673 { 674 int s; 675 char* err; 676 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY) 677 int on = 1; 678 #endif 679 #ifdef HAVE_SYSTEMD 680 int got_fd_from_systemd = 0; 681 #endif 682 #ifdef USE_TCP_FASTOPEN 683 int qlen; 684 #endif 685 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 686 (void)transparent; 687 #endif 688 #if !defined(IP_FREEBIND) 689 (void)freebind; 690 #endif 691 verbose_print_addr(addr); 692 *noproto = 0; 693 #ifdef HAVE_SYSTEMD 694 if (!use_systemd || 695 (use_systemd 696 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1, 697 addr->ai_addr, addr->ai_addrlen, 698 NULL)) == -1)) { 699 #else 700 (void)use_systemd; 701 #endif 702 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 703 #ifndef USE_WINSOCK 704 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 705 *noproto = 1; 706 return -1; 707 } 708 #else 709 if(WSAGetLastError() == WSAEAFNOSUPPORT || 710 WSAGetLastError() == WSAEPROTONOSUPPORT) { 711 *noproto = 1; 712 return -1; 713 } 714 #endif 715 log_err("can't create socket: %s", sock_strerror(errno)); 716 return -1; 717 } 718 if(nodelay) { 719 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY) 720 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on, 721 (socklen_t)sizeof(on)) < 0) { 722 #ifndef USE_WINSOCK 723 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 724 strerror(errno)); 725 #else 726 log_err(" setsockopt(.. TCP_NODELAY ..) 
failed: %s", 727 wsa_strerror(WSAGetLastError())); 728 #endif 729 } 730 #else 731 log_warn(" setsockopt(TCP_NODELAY) unsupported"); 732 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */ 733 } 734 if (mss > 0) { 735 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 736 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss, 737 (socklen_t)sizeof(mss)) < 0) { 738 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s", 739 sock_strerror(errno)); 740 } else { 741 verbose(VERB_ALGO, 742 " tcp socket mss set to %d", mss); 743 } 744 #else 745 log_warn(" setsockopt(TCP_MAXSEG) unsupported"); 746 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 747 } 748 #ifdef HAVE_SYSTEMD 749 } else { 750 got_fd_from_systemd = 1; 751 } 752 #endif 753 #ifdef SO_REUSEADDR 754 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 755 (socklen_t)sizeof(on)) < 0) { 756 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 757 sock_strerror(errno)); 758 sock_close(s); 759 return -1; 760 } 761 #endif /* SO_REUSEADDR */ 762 #ifdef IP_FREEBIND 763 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 764 (socklen_t)sizeof(on)) < 0) { 765 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 766 strerror(errno)); 767 } 768 #endif /* IP_FREEBIND */ 769 #ifdef SO_REUSEPORT 770 /* try to set SO_REUSEPORT so that incoming 771 * connections are distributed evenly among the receiving threads. 772 * Each thread must have its own socket bound to the same port, 773 * with SO_REUSEPORT set on each socket. 774 */ 775 if (reuseport && *reuseport && 776 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 777 (socklen_t)sizeof(on)) < 0) { 778 #ifdef ENOPROTOOPT 779 if(errno != ENOPROTOOPT || verbosity >= 3) 780 log_warn("setsockopt(.. SO_REUSEPORT ..) 
failed: %s", 781 strerror(errno)); 782 #endif 783 /* this option is not essential, we can continue */ 784 *reuseport = 0; 785 } 786 #else 787 (void)reuseport; 788 #endif /* defined(SO_REUSEPORT) */ 789 #if defined(IPV6_V6ONLY) 790 if(addr->ai_family == AF_INET6 && v6only 791 # ifdef HAVE_SYSTEMD 792 /* Systemd wants to control if the socket is v6 only 793 * or both, with BindIPv6Only=default, ipv6-only or 794 * both in systemd.socket, so it is not set here. */ 795 && !got_fd_from_systemd 796 # endif 797 ) { 798 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 799 (void*)&on, (socklen_t)sizeof(on)) < 0) { 800 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 801 sock_strerror(errno)); 802 sock_close(s); 803 return -1; 804 } 805 } 806 #else 807 (void)v6only; 808 #endif /* IPV6_V6ONLY */ 809 #ifdef IP_TRANSPARENT 810 if (transparent && 811 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 812 (socklen_t)sizeof(on)) < 0) { 813 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 814 strerror(errno)); 815 } 816 #elif defined(IP_BINDANY) 817 if (transparent && 818 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 819 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 820 (void*)&on, (socklen_t)sizeof(on)) < 0) { 821 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 822 (addr->ai_family==AF_INET6?"V6":""), strerror(errno)); 823 } 824 #elif defined(SO_BINDANY) 825 if (transparent && 826 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t) 827 sizeof(on)) < 0) { 828 log_warn("setsockopt(.. SO_BINDANY ..) 
failed: %s", 829 strerror(errno)); 830 } 831 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 832 err = set_ip_dscp(s, addr->ai_family, dscp); 833 if(err != NULL) 834 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err); 835 if( 836 #ifdef HAVE_SYSTEMD 837 !got_fd_from_systemd && 838 #endif 839 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { 840 #ifndef USE_WINSOCK 841 /* detect freebsd jail with no ipv6 permission */ 842 if(addr->ai_family==AF_INET6 && errno==EINVAL) 843 *noproto = 1; 844 else { 845 log_err_addr("can't bind socket", strerror(errno), 846 (struct sockaddr_storage*)addr->ai_addr, 847 addr->ai_addrlen); 848 } 849 #else 850 log_err_addr("can't bind socket", 851 wsa_strerror(WSAGetLastError()), 852 (struct sockaddr_storage*)addr->ai_addr, 853 addr->ai_addrlen); 854 #endif 855 sock_close(s); 856 return -1; 857 } 858 if(!fd_set_nonblock(s)) { 859 sock_close(s); 860 return -1; 861 } 862 if(listen(s, TCP_BACKLOG) == -1) { 863 log_err("can't listen: %s", sock_strerror(errno)); 864 sock_close(s); 865 return -1; 866 } 867 #ifdef USE_TCP_FASTOPEN 868 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense 869 against IP spoofing attacks as suggested in RFC7413 */ 870 #ifdef __APPLE__ 871 /* OS X implementation only supports qlen of 1 via this call. Actual 872 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. 
/**
 * Set the DiffServ codepoint on a socket.
 * The dscp value is shifted left by two bits and written with
 * IPV6_TCLASS for AF_INET6 sockets and with IP_TOS for every other
 * address family.
 * @param socket: the socket file descriptor.
 * @param addrfamily: address family of the socket.
 * @param dscp: the codepoint; 0 means nothing is set.
 * @return NULL on success (or when dscp is 0), otherwise a string that
 *	describes the error.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int ds;

	if(dscp == 0)
		return NULL;
	ds = dscp << 2;
	switch(addrfamily) {
	case AF_INET6:
	#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
			sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	#else
		return "IPV6_TCLASS not defined on this system";
	#endif
	default:
		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	}
	return NULL;
}

/**
 * Create a listening stream socket on a local (unix domain) path.
 * When use_systemd is set and systemd passed a matching socket, that
 * descriptor is returned. Otherwise any old socket file at path is
 * removed and a new nonblocking socket is bound to it and set to listen.
 * @param path: filesystem path of the socket.
 * @param noproto: set to 1 only when local sockets are not supported by
 *	this build; not written otherwise.
 * @param use_systemd: if true, try to get the socket from systemd first.
 * @return the file descriptor, or -1 on failure (the error is logged).
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	int ret;

	/* a socket passed by systemd is returned as it is; if there is
	 * none, continue and create the socket here. This is a plain early
	 * return so that the braces balance in every configuration. */
	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM,
		1, NULL, 0, path)) != -1)
		return ret;
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	if (strlcpy(usock.sun_path, path, sizeof(usock.sun_path)) >=
		sizeof(usock.sun_path)) {
		/* refuse to bind a truncated path; it would differ from the
		 * path that is unlinked and logged below */
		log_err("Local socket path %s is too long", path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
ifname?ifname:"default", port, gai_strerror(r), 1022 #ifdef EAI_SYSTEM 1023 (r==EAI_SYSTEM?(char*)strerror(errno):"") 1024 #else 1025 "" 1026 #endif 1027 ); 1028 return -1; 1029 } 1030 if(stype == SOCK_DGRAM) { 1031 verbose_print_addr(res); 1032 s = create_udp_sock(res->ai_family, res->ai_socktype, 1033 (struct sockaddr*)res->ai_addr, res->ai_addrlen, 1034 v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, 1035 reuseport, transparent, freebind, use_systemd, dscp); 1036 if(s == -1 && inuse) { 1037 log_err("bind: address already in use"); 1038 } else if(s == -1 && noproto && hints->ai_family == AF_INET6){ 1039 *noip6 = 1; 1040 } 1041 } else { 1042 s = create_tcp_accept_sock(res, v6only, &noproto, reuseport, 1043 transparent, tcp_mss, nodelay, freebind, use_systemd, 1044 dscp); 1045 if(s == -1 && noproto && hints->ai_family == AF_INET6){ 1046 *noip6 = 1; 1047 } 1048 } 1049 1050 ub_sock->addr = res; 1051 ub_sock->s = s; 1052 ub_sock->fam = hints->ai_family; 1053 ub_sock->acl = NULL; 1054 1055 return s; 1056 } 1057 1058 /** make socket and first see if ifname contains port override info */ 1059 static int 1060 make_sock_port(int stype, const char* ifname, const char* port, 1061 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, 1062 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, 1063 int use_systemd, int dscp, struct unbound_socket* ub_sock) 1064 { 1065 char* s = strchr(ifname, '@'); 1066 if(s) { 1067 /* override port with ifspec@port */ 1068 char p[16]; 1069 char newif[128]; 1070 if((size_t)(s-ifname) >= sizeof(newif)) { 1071 log_err("ifname too long: %s", ifname); 1072 *noip6 = 0; 1073 return -1; 1074 } 1075 if(strlen(s+1) >= sizeof(p)) { 1076 log_err("portnumber too long: %s", ifname); 1077 *noip6 = 0; 1078 return -1; 1079 } 1080 (void)strlcpy(newif, ifname, sizeof(newif)); 1081 newif[s-ifname] = 0; 1082 (void)strlcpy(p, s+1, sizeof(p)); 1083 p[strlen(s+1)]=0; 1084 return make_sock(stype, newif, p, hints, v6only, noip6, 
rcv, 1085 snd, reuseport, transparent, tcp_mss, nodelay, freebind, 1086 use_systemd, dscp, ub_sock); 1087 } 1088 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, 1089 reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, 1090 dscp, ub_sock); 1091 } 1092 1093 /** 1094 * Add port to open ports list. 1095 * @param list: list head. changed. 1096 * @param s: fd. 1097 * @param ftype: if fd is UDP. 1098 * @param pp2_enabled: if PROXYv2 is enabled for this port. 1099 * @param ub_sock: socket with address. 1100 * @return false on failure. list in unchanged then. 1101 */ 1102 static int 1103 port_insert(struct listen_port** list, int s, enum listen_type ftype, 1104 int pp2_enabled, struct unbound_socket* ub_sock) 1105 { 1106 struct listen_port* item = (struct listen_port*)malloc( 1107 sizeof(struct listen_port)); 1108 if(!item) 1109 return 0; 1110 item->next = *list; 1111 item->fd = s; 1112 item->ftype = ftype; 1113 item->pp2_enabled = pp2_enabled; 1114 item->socket = ub_sock; 1115 *list = item; 1116 return 1; 1117 } 1118 1119 /** set fd to receive software timestamps */ 1120 static int 1121 set_recvtimestamp(int s) 1122 { 1123 #ifdef HAVE_LINUX_NET_TSTAMP_H 1124 int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; 1125 if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) { 1126 log_err("setsockopt(..., SO_TIMESTAMPNS, ...) 
failed: %s", 1127 strerror(errno)); 1128 return 0; 1129 } 1130 return 1; 1131 #else 1132 log_err("packets timestamping is not supported on this platform"); 1133 (void)s; 1134 return 0; 1135 #endif 1136 } 1137 1138 /** set fd to receive source address packet info */ 1139 static int 1140 set_recvpktinfo(int s, int family) 1141 { 1142 #if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO) 1143 int on = 1; 1144 #else 1145 (void)s; 1146 #endif 1147 if(family == AF_INET6) { 1148 # ifdef IPV6_RECVPKTINFO 1149 if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1150 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1151 log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s", 1152 strerror(errno)); 1153 return 0; 1154 } 1155 # elif defined(IPV6_PKTINFO) 1156 if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO, 1157 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1158 log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s", 1159 strerror(errno)); 1160 return 0; 1161 } 1162 # else 1163 log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please " 1164 "disable interface-automatic or do-ip6 in config"); 1165 return 0; 1166 # endif /* defined IPV6_RECVPKTINFO */ 1167 1168 } else if(family == AF_INET) { 1169 # ifdef IP_PKTINFO 1170 if(setsockopt(s, IPPROTO_IP, IP_PKTINFO, 1171 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1172 log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s", 1173 strerror(errno)); 1174 return 0; 1175 } 1176 # elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR) 1177 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, 1178 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1179 log_err("setsockopt(..., IP_RECVDSTADDR, ...) 
failed: %s", 1180 strerror(errno)); 1181 return 0; 1182 } 1183 # else 1184 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable " 1185 "interface-automatic or do-ip4 in config"); 1186 return 0; 1187 # endif /* IP_PKTINFO */ 1188 1189 } 1190 return 1; 1191 } 1192 1193 /** see if interface is ssl, its port number == the ssl port number */ 1194 static int 1195 if_is_ssl(const char* ifname, const char* port, int ssl_port, 1196 struct config_strlist* tls_additional_port) 1197 { 1198 struct config_strlist* s; 1199 char* p = strchr(ifname, '@'); 1200 if(!p && atoi(port) == ssl_port) 1201 return 1; 1202 if(p && atoi(p+1) == ssl_port) 1203 return 1; 1204 for(s = tls_additional_port; s; s = s->next) { 1205 if(p && atoi(p+1) == atoi(s->str)) 1206 return 1; 1207 if(!p && atoi(port) == atoi(s->str)) 1208 return 1; 1209 } 1210 return 0; 1211 } 1212 1213 /** 1214 * Helper for ports_open. Creates one interface (or NULL for default). 1215 * @param ifname: The interface ip address. 1216 * @param do_auto: use automatic interface detection. 1217 * If enabled, then ifname must be the wildcard name. 1218 * @param do_udp: if udp should be used. 1219 * @param do_tcp: if tcp should be used. 1220 * @param hints: for getaddrinfo. family and flags have to be set by caller. 1221 * @param port: Port number to use (as string). 1222 * @param list: list of open ports, appended to, changed to point to list head. 1223 * @param rcv: receive buffer size for UDP 1224 * @param snd: send buffer size for UDP 1225 * @param ssl_port: ssl service port number 1226 * @param tls_additional_port: list of additional ssl service port numbers. 1227 * @param https_port: DoH service port number 1228 * @param proxy_protocol_port: list of PROXYv2 port numbers. 1229 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. 1230 * set to false on exit if reuseport failed due to no kernel support. 1231 * @param transparent: set IP_TRANSPARENT socket option. 
1232 * @param tcp_mss: maximum segment size of tcp socket. default if zero. 1233 * @param freebind: set IP_FREEBIND socket option. 1234 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection 1235 * @param use_systemd: if true, fetch sockets from systemd. 1236 * @param dnscrypt_port: dnscrypt service port number 1237 * @param dscp: DSCP to use. 1238 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to 1239 * wait to discard if UDP packets have waited for long in the socket 1240 * buffer. 1241 * @return: returns false on error. 1242 */ 1243 static int 1244 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 1245 struct addrinfo *hints, const char* port, struct listen_port** list, 1246 size_t rcv, size_t snd, int ssl_port, 1247 struct config_strlist* tls_additional_port, int https_port, 1248 struct config_strlist* proxy_protocol_port, 1249 int* reuseport, int transparent, int tcp_mss, int freebind, 1250 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, 1251 int sock_queue_timeout) 1252 { 1253 int s, noip6=0; 1254 int is_https = if_is_https(ifname, port, https_port); 1255 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port); 1256 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); 1257 int nodelay = is_https && http2_nodelay; 1258 struct unbound_socket* ub_sock; 1259 1260 if(!do_udp && !do_tcp) 1261 return 0; 1262 1263 if(is_pp2) { 1264 if(is_dnscrypt) { 1265 fatal_exit("PROXYv2 and DNSCrypt combination not " 1266 "supported!"); 1267 } else if(is_https) { 1268 fatal_exit("PROXYv2 and DoH combination not " 1269 "supported!"); 1270 } 1271 } 1272 1273 if(do_auto) { 1274 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1275 if(!ub_sock) 1276 return 0; 1277 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1278 &noip6, rcv, snd, reuseport, transparent, 1279 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1280 if(ub_sock->addr) 1281 freeaddrinfo(ub_sock->addr); 1282 
free(ub_sock); 1283 if(noip6) { 1284 log_warn("IPv6 protocol not available"); 1285 return 1; 1286 } 1287 return 0; 1288 } 1289 /* getting source addr packet info is highly non-portable */ 1290 if(!set_recvpktinfo(s, hints->ai_family)) { 1291 sock_close(s); 1292 if(ub_sock->addr) 1293 freeaddrinfo(ub_sock->addr); 1294 free(ub_sock); 1295 return 0; 1296 } 1297 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1298 log_warn("socket timestamping is not available"); 1299 } 1300 if(!port_insert(list, s, is_dnscrypt 1301 ?listen_type_udpancil_dnscrypt:listen_type_udpancil, 1302 is_pp2, ub_sock)) { 1303 sock_close(s); 1304 if(ub_sock->addr) 1305 freeaddrinfo(ub_sock->addr); 1306 free(ub_sock); 1307 return 0; 1308 } 1309 } else if(do_udp) { 1310 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1311 if(!ub_sock) 1312 return 0; 1313 /* regular udp socket */ 1314 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1315 &noip6, rcv, snd, reuseport, transparent, 1316 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1317 if(ub_sock->addr) 1318 freeaddrinfo(ub_sock->addr); 1319 free(ub_sock); 1320 if(noip6) { 1321 log_warn("IPv6 protocol not available"); 1322 return 1; 1323 } 1324 return 0; 1325 } 1326 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1327 log_warn("socket timestamping is not available"); 1328 } 1329 if(!port_insert(list, s, is_dnscrypt 1330 ?listen_type_udp_dnscrypt:listen_type_udp, 1331 is_pp2, ub_sock)) { 1332 sock_close(s); 1333 if(ub_sock->addr) 1334 freeaddrinfo(ub_sock->addr); 1335 free(ub_sock); 1336 return 0; 1337 } 1338 } 1339 if(do_tcp) { 1340 int is_ssl = if_is_ssl(ifname, port, ssl_port, 1341 tls_additional_port); 1342 enum listen_type port_type; 1343 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1344 if(!ub_sock) 1345 return 0; 1346 if(is_ssl) 1347 port_type = listen_type_ssl; 1348 else if(is_https) 1349 port_type = listen_type_http; 1350 else if(is_dnscrypt) 1351 port_type = listen_type_tcp_dnscrypt; 1352 else 1353 
port_type = listen_type_tcp; 1354 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 1355 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, 1356 freebind, use_systemd, dscp, ub_sock)) == -1) { 1357 if(ub_sock->addr) 1358 freeaddrinfo(ub_sock->addr); 1359 free(ub_sock); 1360 if(noip6) { 1361 /*log_warn("IPv6 protocol not available");*/ 1362 return 1; 1363 } 1364 return 0; 1365 } 1366 if(is_ssl) 1367 verbose(VERB_ALGO, "setup TCP for SSL service"); 1368 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) { 1369 sock_close(s); 1370 if(ub_sock->addr) 1371 freeaddrinfo(ub_sock->addr); 1372 free(ub_sock); 1373 return 0; 1374 } 1375 } 1376 return 1; 1377 } 1378 1379 /** 1380 * Add items to commpoint list in front. 1381 * @param c: commpoint to add. 1382 * @param front: listen struct. 1383 * @return: false on failure. 1384 */ 1385 static int 1386 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) 1387 { 1388 struct listen_list* item = (struct listen_list*)malloc( 1389 sizeof(struct listen_list)); 1390 if(!item) 1391 return 0; 1392 item->com = c; 1393 item->next = front->cps; 1394 front->cps = item; 1395 return 1; 1396 } 1397 1398 void listen_setup_locks(void) 1399 { 1400 if(!stream_wait_lock_inited) { 1401 lock_basic_init(&stream_wait_count_lock); 1402 stream_wait_lock_inited = 1; 1403 } 1404 if(!http2_query_buffer_lock_inited) { 1405 lock_basic_init(&http2_query_buffer_count_lock); 1406 http2_query_buffer_lock_inited = 1; 1407 } 1408 if(!http2_response_buffer_lock_inited) { 1409 lock_basic_init(&http2_response_buffer_count_lock); 1410 http2_response_buffer_lock_inited = 1; 1411 } 1412 } 1413 1414 void listen_desetup_locks(void) 1415 { 1416 if(stream_wait_lock_inited) { 1417 stream_wait_lock_inited = 0; 1418 lock_basic_destroy(&stream_wait_count_lock); 1419 } 1420 if(http2_query_buffer_lock_inited) { 1421 http2_query_buffer_lock_inited = 0; 1422 lock_basic_destroy(&http2_query_buffer_count_lock); 1423 } 1424 
if(http2_response_buffer_lock_inited) { 1425 http2_response_buffer_lock_inited = 0; 1426 lock_basic_destroy(&http2_response_buffer_count_lock); 1427 } 1428 } 1429 1430 struct listen_dnsport* 1431 listen_create(struct comm_base* base, struct listen_port* ports, 1432 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, 1433 int harden_large_queries, uint32_t http_max_streams, 1434 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, 1435 void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, 1436 void *cb_arg) 1437 { 1438 struct listen_dnsport* front = (struct listen_dnsport*) 1439 malloc(sizeof(struct listen_dnsport)); 1440 if(!front) 1441 return NULL; 1442 front->cps = NULL; 1443 front->udp_buff = sldns_buffer_new(bufsize); 1444 #ifdef USE_DNSCRYPT 1445 front->dnscrypt_udp_buff = NULL; 1446 #endif 1447 if(!front->udp_buff) { 1448 free(front); 1449 return NULL; 1450 } 1451 1452 /* create comm points as needed */ 1453 while(ports) { 1454 struct comm_point* cp = NULL; 1455 if(ports->ftype == listen_type_udp || 1456 ports->ftype == listen_type_udp_dnscrypt) { 1457 cp = comm_point_create_udp(base, ports->fd, 1458 front->udp_buff, ports->pp2_enabled, cb, 1459 cb_arg, ports->socket); 1460 } else if(ports->ftype == listen_type_tcp || 1461 ports->ftype == listen_type_tcp_dnscrypt) { 1462 cp = comm_point_create_tcp(base, ports->fd, 1463 tcp_accept_count, tcp_idle_timeout, 1464 harden_large_queries, 0, NULL, 1465 tcp_conn_limit, bufsize, front->udp_buff, 1466 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1467 ports->socket); 1468 } else if(ports->ftype == listen_type_ssl || 1469 ports->ftype == listen_type_http) { 1470 cp = comm_point_create_tcp(base, ports->fd, 1471 tcp_accept_count, tcp_idle_timeout, 1472 harden_large_queries, 1473 http_max_streams, http_endpoint, 1474 tcp_conn_limit, bufsize, front->udp_buff, 1475 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1476 ports->socket); 1477 if(ports->ftype == listen_type_http) { 1478 if(!sslctx 
&& !http_notls) { 1479 log_warn("HTTPS port configured, but " 1480 "no TLS tls-service-key or " 1481 "tls-service-pem set"); 1482 } 1483 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 1484 if(!http_notls) { 1485 log_warn("Unbound is not compiled " 1486 "with an OpenSSL version " 1487 "supporting ALPN " 1488 "(OpenSSL >= 1.0.2). This " 1489 "is required to use " 1490 "DNS-over-HTTPS"); 1491 } 1492 #endif 1493 #ifndef HAVE_NGHTTP2_NGHTTP2_H 1494 log_warn("Unbound is not compiled with " 1495 "nghttp2. This is required to use " 1496 "DNS-over-HTTPS."); 1497 #endif 1498 } 1499 } else if(ports->ftype == listen_type_udpancil || 1500 ports->ftype == listen_type_udpancil_dnscrypt) { 1501 cp = comm_point_create_udp_ancil(base, ports->fd, 1502 front->udp_buff, ports->pp2_enabled, cb, 1503 cb_arg, ports->socket); 1504 } 1505 if(!cp) { 1506 log_err("can't create commpoint"); 1507 listen_delete(front); 1508 return NULL; 1509 } 1510 if((http_notls && ports->ftype == listen_type_http) || 1511 (ports->ftype == listen_type_tcp) || 1512 (ports->ftype == listen_type_udp) || 1513 (ports->ftype == listen_type_udpancil) || 1514 (ports->ftype == listen_type_tcp_dnscrypt) || 1515 (ports->ftype == listen_type_udp_dnscrypt) || 1516 (ports->ftype == listen_type_udpancil_dnscrypt)) 1517 cp->ssl = NULL; 1518 else 1519 cp->ssl = sslctx; 1520 cp->dtenv = dtenv; 1521 cp->do_not_close = 1; 1522 #ifdef USE_DNSCRYPT 1523 if (ports->ftype == listen_type_udp_dnscrypt || 1524 ports->ftype == listen_type_tcp_dnscrypt || 1525 ports->ftype == listen_type_udpancil_dnscrypt) { 1526 cp->dnscrypt = 1; 1527 cp->dnscrypt_buffer = sldns_buffer_new(bufsize); 1528 if(!cp->dnscrypt_buffer) { 1529 log_err("can't alloc dnscrypt_buffer"); 1530 comm_point_delete(cp); 1531 listen_delete(front); 1532 return NULL; 1533 } 1534 front->dnscrypt_udp_buff = cp->dnscrypt_buffer; 1535 } 1536 #endif 1537 if(!listen_cp_insert(cp, front)) { 1538 log_err("malloc failed"); 1539 comm_point_delete(cp); 1540 listen_delete(front); 1541 return 
NULL; 1542 } 1543 ports = ports->next; 1544 } 1545 if(!front->cps) { 1546 log_err("Could not open sockets to accept queries."); 1547 listen_delete(front); 1548 return NULL; 1549 } 1550 1551 return front; 1552 } 1553 1554 void 1555 listen_list_delete(struct listen_list* list) 1556 { 1557 struct listen_list *p = list, *pn; 1558 while(p) { 1559 pn = p->next; 1560 comm_point_delete(p->com); 1561 free(p); 1562 p = pn; 1563 } 1564 } 1565 1566 void 1567 listen_delete(struct listen_dnsport* front) 1568 { 1569 if(!front) 1570 return; 1571 listen_list_delete(front->cps); 1572 #ifdef USE_DNSCRYPT 1573 if(front->dnscrypt_udp_buff && 1574 front->udp_buff != front->dnscrypt_udp_buff) { 1575 sldns_buffer_free(front->dnscrypt_udp_buff); 1576 } 1577 #endif 1578 sldns_buffer_free(front->udp_buff); 1579 free(front); 1580 } 1581 1582 #ifdef HAVE_GETIFADDRS 1583 static int 1584 resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size) 1585 { 1586 struct ifaddrs *ifa; 1587 void *tmpbuf; 1588 int last_ip_addresses_size = *ip_addresses_size; 1589 1590 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) { 1591 sa_family_t family; 1592 const char* atsign; 1593 #ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */ 1594 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1]; 1595 #else 1596 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1]; 1597 #endif 1598 1599 if((atsign=strrchr(search_ifa, '@')) != NULL) { 1600 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa) 1601 || strncmp(ifa->ifa_name, search_ifa, 1602 atsign-search_ifa) != 0) 1603 continue; 1604 } else { 1605 if(strcmp(ifa->ifa_name, search_ifa) != 0) 1606 continue; 1607 atsign = ""; 1608 } 1609 1610 if(ifa->ifa_addr == NULL) 1611 continue; 1612 1613 family = ifa->ifa_addr->sa_family; 1614 if(family == AF_INET) { 1615 char a4[INET_ADDRSTRLEN + 1]; 1616 struct sockaddr_in *in4 = (struct sockaddr_in *) 1617 ifa->ifa_addr; 1618 if(!inet_ntop(family, &in4->sin_addr, a4, 
sizeof(a4))) { 1619 log_err("inet_ntop failed"); 1620 return 0; 1621 } 1622 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1623 a4, atsign); 1624 } 1625 #ifdef INET6 1626 else if(family == AF_INET6) { 1627 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) 1628 ifa->ifa_addr; 1629 char a6[INET6_ADDRSTRLEN + 1]; 1630 char if_index_name[IF_NAMESIZE + 1]; 1631 if_index_name[0] = 0; 1632 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) { 1633 log_err("inet_ntop failed"); 1634 return 0; 1635 } 1636 (void)if_indextoname(in6->sin6_scope_id, 1637 (char *)if_index_name); 1638 if (strlen(if_index_name) != 0) { 1639 snprintf(addr_buf, sizeof(addr_buf), 1640 "%s%%%s%s", a6, if_index_name, atsign); 1641 } else { 1642 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1643 a6, atsign); 1644 } 1645 } 1646 #endif 1647 else { 1648 continue; 1649 } 1650 verbose(4, "interface %s has address %s", search_ifa, addr_buf); 1651 1652 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1653 if(!tmpbuf) { 1654 log_err("realloc failed: out of memory"); 1655 return 0; 1656 } else { 1657 *ip_addresses = tmpbuf; 1658 } 1659 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf); 1660 if(!(*ip_addresses)[*ip_addresses_size]) { 1661 log_err("strdup failed: out of memory"); 1662 return 0; 1663 } 1664 (*ip_addresses_size)++; 1665 } 1666 1667 if (*ip_addresses_size == last_ip_addresses_size) { 1668 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1669 if(!tmpbuf) { 1670 log_err("realloc failed: out of memory"); 1671 return 0; 1672 } else { 1673 *ip_addresses = tmpbuf; 1674 } 1675 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa); 1676 if(!(*ip_addresses)[*ip_addresses_size]) { 1677 log_err("strdup failed: out of memory"); 1678 return 0; 1679 } 1680 (*ip_addresses_size)++; 1681 } 1682 return 1; 1683 } 1684 #endif /* HAVE_GETIFADDRS */ 1685 1686 int resolve_interface_names(char** ifs, int num_ifs, 1687 struct config_strlist* list, char*** resif, 
int* num_resif) 1688 { 1689 #ifdef HAVE_GETIFADDRS 1690 struct ifaddrs *addrs = NULL; 1691 if(num_ifs == 0 && list == NULL) { 1692 *resif = NULL; 1693 *num_resif = 0; 1694 return 1; 1695 } 1696 if(getifaddrs(&addrs) == -1) { 1697 log_err("failed to list interfaces: getifaddrs: %s", 1698 strerror(errno)); 1699 freeifaddrs(addrs); 1700 return 0; 1701 } 1702 if(ifs) { 1703 int i; 1704 for(i=0; i<num_ifs; i++) { 1705 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { 1706 freeifaddrs(addrs); 1707 config_del_strarray(*resif, *num_resif); 1708 *resif = NULL; 1709 *num_resif = 0; 1710 return 0; 1711 } 1712 } 1713 } 1714 if(list) { 1715 struct config_strlist* p; 1716 for(p = list; p; p = p->next) { 1717 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { 1718 freeifaddrs(addrs); 1719 config_del_strarray(*resif, *num_resif); 1720 *resif = NULL; 1721 *num_resif = 0; 1722 return 0; 1723 } 1724 } 1725 } 1726 freeifaddrs(addrs); 1727 return 1; 1728 #else 1729 struct config_strlist* p; 1730 if(num_ifs == 0 && list == NULL) { 1731 *resif = NULL; 1732 *num_resif = 0; 1733 return 1; 1734 } 1735 *num_resif = num_ifs; 1736 for(p = list; p; p = p->next) { 1737 (*num_resif)++; 1738 } 1739 *resif = calloc(*num_resif, sizeof(**resif)); 1740 if(!*resif) { 1741 log_err("out of memory"); 1742 return 0; 1743 } 1744 if(ifs) { 1745 int i; 1746 for(i=0; i<num_ifs; i++) { 1747 (*resif)[i] = strdup(ifs[i]); 1748 if(!((*resif)[i])) { 1749 log_err("out of memory"); 1750 config_del_strarray(*resif, *num_resif); 1751 *resif = NULL; 1752 *num_resif = 0; 1753 return 0; 1754 } 1755 } 1756 } 1757 if(list) { 1758 int idx = num_ifs; 1759 for(p = list; p; p = p->next) { 1760 (*resif)[idx] = strdup(p->str); 1761 if(!((*resif)[idx])) { 1762 log_err("out of memory"); 1763 config_del_strarray(*resif, *num_resif); 1764 *resif = NULL; 1765 *num_resif = 0; 1766 return 0; 1767 } 1768 idx++; 1769 } 1770 } 1771 return 1; 1772 #endif /* HAVE_GETIFADDRS */ 1773 } 1774 1775 struct listen_port* 1776 
listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, 1777 int* reuseport) 1778 { 1779 struct listen_port* list = NULL; 1780 struct addrinfo hints; 1781 int i, do_ip4, do_ip6; 1782 int do_tcp, do_auto; 1783 char portbuf[32]; 1784 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port); 1785 do_ip4 = cfg->do_ip4; 1786 do_ip6 = cfg->do_ip6; 1787 do_tcp = cfg->do_tcp; 1788 do_auto = cfg->if_automatic && cfg->do_udp; 1789 if(cfg->incoming_num_tcp == 0) 1790 do_tcp = 0; 1791 1792 /* getaddrinfo */ 1793 memset(&hints, 0, sizeof(hints)); 1794 hints.ai_flags = AI_PASSIVE; 1795 /* no name lookups on our listening ports */ 1796 if(num_ifs > 0) 1797 hints.ai_flags |= AI_NUMERICHOST; 1798 hints.ai_family = AF_UNSPEC; 1799 #ifndef INET6 1800 do_ip6 = 0; 1801 #endif 1802 if(!do_ip4 && !do_ip6) { 1803 return NULL; 1804 } 1805 /* create ip4 and ip6 ports so that return addresses are nice. */ 1806 if(do_auto || num_ifs == 0) { 1807 if(do_auto && cfg->if_automatic_ports && 1808 cfg->if_automatic_ports[0]!=0) { 1809 char* now = cfg->if_automatic_ports; 1810 while(now && *now) { 1811 char* after; 1812 int extraport; 1813 while(isspace((unsigned char)*now)) 1814 now++; 1815 if(!*now) 1816 break; 1817 after = now; 1818 extraport = (int)strtol(now, &after, 10); 1819 if(extraport < 0 || extraport > 65535) { 1820 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1821 listening_ports_free(list); 1822 return NULL; 1823 } 1824 if(extraport == 0 && now == after) { 1825 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1826 listening_ports_free(list); 1827 return NULL; 1828 } 1829 now = after; 1830 snprintf(portbuf, sizeof(portbuf), "%d", extraport); 1831 if(do_ip6) { 1832 hints.ai_family = AF_INET6; 1833 if(!ports_create_if("::0", 1834 do_auto, cfg->do_udp, do_tcp, 1835 &hints, portbuf, 
&list, 1836 cfg->so_rcvbuf, cfg->so_sndbuf, 1837 cfg->ssl_port, cfg->tls_additional_port, 1838 cfg->https_port, 1839 cfg->proxy_protocol_port, 1840 reuseport, cfg->ip_transparent, 1841 cfg->tcp_mss, cfg->ip_freebind, 1842 cfg->http_nodelay, cfg->use_systemd, 1843 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1844 listening_ports_free(list); 1845 return NULL; 1846 } 1847 } 1848 if(do_ip4) { 1849 hints.ai_family = AF_INET; 1850 if(!ports_create_if("0.0.0.0", 1851 do_auto, cfg->do_udp, do_tcp, 1852 &hints, portbuf, &list, 1853 cfg->so_rcvbuf, cfg->so_sndbuf, 1854 cfg->ssl_port, cfg->tls_additional_port, 1855 cfg->https_port, 1856 cfg->proxy_protocol_port, 1857 reuseport, cfg->ip_transparent, 1858 cfg->tcp_mss, cfg->ip_freebind, 1859 cfg->http_nodelay, cfg->use_systemd, 1860 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1861 listening_ports_free(list); 1862 return NULL; 1863 } 1864 } 1865 } 1866 return list; 1867 } 1868 if(do_ip6) { 1869 hints.ai_family = AF_INET6; 1870 if(!ports_create_if(do_auto?"::0":"::1", 1871 do_auto, cfg->do_udp, do_tcp, 1872 &hints, portbuf, &list, 1873 cfg->so_rcvbuf, cfg->so_sndbuf, 1874 cfg->ssl_port, cfg->tls_additional_port, 1875 cfg->https_port, cfg->proxy_protocol_port, 1876 reuseport, cfg->ip_transparent, 1877 cfg->tcp_mss, cfg->ip_freebind, 1878 cfg->http_nodelay, cfg->use_systemd, 1879 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1880 listening_ports_free(list); 1881 return NULL; 1882 } 1883 } 1884 if(do_ip4) { 1885 hints.ai_family = AF_INET; 1886 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 1887 do_auto, cfg->do_udp, do_tcp, 1888 &hints, portbuf, &list, 1889 cfg->so_rcvbuf, cfg->so_sndbuf, 1890 cfg->ssl_port, cfg->tls_additional_port, 1891 cfg->https_port, cfg->proxy_protocol_port, 1892 reuseport, cfg->ip_transparent, 1893 cfg->tcp_mss, cfg->ip_freebind, 1894 cfg->http_nodelay, cfg->use_systemd, 1895 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1896 
listening_ports_free(list); 1897 return NULL; 1898 } 1899 } 1900 } else for(i = 0; i<num_ifs; i++) { 1901 if(str_is_ip6(ifs[i])) { 1902 if(!do_ip6) 1903 continue; 1904 hints.ai_family = AF_INET6; 1905 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1906 do_tcp, &hints, portbuf, &list, 1907 cfg->so_rcvbuf, cfg->so_sndbuf, 1908 cfg->ssl_port, cfg->tls_additional_port, 1909 cfg->https_port, cfg->proxy_protocol_port, 1910 reuseport, cfg->ip_transparent, 1911 cfg->tcp_mss, cfg->ip_freebind, 1912 cfg->http_nodelay, cfg->use_systemd, 1913 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1914 listening_ports_free(list); 1915 return NULL; 1916 } 1917 } else { 1918 if(!do_ip4) 1919 continue; 1920 hints.ai_family = AF_INET; 1921 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1922 do_tcp, &hints, portbuf, &list, 1923 cfg->so_rcvbuf, cfg->so_sndbuf, 1924 cfg->ssl_port, cfg->tls_additional_port, 1925 cfg->https_port, cfg->proxy_protocol_port, 1926 reuseport, cfg->ip_transparent, 1927 cfg->tcp_mss, cfg->ip_freebind, 1928 cfg->http_nodelay, cfg->use_systemd, 1929 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1930 listening_ports_free(list); 1931 return NULL; 1932 } 1933 } 1934 } 1935 1936 return list; 1937 } 1938 1939 void listening_ports_free(struct listen_port* list) 1940 { 1941 struct listen_port* nx; 1942 while(list) { 1943 nx = list->next; 1944 if(list->fd != -1) { 1945 sock_close(list->fd); 1946 } 1947 /* rc_ports don't have ub_socket */ 1948 if(list->socket) { 1949 if(list->socket->addr) 1950 freeaddrinfo(list->socket->addr); 1951 free(list->socket); 1952 } 1953 free(list); 1954 list = nx; 1955 } 1956 } 1957 1958 size_t listen_get_mem(struct listen_dnsport* listen) 1959 { 1960 struct listen_list* p; 1961 size_t s = sizeof(*listen) + sizeof(*listen->base) + 1962 sizeof(*listen->udp_buff) + 1963 sldns_buffer_capacity(listen->udp_buff); 1964 #ifdef USE_DNSCRYPT 1965 s += sizeof(*listen->dnscrypt_udp_buff); 1966 if(listen->udp_buff != 
listen->dnscrypt_udp_buff){ 1967 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff); 1968 } 1969 #endif 1970 for(p = listen->cps; p; p = p->next) { 1971 s += sizeof(*p); 1972 s += comm_point_get_mem(p->com); 1973 } 1974 return s; 1975 } 1976 1977 void listen_stop_accept(struct listen_dnsport* listen) 1978 { 1979 /* do not stop the ones that have no tcp_free list 1980 * (they have already stopped listening) */ 1981 struct listen_list* p; 1982 for(p=listen->cps; p; p=p->next) { 1983 if(p->com->type == comm_tcp_accept && 1984 p->com->tcp_free != NULL) { 1985 comm_point_stop_listening(p->com); 1986 } 1987 } 1988 } 1989 1990 void listen_start_accept(struct listen_dnsport* listen) 1991 { 1992 /* do not start the ones that have no tcp_free list, it is no 1993 * use to listen to them because they have no free tcp handlers */ 1994 struct listen_list* p; 1995 for(p=listen->cps; p; p=p->next) { 1996 if(p->com->type == comm_tcp_accept && 1997 p->com->tcp_free != NULL) { 1998 comm_point_start_listening(p->com, -1, -1); 1999 } 2000 } 2001 } 2002 2003 struct tcp_req_info* 2004 tcp_req_info_create(struct sldns_buffer* spoolbuf) 2005 { 2006 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req)); 2007 if(!req) { 2008 log_err("malloc failure for new stream outoforder processing structure"); 2009 return NULL; 2010 } 2011 memset(req, 0, sizeof(*req)); 2012 req->spool_buffer = spoolbuf; 2013 return req; 2014 } 2015 2016 void 2017 tcp_req_info_delete(struct tcp_req_info* req) 2018 { 2019 if(!req) return; 2020 tcp_req_info_clear(req); 2021 /* cp is pointer back to commpoint that owns this struct and 2022 * called delete on us */ 2023 /* spool_buffer is shared udp buffer, not deleted here */ 2024 free(req); 2025 } 2026 2027 void tcp_req_info_clear(struct tcp_req_info* req) 2028 { 2029 struct tcp_req_open_item* open, *nopen; 2030 struct tcp_req_done_item* item, *nitem; 2031 if(!req) return; 2032 2033 /* free outstanding request mesh reply entries */ 2034 open = 
req->open_req_list;
	while(open) {
		nopen = open->next;
		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
		free(open);
		open = nopen;
	}
	req->open_req_list = NULL;
	req->num_open_req = 0;

	/* free pending writable result packets */
	item = req->done_req_list;
	while(item) {
		nitem = item->next;
		lock_basic_lock(&stream_wait_count_lock);
		stream_wait_count -= (sizeof(struct tcp_req_done_item)
			+item->len);
		lock_basic_unlock(&stream_wait_count_lock);
		free(item->buf);
		free(item);
		item = nitem;
	}
	req->done_req_list = NULL;
	req->num_done_req = 0;
	req->read_is_closed = 0;
}

/**
 * Unlink and free every entry of req->open_req_list that refers to the
 * mesh state m, decrementing req->num_open_req for each removed entry.
 * Does nothing when req or m is NULL.
 * @param req: tcp request info holding the list of open requests.
 * @param m: mesh state whose entries are removed.
 */
void
tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
{
	struct tcp_req_open_item* open, *prev = NULL;
	if(!req || !m) return;
	open = req->open_req_list;
	while(open) {
		if(open->mesh_state == m) {
			struct tcp_req_open_item* next;
			/* unlink: patch the predecessor, or the list head when
			 * the matching item is the first one */
			if(prev) prev->next = open->next;
			else req->open_req_list = open->next;
			/* caller has to manage the mesh state reply entry */
			next = open->next;
			free(open);
			req->num_open_req --;

			/* prev is left unchanged, it still precedes 'next' */
			open = next;
			continue;
		}
		prev = open;
		open = open->next;
	}
}

/**
 * Setup listening for read or write on the comm point of req.
 * Nothing is changed while req->cp->tcp_byte_count is nonzero.
 * Otherwise: when the comm point is not in reading state it is (re)started
 * for writing; else when the read side is not closed it is (re)started for
 * reading and req->read_again is set; else it is restarted and
 * comm_point_listen_for_rw is called with (0, 0).
 * @param req: tcp request info, req->cp is the comm point that is adjusted.
 */
static void
tcp_req_info_setup_listen(struct tcp_req_info* req)
{
	int wr = 0;
	int rd = 0;

	if(req->cp->tcp_byte_count != 0) {
		/* cannot change, halfway through */
		return;
	}

	/* a pending write is signalled by tcp_is_reading being cleared */
	if(!req->cp->tcp_is_reading)
		wr = 1;
	if(!req->read_is_closed)
		rd = 1;

	if(wr) {
		req->cp->tcp_is_reading = 0;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
	} else if(rd) {
		req->cp->tcp_is_reading = 1;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		/* and also read it (from SSL stack buffers), so
		 * no event read event is expected since the remainder of
		 * the TLS frame is sitting in the buffers. */
		req->read_again = 1;
	} else {
		/* neither reading nor writing: only the timeout remains.
		 * NOTE(review): presumably (0, 0) disables both the read and
		 * the write event - confirm in comm_point_listen_for_rw */
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		comm_point_listen_for_rw(req->cp, 0, 0);
	}
}

/**
 * Remove first item from list of pending results.
 * The size of the item (struct plus payload length) is subtracted from
 * stream_wait_count under its lock. The list must not be empty (asserted).
 * @param req: tcp request info with the done_req_list.
 * @return the unlinked item; the caller frees item->buf and the item.
 */
static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info* req)
{
	struct tcp_req_done_item* item;
	log_assert(req->num_done_req > 0 && req->done_req_list);
	item = req->done_req_list;
	lock_basic_lock(&stream_wait_count_lock);
	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
	lock_basic_unlock(&stream_wait_count_lock);
	req->done_req_list = req->done_req_list->next;
	req->num_done_req --;
	return item;
}

/**
 * Send given buffer and setup to write.
 * Copies len bytes from buf into req->cp->buffer (cleared first, flipped
 * afterwards) and marks the comm point as writing. The event registration
 * itself is not changed here.
 * @param req: tcp request info, req->cp->buffer receives the data.
 * @param buf: data to send.
 * @param len: number of bytes in buf.
 */
static void
tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
	size_t len)
{
	sldns_buffer_clear(req->cp->buffer);
	sldns_buffer_write(req->cp->buffer, buf, len);
	sldns_buffer_flip(req->cp->buffer);

	req->cp->tcp_is_reading = 0; /* we are now writing */
}

/**
 * Pick up the next result and start writing it to the channel.
 * When there is no pending result, nothing happens. The popped item and
 * its buffer are freed after the data has been copied to the comm point.
 * @param req: tcp request info.
 */
static void
tcp_req_pickup_next_result(struct tcp_req_info* req)
{
	if(req->num_done_req > 0) {
		/* unlist the done item from the list of pending results */
		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
		tcp_req_info_start_write_buf(req, item->buf, item->len);
		free(item->buf);
		free(item);
	}
}

/**
 * The read channel has closed.
 * @param req: tcp request info.
 * @return 1 when there is still work (a result to write, or open requests
 *	to wait for), 0 when there are no open and no done requests; per the
 *	comment in the body, returning 0 closes the connection.
 */
int
tcp_req_info_handle_read_close(struct tcp_req_info* req)
{
	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
	/* reset byte count for (potential) partial read */
	req->cp->tcp_byte_count = 0;
	/* if we still have results to write, pick up next and write it */
	if(req->num_done_req != 0) {
		tcp_req_pickup_next_result(req);
		tcp_req_info_setup_listen(req);
		return 1;
	}
	/* if nothing to do, this closes the connection */
	if(req->num_open_req == 0 && req->num_done_req == 0)
		return 0;
	/* otherwise, we must be waiting for dns resolve, wait with timeout */
	req->read_is_closed = 1;
	tcp_req_info_setup_listen(req);
	return 1;
}

/**
 * A write on the channel has completed.
 * Clears the comm point buffer. When nothing more is queued and the read
 * side is closed the reply is dropped via comm_point_drop_reply; otherwise
 * the next queued result (if any) is started and listening is set up again.
 * @param req: tcp request info.
 */
void
tcp_req_info_handle_writedone(struct tcp_req_info* req)
{
	/* back to reading state, we finished this write event */
	sldns_buffer_clear(req->cp->buffer);
	if(req->num_done_req == 0 && req->read_is_closed) {
		/* no more to write and nothing to read, close it */
		comm_point_drop_reply(&req->cp->repinfo);
		return;
	}
	req->cp->tcp_is_reading = 1;
	/* see if another result needs writing */
	tcp_req_pickup_next_result(req);

	/* see if there is more to write, if not stop_listening for writing */
	/* see if new requests are allowed, if so, start_listening
	 * for reading */
	tcp_req_info_setup_listen(req);
}

/**
 * A complete query has been read on the channel.
 * Calls the comm point callback with the query. When the callback returns
 * nonzero, or set req->is_reply, the answer in c->buffer is written out.
 * When req->is_drop was set nothing more is done. Otherwise a queued
 * result (if any) is picked up and listening is set up again.
 * @param req: tcp request info.
 */
void
tcp_req_info_handle_readdone(struct tcp_req_info* req)
{
	struct comm_point* c = req->cp;

	/* we want to read up several requests, unless there are
	 * pending answers */

	req->is_drop = 0;
	req->is_reply = 0;
	/* while set, tcp_req_info_send_reply only copies the reply into
	 * c->buffer and flags is_reply */
	req->in_worker_handle = 1;
	sldns_buffer_set_limit(req->spool_buffer, 0);
	/* handle the current request */
	/* this calls the worker handle request routine that could give
	 * a cache response, or localdata response, or drop the reply,
	 * or schedule a mesh entry for later */
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
		req->in_worker_handle = 0;
		/* there is an answer, put it up. It is already in the
		 * c->buffer, just send it. */
		/* since we were just reading a query, the channel is
		 * clear to write to */
	send_it:
		c->tcp_is_reading = 0;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
		return;
	}
	req->in_worker_handle = 0;
	/* it should be waiting in the mesh for recursion.
	 * If mesh failed to add a new entry and called commpoint_drop_reply.
	 * Then the mesh state has been cleared. */
	if(req->is_drop) {
		/* the reply has been dropped, stream has been closed. */
		return;
	}
	/* If mesh failed(mallocfail) and called commpoint_send_reply with
	 * something like servfail then we pick up that reply below. */
	if(req->is_reply) {
		goto send_it;
	}

	sldns_buffer_clear(c->buffer);
	/* if pending answers, pick up an answer and start sending it */
	tcp_req_pickup_next_result(req);

	/* if answers pending, start sending answers */
	/* read more requests if we can have more requests */
	tcp_req_info_setup_listen(req);
}

/**
 * Record that mesh state m is working on a query from this stream.
 * A new item is put at the front of req->open_req_list and
 * req->num_open_req is incremented.
 * @param req: tcp request info (must not be NULL, asserted).
 * @param mesh: mesh area, stored in the item (must not be NULL, asserted).
 * @param m: mesh state, stored in the item (must not be NULL, asserted).
 * @return 0 on malloc failure, 1 otherwise.
 */
int
tcp_req_info_add_meshstate(struct tcp_req_info* req,
	struct mesh_area* mesh, struct mesh_state* m)
{
	struct tcp_req_open_item* item;
	log_assert(req && mesh && m);
	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
	if(!item) return 0;
	item->next = req->open_req_list;
	item->mesh = mesh;
	item->mesh_state = m;
	req->open_req_list = item;
	req->num_open_req++;
	return 1;
}

/** Add a result to the result list. At the end.
*/ 2276 static int 2277 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2278 { 2279 struct tcp_req_done_item* last = NULL; 2280 struct tcp_req_done_item* item; 2281 size_t space; 2282 2283 /* see if we have space */ 2284 space = sizeof(struct tcp_req_done_item) + len; 2285 lock_basic_lock(&stream_wait_count_lock); 2286 if(stream_wait_count + space > stream_wait_max) { 2287 lock_basic_unlock(&stream_wait_count_lock); 2288 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2289 return 0; 2290 } 2291 stream_wait_count += space; 2292 lock_basic_unlock(&stream_wait_count_lock); 2293 2294 /* find last element */ 2295 last = req->done_req_list; 2296 while(last && last->next) 2297 last = last->next; 2298 2299 /* create new element */ 2300 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2301 if(!item) { 2302 log_err("malloc failure, for stream result list"); 2303 return 0; 2304 } 2305 item->next = NULL; 2306 item->len = len; 2307 item->buf = memdup(buf, len); 2308 if(!item->buf) { 2309 free(item); 2310 log_err("malloc failure, adding reply to stream result list"); 2311 return 0; 2312 } 2313 2314 /* link in */ 2315 if(last) last->next = item; 2316 else req->done_req_list = item; 2317 req->num_done_req++; 2318 return 1; 2319 } 2320 2321 void 2322 tcp_req_info_send_reply(struct tcp_req_info* req) 2323 { 2324 if(req->in_worker_handle) { 2325 /* reply from mesh is in the spool_buffer */ 2326 /* copy now, so that the spool buffer is free for other tasks 2327 * before the callback is done */ 2328 sldns_buffer_clear(req->cp->buffer); 2329 sldns_buffer_write(req->cp->buffer, 2330 sldns_buffer_begin(req->spool_buffer), 2331 sldns_buffer_limit(req->spool_buffer)); 2332 sldns_buffer_flip(req->cp->buffer); 2333 req->is_reply = 1; 2334 return; 2335 } 2336 /* now that the query has been handled, that mesh_reply entry 2337 * should be removed, from the tcp_req_info list, 2338 * the mesh state cleanup removes then with 
region_cleanup and 2339 * replies_sent true. */ 2340 /* see if we can send it straight away (we are not doing 2341 * anything else). If so, copy to buffer and start */ 2342 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2343 /* buffer is free, and was ready to read new query into, 2344 * but we are now going to use it to send this answer */ 2345 tcp_req_info_start_write_buf(req, 2346 sldns_buffer_begin(req->spool_buffer), 2347 sldns_buffer_limit(req->spool_buffer)); 2348 /* switch to listen to write events */ 2349 comm_point_stop_listening(req->cp); 2350 comm_point_start_listening(req->cp, -1, 2351 adjusted_tcp_timeout(req->cp)); 2352 return; 2353 } 2354 /* queue up the answer behind the others already pending */ 2355 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2356 sldns_buffer_limit(req->spool_buffer))) { 2357 /* drop the connection, we are out of resources */ 2358 comm_point_drop_reply(&req->cp->repinfo); 2359 } 2360 } 2361 2362 size_t tcp_req_info_get_stream_buffer_size(void) 2363 { 2364 size_t s; 2365 if(!stream_wait_lock_inited) 2366 return stream_wait_count; 2367 lock_basic_lock(&stream_wait_count_lock); 2368 s = stream_wait_count; 2369 lock_basic_unlock(&stream_wait_count_lock); 2370 return s; 2371 } 2372 2373 size_t http2_get_query_buffer_size(void) 2374 { 2375 size_t s; 2376 if(!http2_query_buffer_lock_inited) 2377 return http2_query_buffer_count; 2378 lock_basic_lock(&http2_query_buffer_count_lock); 2379 s = http2_query_buffer_count; 2380 lock_basic_unlock(&http2_query_buffer_count_lock); 2381 return s; 2382 } 2383 2384 size_t http2_get_response_buffer_size(void) 2385 { 2386 size_t s; 2387 if(!http2_response_buffer_lock_inited) 2388 return http2_response_buffer_count; 2389 lock_basic_lock(&http2_response_buffer_count_lock); 2390 s = http2_response_buffer_count; 2391 lock_basic_unlock(&http2_response_buffer_count_lock); 2392 return s; 2393 } 2394 2395 #ifdef HAVE_NGHTTP2 2396 /** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session */
static ssize_t http2_submit_response_read_callback(
	nghttp2_session* ATTR_UNUSED(session),
	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
{
	struct http2_session* h2_session = source->ptr;
	struct http2_stream* h2_stream;
	size_t avail;
	size_t nbytes;

	/* the stream is found through the user data set on the stream id */
	h2_stream = nghttp2_session_get_stream_user_data(
		h2_session->session, stream_id);
	if(!h2_stream) {
		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
			"stream");
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}

	/* a missing buffer counts the same as an exhausted one */
	avail = 0;
	if(h2_stream->rbuffer)
		avail = sldns_buffer_remaining(h2_stream->rbuffer);
	if(avail == 0) {
		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
			"available in rbuffer");
		/* rbuffer will be free'd in frame close cb */
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}

	/* hand over at most what was asked for, what is left in the
	 * rbuffer, and what fits the return type */
	nbytes = length;
	if(avail < nbytes)
		nbytes = avail;
	if(nbytes > SSIZE_MAX)
		nbytes = SSIZE_MAX; /* will probably never happen */

	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), nbytes);
	sldns_buffer_skip(h2_stream->rbuffer, nbytes);

	if(sldns_buffer_remaining(h2_stream->rbuffer) != 0)
		return nbytes;

	/* everything was handed over: signal end of data and give the
	 * response buffer and its accounted size back */
	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
	lock_basic_lock(&http2_response_buffer_count_lock);
	http2_response_buffer_count -=
		sldns_buffer_capacity(h2_stream->rbuffer);
	lock_basic_unlock(&http2_response_buffer_count_lock);
	sldns_buffer_free(h2_stream->rbuffer);
	h2_stream->rbuffer = NULL;
	return nbytes;
}

/**
 * Send RST_STREAM frame for stream.
2442 * @param h2_session: http2 session to submit frame to 2443 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2444 * @return 0 on error, 1 otherwise 2445 */ 2446 static int http2_submit_rst_stream(struct http2_session* h2_session, 2447 struct http2_stream* h2_stream) 2448 { 2449 int ret = nghttp2_submit_rst_stream(h2_session->session, 2450 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2451 NGHTTP2_INTERNAL_ERROR); 2452 if(ret) { 2453 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2454 "error: %s", nghttp2_strerror(ret)); 2455 return 0; 2456 } 2457 return 1; 2458 } 2459 2460 /** 2461 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2462 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2463 * might be used before this will be sent out. 2464 * @param h2_session: http2 session, containing c->buffer which contains answer 2465 * @return 0 on error, 1 otherwise 2466 */ 2467 int http2_submit_dns_response(struct http2_session* h2_session) 2468 { 2469 int ret; 2470 nghttp2_data_provider data_prd; 2471 char status[4]; 2472 nghttp2_nv headers[3]; 2473 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2474 size_t rlen; 2475 char rlen_str[32]; 2476 2477 if(h2_stream->rbuffer) { 2478 log_err("http2 submit response error: rbuffer already " 2479 "exists"); 2480 return 0; 2481 } 2482 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2483 log_err("http2 submit response error: c->buffer not complete"); 2484 return 0; 2485 } 2486 2487 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2488 verbose(VERB_QUERY, "http2: submit response error: " 2489 "invalid status"); 2490 return 0; 2491 } 2492 2493 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2494 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2495 2496 lock_basic_lock(&http2_response_buffer_count_lock); 2497 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2498 
lock_basic_unlock(&http2_response_buffer_count_lock); 2499 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2500 "in https-response-buffer-size"); 2501 return http2_submit_rst_stream(h2_session, h2_stream); 2502 } 2503 http2_response_buffer_count += rlen; 2504 lock_basic_unlock(&http2_response_buffer_count_lock); 2505 2506 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2507 lock_basic_lock(&http2_response_buffer_count_lock); 2508 http2_response_buffer_count -= rlen; 2509 lock_basic_unlock(&http2_response_buffer_count_lock); 2510 log_err("http2 submit response error: malloc failure"); 2511 return 0; 2512 } 2513 2514 headers[0].name = (uint8_t*)":status"; 2515 headers[0].namelen = 7; 2516 headers[0].value = (uint8_t*)status; 2517 headers[0].valuelen = 3; 2518 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2519 2520 headers[1].name = (uint8_t*)"content-type"; 2521 headers[1].namelen = 12; 2522 headers[1].value = (uint8_t*)"application/dns-message"; 2523 headers[1].valuelen = 23; 2524 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2525 2526 headers[2].name = (uint8_t*)"content-length"; 2527 headers[2].namelen = 14; 2528 headers[2].value = (uint8_t*)rlen_str; 2529 headers[2].valuelen = strlen(rlen_str); 2530 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2531 2532 sldns_buffer_write(h2_stream->rbuffer, 2533 sldns_buffer_current(h2_session->c->buffer), 2534 sldns_buffer_remaining(h2_session->c->buffer)); 2535 sldns_buffer_flip(h2_stream->rbuffer); 2536 2537 data_prd.source.ptr = h2_session; 2538 data_prd.read_callback = http2_submit_response_read_callback; 2539 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2540 headers, 3, &data_prd); 2541 if(ret) { 2542 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2543 "error: %s", nghttp2_strerror(ret)); 2544 return 0; 2545 } 2546 return 1; 2547 } 2548 #else 2549 int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2550 { 2551 return 0; 2552 } 2553 #endif 2554 2555 #ifdef HAVE_NGHTTP2 2556 /** HTTP 
status to descriptive string */ 2557 static char* http_status_to_str(enum http_status s) 2558 { 2559 switch(s) { 2560 case HTTP_STATUS_OK: 2561 return "OK"; 2562 case HTTP_STATUS_BAD_REQUEST: 2563 return "Bad Request"; 2564 case HTTP_STATUS_NOT_FOUND: 2565 return "Not Found"; 2566 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2567 return "Payload Too Large"; 2568 case HTTP_STATUS_URI_TOO_LONG: 2569 return "URI Too Long"; 2570 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2571 return "Unsupported Media Type"; 2572 case HTTP_STATUS_NOT_IMPLEMENTED: 2573 return "Not Implemented"; 2574 } 2575 return "Status Unknown"; 2576 } 2577 2578 /** nghttp2 callback. Used to copy error message to nghttp2 session */ 2579 static ssize_t http2_submit_error_read_callback( 2580 nghttp2_session* ATTR_UNUSED(session), 2581 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2582 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2583 { 2584 struct http2_stream* h2_stream; 2585 struct http2_session* h2_session = source->ptr; 2586 char* msg; 2587 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2588 h2_session->session, stream_id))) { 2589 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2590 "stream"); 2591 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2592 } 2593 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2594 msg = http_status_to_str(h2_stream->status); 2595 if(length < strlen(msg)) 2596 return 0; /* not worth trying over multiple frames */ 2597 memcpy(buf, msg, strlen(msg)); 2598 return strlen(msg); 2599 2600 } 2601 2602 /** 2603 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2604 * sending out. Message body will contain descriptive string for HTTP status. 
2605 * @param h2_session: http2 session to submit to 2606 * @param h2_stream: http2 stream containing HTTP status to use for error 2607 * @return 0 on error, 1 otherwise 2608 */ 2609 static int http2_submit_error(struct http2_session* h2_session, 2610 struct http2_stream* h2_stream) 2611 { 2612 int ret; 2613 char status[4]; 2614 nghttp2_data_provider data_prd; 2615 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2616 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2617 verbose(VERB_QUERY, "http2: submit error failed, " 2618 "invalid status"); 2619 return 0; 2620 } 2621 headers[0].name = (uint8_t*)":status"; 2622 headers[0].namelen = 7; 2623 headers[0].value = (uint8_t*)status; 2624 headers[0].valuelen = 3; 2625 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2626 2627 data_prd.source.ptr = h2_session; 2628 data_prd.read_callback = http2_submit_error_read_callback; 2629 2630 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2631 headers, 1, &data_prd); 2632 if(ret) { 2633 verbose(VERB_QUERY, "http2: submit error failed, " 2634 "error: %s", nghttp2_strerror(ret)); 2635 return 0; 2636 } 2637 return 1; 2638 } 2639 2640 /** 2641 * Start query handling. Query is stored in the stream, and will be free'd here. 2642 * @param h2_session: http2 session, containing comm point 2643 * @param h2_stream: stream containing buffered query 2644 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2645 * reply available (yet). 
*/
static int http2_query_read_done(struct http2_session* h2_session,
	struct http2_stream* h2_stream)
{
	log_assert(h2_stream->qbuffer);

	/* the comm point buffer is shared by all streams of the session,
	 * only one stream can be attached to it at a time */
	if(h2_session->c->h2_stream) {
		verbose(VERB_ALGO, "http2_query_read_done failure: shared "
			"buffer already assigned to stream");
		return -1;
	}

	/* the c->buffer might be used by mesh_send_reply and not be cleared,
	 * it needs to be cleared before use */
	sldns_buffer_clear(h2_session->c->buffer);
	if(sldns_buffer_remaining(h2_session->c->buffer) <
		sldns_buffer_remaining(h2_stream->qbuffer)) {
		/* qbuffer will be free'd in frame close cb */
		sldns_buffer_clear(h2_session->c->buffer);
		verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
			"qbuffer in c->buffer");
		return -1;
	}

	/* copy the query into the shared buffer */
	sldns_buffer_write(h2_session->c->buffer,
		sldns_buffer_current(h2_stream->qbuffer),
		sldns_buffer_remaining(h2_stream->qbuffer));

	/* the query buffer is no longer needed, release it together with
	 * its accounted size */
	lock_basic_lock(&http2_query_buffer_count_lock);
	http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
	lock_basic_unlock(&http2_query_buffer_count_lock);
	sldns_buffer_free(h2_stream->qbuffer);
	h2_stream->qbuffer = NULL;

	sldns_buffer_flip(h2_session->c->buffer);
	/* attach the stream to the comm point for the callback */
	h2_session->c->h2_stream = h2_stream;
	fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
	if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
		NETEVENT_NOERROR, &h2_session->c->repinfo)) {
		/* the stream stays attached, the caller detaches it */
		return 1; /* answer in c->buffer */
	}
	sldns_buffer_clear(h2_session->c->buffer);
	h2_session->c->h2_stream = NULL;
	return 0; /* mesh state added, or dropped */
}

/** nghttp2 callback. Used to check if the received frame indicates the end of a
 * stream. Gather collected request data and start query handling.
*/
/* Returns 0 to continue, NGHTTP2_ERR_CALLBACK_FAILURE when the error
 * response or the DNS response cannot be submitted, when query handling
 * fails, or when the query was dropped by the worker callback. */
static int http2_req_frame_recv_cb(nghttp2_session* session,
	const nghttp2_frame* frame, void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream;
	int query_read_done;

	/* only DATA or HEADERS frames that end the stream are handled */
	if((frame->hd.type != NGHTTP2_DATA &&
		frame->hd.type != NGHTTP2_HEADERS) ||
		!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
		return 0;
	}

	if(!(h2_stream = nghttp2_session_get_stream_user_data(
		session, frame->hd.stream_id)))
		return 0;

	/* map the flags collected while receiving the request onto an HTTP
	 * status, the first matching check decides */
	if(h2_stream->invalid_endpoint) {
		h2_stream->status = HTTP_STATUS_NOT_FOUND;
		goto submit_http_error;
	}

	if(h2_stream->invalid_content_type) {
		h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
		goto submit_http_error;
	}

	if(h2_stream->http_method != HTTP_METHOD_GET &&
		h2_stream->http_method != HTTP_METHOD_POST) {
		h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
		goto submit_http_error;
	}

	if(h2_stream->query_too_large) {
		if(h2_stream->http_method == HTTP_METHOD_POST)
			h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
		else
			h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
		goto submit_http_error;
	}

	/* no query was buffered for this stream */
	if(!h2_stream->qbuffer) {
		h2_stream->status = HTTP_STATUS_BAD_REQUEST;
		goto submit_http_error;
	}

	/* a status that was already set also ends up in the error path */
	if(h2_stream->status) {
submit_http_error:
		verbose(VERB_QUERY, "http2 request invalid, returning :status="
			"%d", h2_stream->status);
		if(!http2_submit_error(h2_session, h2_stream)) {
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return 0;
	}
	h2_stream->status = HTTP_STATUS_OK;

	sldns_buffer_flip(h2_stream->qbuffer);
	/* NOTE(review): postpone_drop is set around the query handling and
	 * only reset on the paths where http2_query_read_done returned 0;
	 * presumably it defers a drop requested from within the worker
	 * callback - confirm against the code that reads postpone_drop */
	h2_session->postpone_drop = 1;
	query_read_done = http2_query_read_done(h2_session, h2_stream);
	if(query_read_done < 0)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	else if(!query_read_done) {
		if(h2_session->is_drop) {
			/* connection needs to be closed. Return failure to make
			 * sure no other action are taken anymore on comm point.
			 * failure will result in reclaiming (and closing)
			 * of comm point. */
			verbose(VERB_QUERY, "http2 query dropped in worker cb");
			h2_session->postpone_drop = 0;
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		/* nothing to submit right now, query added to mesh. */
		h2_session->postpone_drop = 0;
		return 0;
	}
	/* the answer is in c->buffer; after submitting (or failing to) the
	 * shared buffer is cleared and the stream is detached from it */
	if(!http2_submit_dns_response(h2_session)) {
		sldns_buffer_clear(h2_session->c->buffer);
		h2_session->c->h2_stream = NULL;
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
	verbose(VERB_QUERY, "http2 query submitted to session");
	sldns_buffer_clear(h2_session->c->buffer);
	h2_session->c->h2_stream = NULL;
	return 0;
}

/** nghttp2 callback. Used to detect start of new streams.
 * Creates an http2_stream for request HEADERS frames, adds it to the
 * session and sets it as the user data of the nghttp2 stream.
 * Returns 0, or NGHTTP2_ERR_CALLBACK_FAILURE when the stream cannot be
 * created or cannot be set as user data. */
static int http2_req_begin_headers_cb(nghttp2_session* session,
	const nghttp2_frame* frame, void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream;
	int ret;
	if(frame->hd.type != NGHTTP2_HEADERS ||
		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
		/* only interested in request headers */
		return 0;
	}
	if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
		log_err("malloc failure while creating http2 stream");
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
	/* the stream is added to the session before it is attached to the
	 * nghttp2 stream, so it stays linked when attaching fails */
	http2_session_add_stream(h2_session, h2_stream);
	ret = nghttp2_session_set_stream_user_data(session,
		frame->hd.stream_id, h2_stream);
	if(ret) {
		/* stream does not exist */
		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
			"error: %s", nghttp2_strerror(ret));
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}

	return 0;
}

/**
 * base64url decode, store in qbuffer
 * @param h2_session: http2 session
 * @param h2_stream: http2 stream
 *
@param start: start of the base64 string 2815 * @param length: length of the base64 string 2816 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2817 * buffer will be NULL is unparseble. 2818 */ 2819 static int http2_buffer_uri_query(struct http2_session* h2_session, 2820 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2821 { 2822 size_t expectb64len; 2823 int b64len; 2824 if(h2_stream->http_method == HTTP_METHOD_POST) 2825 return 1; 2826 if(length == 0) 2827 return 1; 2828 if(h2_stream->qbuffer) { 2829 verbose(VERB_ALGO, "http2_req_header fail, " 2830 "qbuffer already set"); 2831 return 0; 2832 } 2833 2834 /* calculate size, might be a bit bigger than the real 2835 * decoded buffer size */ 2836 expectb64len = sldns_b64_pton_calculate_size(length); 2837 log_assert(expectb64len > 0); 2838 if(expectb64len > 2839 h2_session->c->http2_stream_max_qbuffer_size) { 2840 h2_stream->query_too_large = 1; 2841 return 1; 2842 } 2843 2844 lock_basic_lock(&http2_query_buffer_count_lock); 2845 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2846 lock_basic_unlock(&http2_query_buffer_count_lock); 2847 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2848 "in http2-query-buffer-size"); 2849 return http2_submit_rst_stream(h2_session, h2_stream); 2850 } 2851 http2_query_buffer_count += expectb64len; 2852 lock_basic_unlock(&http2_query_buffer_count_lock); 2853 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2854 lock_basic_lock(&http2_query_buffer_count_lock); 2855 http2_query_buffer_count -= expectb64len; 2856 lock_basic_unlock(&http2_query_buffer_count_lock); 2857 log_err("http2_req_header fail, qbuffer " 2858 "malloc failure"); 2859 return 0; 2860 } 2861 2862 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2863 char buf[65536+4]; 2864 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2865 /* copy to the scratch buffer temporarily to terminate the 2866 * string 
with a zero */ 2867 if(length+1 > sizeof(buf)) { 2868 /* too long */ 2869 lock_basic_lock(&http2_query_buffer_count_lock); 2870 http2_query_buffer_count -= expectb64len; 2871 lock_basic_unlock(&http2_query_buffer_count_lock); 2872 sldns_buffer_free(h2_stream->qbuffer); 2873 h2_stream->qbuffer = NULL; 2874 return 1; 2875 } 2876 memmove(buf, start, length); 2877 buf[length] = 0; 2878 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2879 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2880 lock_basic_lock(&http2_query_buffer_count_lock); 2881 http2_query_buffer_count -= expectb64len; 2882 lock_basic_unlock(&http2_query_buffer_count_lock); 2883 sldns_buffer_free(h2_stream->qbuffer); 2884 h2_stream->qbuffer = NULL; 2885 return 1; 2886 } 2887 } else { 2888 if(!(b64len = sldns_b64url_pton( 2889 (char const *)start, length, 2890 sldns_buffer_current(h2_stream->qbuffer), 2891 expectb64len)) || b64len < 0) { 2892 lock_basic_lock(&http2_query_buffer_count_lock); 2893 http2_query_buffer_count -= expectb64len; 2894 lock_basic_unlock(&http2_query_buffer_count_lock); 2895 sldns_buffer_free(h2_stream->qbuffer); 2896 h2_stream->qbuffer = NULL; 2897 /* return without error, method can be an 2898 * unknown POST */ 2899 return 1; 2900 } 2901 } 2902 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 2903 return 1; 2904 } 2905 2906 /** nghttp2 callback. Used to parse headers from HEADER frames. 
*/ 2907 static int http2_req_header_cb(nghttp2_session* session, 2908 const nghttp2_frame* frame, const uint8_t* name, size_t namelen, 2909 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags), 2910 void* cb_arg) 2911 { 2912 struct http2_stream* h2_stream = NULL; 2913 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2914 /* nghttp2 deals with CONTINUATION frames and provides them as part of 2915 * the HEADER */ 2916 if(frame->hd.type != NGHTTP2_HEADERS || 2917 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2918 /* only interested in request headers */ 2919 return 0; 2920 } 2921 if(!(h2_stream = nghttp2_session_get_stream_user_data(session, 2922 frame->hd.stream_id))) 2923 return 0; 2924 2925 /* earlier checks already indicate we can stop handling this query */ 2926 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED || 2927 h2_stream->invalid_content_type || 2928 h2_stream->invalid_endpoint) 2929 return 0; 2930 2931 2932 /* nghttp2 performs some sanity checks in the headers, including: 2933 * name and value are guaranteed to be null terminated 2934 * name is guaranteed to be lowercase 2935 * content-length value is guaranteed to contain digits 2936 */ 2937 2938 if(!h2_stream->http_method && namelen == 7 && 2939 memcmp(":method", name, namelen) == 0) { 2940 /* Case insensitive check on :method value to be on the safe 2941 * side. I failed to find text about case sensitivity in specs. 
2942 */ 2943 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0) 2944 h2_stream->http_method = HTTP_METHOD_GET; 2945 else if(valuelen == 4 && 2946 strcasecmp("POST", (const char*)value) == 0) { 2947 h2_stream->http_method = HTTP_METHOD_POST; 2948 if(h2_stream->qbuffer) { 2949 /* POST method uses query from DATA frames */ 2950 lock_basic_lock(&http2_query_buffer_count_lock); 2951 http2_query_buffer_count -= 2952 sldns_buffer_capacity(h2_stream->qbuffer); 2953 lock_basic_unlock(&http2_query_buffer_count_lock); 2954 sldns_buffer_free(h2_stream->qbuffer); 2955 h2_stream->qbuffer = NULL; 2956 } 2957 } else 2958 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED; 2959 return 0; 2960 } 2961 if(namelen == 5 && memcmp(":path", name, namelen) == 0) { 2962 /* :path may contain DNS query, depending on method. Method might 2963 * not be known yet here, so check after finishing receiving 2964 * stream. */ 2965 #define HTTP_QUERY_PARAM "?dns=" 2966 size_t el = strlen(h2_session->c->http_endpoint); 2967 size_t qpl = strlen(HTTP_QUERY_PARAM); 2968 2969 if(valuelen < el || memcmp(h2_session->c->http_endpoint, 2970 value, el) != 0) { 2971 h2_stream->invalid_endpoint = 1; 2972 return 0; 2973 } 2974 /* larger than endpoint only allowed if it is for the query 2975 * parameter */ 2976 if(valuelen <= el+qpl || 2977 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) { 2978 if(valuelen != el) 2979 h2_stream->invalid_endpoint = 1; 2980 return 0; 2981 } 2982 2983 if(!http2_buffer_uri_query(h2_session, h2_stream, 2984 value+(el+qpl), valuelen-(el+qpl))) { 2985 return NGHTTP2_ERR_CALLBACK_FAILURE; 2986 } 2987 return 0; 2988 } 2989 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST, 2990 * and not needed when using GET. Don't enfore. 2991 * If set only allow lowercase "application/dns-message". 2992 * 2993 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST 2994 * be able to handle "application/dns-message". 
Since that is the only 2995 * content-type supported we can ignore the accept header. 2996 */ 2997 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) { 2998 if(valuelen != 23 || memcmp("application/dns-message", value, 2999 valuelen) != 0) { 3000 h2_stream->invalid_content_type = 1; 3001 } 3002 } 3003 3004 /* Only interested in content-lentg for POST (on not yet known) method. 3005 */ 3006 if((!h2_stream->http_method || 3007 h2_stream->http_method == HTTP_METHOD_POST) && 3008 !h2_stream->content_length && namelen == 14 && 3009 memcmp("content-length", name, namelen) == 0) { 3010 if(valuelen > 5) { 3011 h2_stream->query_too_large = 1; 3012 return 0; 3013 } 3014 /* guaranteed to only contain digits and be null terminated */ 3015 h2_stream->content_length = atoi((const char*)value); 3016 if(h2_stream->content_length > 3017 h2_session->c->http2_stream_max_qbuffer_size) { 3018 h2_stream->query_too_large = 1; 3019 return 0; 3020 } 3021 } 3022 return 0; 3023 } 3024 3025 /** nghttp2 callback. Used to get data from DATA frames, which can contain 3026 * queries in POST requests. */ 3027 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), 3028 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data, 3029 size_t len, void* cb_arg) 3030 { 3031 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3032 struct http2_stream* h2_stream; 3033 size_t qlen = 0; 3034 3035 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3036 h2_session->session, stream_id))) { 3037 return 0; 3038 } 3039 3040 if(h2_stream->query_too_large) 3041 return 0; 3042 3043 if(!h2_stream->qbuffer) { 3044 if(h2_stream->content_length) { 3045 if(h2_stream->content_length < len) 3046 /* getting more data in DATA frame than 3047 * advertised in content-length header. 
*/ 3048 return NGHTTP2_ERR_CALLBACK_FAILURE; 3049 qlen = h2_stream->content_length; 3050 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) { 3051 /* setting this to msg-buffer-size can result in a lot 3052 * of memory consuption. Most queries should fit in a 3053 * single DATA frame, and most POST queries will 3054 * contain content-length which does not impose this 3055 * limit. */ 3056 qlen = len; 3057 } 3058 } 3059 if(!h2_stream->qbuffer && qlen) { 3060 lock_basic_lock(&http2_query_buffer_count_lock); 3061 if(http2_query_buffer_count + qlen > http2_query_buffer_max) { 3062 lock_basic_unlock(&http2_query_buffer_count_lock); 3063 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 3064 "in http2-query-buffer-size"); 3065 return http2_submit_rst_stream(h2_session, h2_stream); 3066 } 3067 http2_query_buffer_count += qlen; 3068 lock_basic_unlock(&http2_query_buffer_count_lock); 3069 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) { 3070 lock_basic_lock(&http2_query_buffer_count_lock); 3071 http2_query_buffer_count -= qlen; 3072 lock_basic_unlock(&http2_query_buffer_count_lock); 3073 } 3074 } 3075 3076 if(!h2_stream->qbuffer || 3077 sldns_buffer_remaining(h2_stream->qbuffer) < len) { 3078 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough " 3079 "buffer space for POST query. 
Can happen on multi " 3080 "frame requests without content-length header"); 3081 h2_stream->query_too_large = 1; 3082 return 0; 3083 } 3084 3085 sldns_buffer_write(h2_stream->qbuffer, data, len); 3086 3087 return 0; 3088 } 3089 3090 void http2_req_stream_clear(struct http2_stream* h2_stream) 3091 { 3092 if(h2_stream->qbuffer) { 3093 lock_basic_lock(&http2_query_buffer_count_lock); 3094 http2_query_buffer_count -= 3095 sldns_buffer_capacity(h2_stream->qbuffer); 3096 lock_basic_unlock(&http2_query_buffer_count_lock); 3097 sldns_buffer_free(h2_stream->qbuffer); 3098 h2_stream->qbuffer = NULL; 3099 } 3100 if(h2_stream->rbuffer) { 3101 lock_basic_lock(&http2_response_buffer_count_lock); 3102 http2_response_buffer_count -= 3103 sldns_buffer_capacity(h2_stream->rbuffer); 3104 lock_basic_unlock(&http2_response_buffer_count_lock); 3105 sldns_buffer_free(h2_stream->rbuffer); 3106 h2_stream->rbuffer = NULL; 3107 } 3108 } 3109 3110 nghttp2_session_callbacks* http2_req_callbacks_create(void) 3111 { 3112 nghttp2_session_callbacks *callbacks; 3113 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) { 3114 log_err("failed to initialize nghttp2 callback"); 3115 return NULL; 3116 } 3117 /* reception of header block started, used to create h2_stream */ 3118 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks, 3119 http2_req_begin_headers_cb); 3120 /* complete frame received, used to get data from stream if frame 3121 * has end stream flag, and start processing query */ 3122 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, 3123 http2_req_frame_recv_cb); 3124 /* get request info from headers */ 3125 nghttp2_session_callbacks_set_on_header_callback(callbacks, 3126 http2_req_header_cb); 3127 /* get data from DATA frames, containing POST query */ 3128 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, 3129 http2_req_data_chunk_recv_cb); 3130 3131 /* generic HTTP2 callbacks */ 3132 
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb); 3133 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb); 3134 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, 3135 http2_stream_close_cb); 3136 3137 return callbacks; 3138 } 3139 #endif /* HAVE_NGHTTP2 */ 3140