1 /* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41 #include "config.h" 42 #ifdef HAVE_SYS_TYPES_H 43 # include <sys/types.h> 44 #endif 45 #include <sys/time.h> 46 #include <limits.h> 47 #ifdef USE_TCP_FASTOPEN 48 #include <netinet/tcp.h> 49 #endif 50 #include <ctype.h> 51 #include "services/listen_dnsport.h" 52 #include "services/outside_network.h" 53 #include "util/netevent.h" 54 #include "util/log.h" 55 #include "util/config_file.h" 56 #include "util/net_help.h" 57 #include "sldns/sbuffer.h" 58 #include "sldns/parseutil.h" 59 #include "services/mesh.h" 60 #include "util/fptr_wlist.h" 61 #include "util/locks.h" 62 63 #ifdef HAVE_NETDB_H 64 #include <netdb.h> 65 #endif 66 #include <fcntl.h> 67 68 #ifdef HAVE_SYS_UN_H 69 #include <sys/un.h> 70 #endif 71 72 #ifdef HAVE_SYSTEMD 73 #include <systemd/sd-daemon.h> 74 #endif 75 76 #ifdef HAVE_IFADDRS_H 77 #include <ifaddrs.h> 78 #endif 79 #ifdef HAVE_NET_IF_H 80 #include <net/if.h> 81 #endif 82 #ifdef HAVE_LINUX_NET_TSTAMP_H 83 #include <linux/net_tstamp.h> 84 #endif 85 /** number of queued TCP connections for listen() */ 86 #define TCP_BACKLOG 256 87 88 #ifndef THREADS_DISABLED 89 /** lock on the counter of stream buffer memory */ 90 static lock_basic_type stream_wait_count_lock; 91 /** lock on the counter of HTTP2 query buffer memory */ 92 static lock_basic_type http2_query_buffer_count_lock; 93 /** lock on the counter of HTTP2 response buffer memory */ 94 static lock_basic_type http2_response_buffer_count_lock; 95 #endif 96 /** size (in bytes) of stream wait buffers */ 97 static size_t stream_wait_count = 0; 98 /** is the lock initialised for stream wait buffers */ 99 static int stream_wait_lock_inited = 0; 100 /** size (in bytes) of HTTP2 query buffers */ 101 static size_t http2_query_buffer_count = 0; 102 /** is the lock initialised for HTTP2 query buffers */ 103 static int http2_query_buffer_lock_inited = 0; 104 /** size (in bytes) of HTTP2 response buffers */ 105 static size_t http2_response_buffer_count = 0; 106 /** is the lock initialised for HTTP2 
response buffers */ 107 static int http2_response_buffer_lock_inited = 0; 108 109 /** 110 * Debug print of the getaddrinfo returned address. 111 * @param addr: the address returned. 112 */ 113 static void 114 verbose_print_addr(struct addrinfo *addr) 115 { 116 if(verbosity >= VERB_ALGO) { 117 char buf[100]; 118 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 119 #ifdef INET6 120 if(addr->ai_family == AF_INET6) 121 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 122 sin6_addr; 123 #endif /* INET6 */ 124 if(inet_ntop(addr->ai_family, sinaddr, buf, 125 (socklen_t)sizeof(buf)) == 0) { 126 (void)strlcpy(buf, "(null)", sizeof(buf)); 127 } 128 buf[sizeof(buf)-1] = 0; 129 verbose(VERB_ALGO, "creating %s%s socket %s %d", 130 addr->ai_socktype==SOCK_DGRAM?"udp": 131 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 132 addr->ai_family==AF_INET?"4": 133 addr->ai_family==AF_INET6?"6": 134 "_otherfam", buf, 135 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); 136 } 137 } 138 139 void 140 verbose_print_unbound_socket(struct unbound_socket* ub_sock) 141 { 142 if(verbosity >= VERB_ALGO) { 143 log_info("listing of unbound_socket structure:"); 144 verbose_print_addr(ub_sock->addr); 145 log_info("s is: %d, fam is: %s, acl: %s", ub_sock->s, 146 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 147 ub_sock->acl?"yes":"no"); 148 } 149 } 150 151 #ifdef HAVE_SYSTEMD 152 static int 153 systemd_get_activated(int family, int socktype, int listen, 154 struct sockaddr *addr, socklen_t addrlen, 155 const char *path) 156 { 157 int i = 0; 158 int r = 0; 159 int s = -1; 160 const char* listen_pid, *listen_fds; 161 162 /* We should use "listen" option only for stream protocols. 
For UDP it should be -1 */ 163 164 if((r = sd_booted()) < 1) { 165 if(r == 0) 166 log_warn("systemd is not running"); 167 else 168 log_err("systemd sd_booted(): %s", strerror(-r)); 169 return -1; 170 } 171 172 listen_pid = getenv("LISTEN_PID"); 173 listen_fds = getenv("LISTEN_FDS"); 174 175 if (!listen_pid) { 176 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 177 return -1; 178 } 179 180 if (!listen_fds) { 181 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 182 return -1; 183 } 184 185 if((r = sd_listen_fds(0)) < 1) { 186 if(r == 0) 187 log_warn("systemd: did not return socket, check unit configuration"); 188 else 189 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 190 return -1; 191 } 192 193 for(i = 0; i < r; i++) { 194 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 195 s = SD_LISTEN_FDS_START + i; 196 break; 197 } 198 } 199 if (s == -1) { 200 if (addr) 201 log_err_addr("systemd sd_listen_fds()", 202 "no such socket", 203 (struct sockaddr_storage *)addr, addrlen); 204 else 205 log_err("systemd sd_listen_fds(): %s", path); 206 } 207 return s; 208 } 209 #endif 210 211 int 212 create_udp_sock(int family, int socktype, struct sockaddr* addr, 213 socklen_t addrlen, int v6only, int* inuse, int* noproto, 214 int rcv, int snd, int listen, int* reuseport, int transparent, 215 int freebind, int use_systemd, int dscp) 216 { 217 int s; 218 char* err; 219 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 220 int on=1; 221 #endif 222 #ifdef IPV6_MTU 223 int mtu = IPV6_MIN_MTU; 224 #endif 225 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 226 (void)rcv; 227 #endif 228 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 229 (void)snd; 230 #endif 231 #ifndef IPV6_V6ONLY 232 (void)v6only; 233 #endif 234 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && 
!defined(SO_BINDANY) 235 (void)transparent; 236 #endif 237 #if !defined(IP_FREEBIND) 238 (void)freebind; 239 #endif 240 #ifdef HAVE_SYSTEMD 241 int got_fd_from_systemd = 0; 242 243 if (!use_systemd 244 || (use_systemd 245 && (s = systemd_get_activated(family, socktype, -1, addr, 246 addrlen, NULL)) == -1)) { 247 #else 248 (void)use_systemd; 249 #endif 250 if((s = socket(family, socktype, 0)) == -1) { 251 *inuse = 0; 252 #ifndef USE_WINSOCK 253 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 254 *noproto = 1; 255 return -1; 256 } 257 #else 258 if(WSAGetLastError() == WSAEAFNOSUPPORT || 259 WSAGetLastError() == WSAEPROTONOSUPPORT) { 260 *noproto = 1; 261 return -1; 262 } 263 #endif 264 log_err("can't create socket: %s", sock_strerror(errno)); 265 *noproto = 0; 266 return -1; 267 } 268 #ifdef HAVE_SYSTEMD 269 } else { 270 got_fd_from_systemd = 1; 271 } 272 #endif 273 if(listen) { 274 #ifdef SO_REUSEADDR 275 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 276 (socklen_t)sizeof(on)) < 0) { 277 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 278 sock_strerror(errno)); 279 #ifndef USE_WINSOCK 280 if(errno != ENOSYS) { 281 close(s); 282 *noproto = 0; 283 *inuse = 0; 284 return -1; 285 } 286 #else 287 closesocket(s); 288 *noproto = 0; 289 *inuse = 0; 290 return -1; 291 #endif 292 } 293 #endif /* SO_REUSEADDR */ 294 #ifdef SO_REUSEPORT 295 # ifdef SO_REUSEPORT_LB 296 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 297 * like SO_REUSEPORT on Linux. This is what the users want 298 * with the config option in unbound.conf; if we actually 299 * need local address and port reuse they'll also need to 300 * have SO_REUSEPORT set for them, assume it was _LB they want. 301 */ 302 if (reuseport && *reuseport && 303 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 304 (socklen_t)sizeof(on)) < 0) { 305 #ifdef ENOPROTOOPT 306 if(errno != ENOPROTOOPT || verbosity >= 3) 307 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) 
failed: %s", 308 strerror(errno)); 309 #endif 310 /* this option is not essential, we can continue */ 311 *reuseport = 0; 312 } 313 # else /* no SO_REUSEPORT_LB */ 314 315 /* try to set SO_REUSEPORT so that incoming 316 * queries are distributed evenly among the receiving threads. 317 * Each thread must have its own socket bound to the same port, 318 * with SO_REUSEPORT set on each socket. 319 */ 320 if (reuseport && *reuseport && 321 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 322 (socklen_t)sizeof(on)) < 0) { 323 #ifdef ENOPROTOOPT 324 if(errno != ENOPROTOOPT || verbosity >= 3) 325 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 326 strerror(errno)); 327 #endif 328 /* this option is not essential, we can continue */ 329 *reuseport = 0; 330 } 331 # endif /* SO_REUSEPORT_LB */ 332 #else 333 (void)reuseport; 334 #endif /* defined(SO_REUSEPORT) */ 335 #ifdef IP_TRANSPARENT 336 if (transparent && 337 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 338 (socklen_t)sizeof(on)) < 0) { 339 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 340 strerror(errno)); 341 } 342 #elif defined(IP_BINDANY) 343 if (transparent && 344 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 345 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 346 (void*)&on, (socklen_t)sizeof(on)) < 0) { 347 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 348 (family==AF_INET6?"V6":""), strerror(errno)); 349 } 350 #elif defined(SO_BINDANY) 351 if (transparent && 352 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 353 (socklen_t)sizeof(on)) < 0) { 354 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 355 strerror(errno)); 356 } 357 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 358 } 359 #ifdef IP_FREEBIND 360 if(freebind && 361 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 362 (socklen_t)sizeof(on)) < 0) { 363 log_warn("setsockopt(.. IP_FREEBIND ..) 
failed: %s", 364 strerror(errno)); 365 } 366 #endif /* IP_FREEBIND */ 367 if(rcv) { 368 #ifdef SO_RCVBUF 369 int got; 370 socklen_t slen = (socklen_t)sizeof(got); 371 # ifdef SO_RCVBUFFORCE 372 /* Linux specific: try to use root permission to override 373 * system limits on rcvbuf. The limit is stored in 374 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 375 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 376 (socklen_t)sizeof(rcv)) < 0) { 377 if(errno != EPERM) { 378 log_err("setsockopt(..., SO_RCVBUFFORCE, " 379 "...) failed: %s", sock_strerror(errno)); 380 sock_close(s); 381 *noproto = 0; 382 *inuse = 0; 383 return -1; 384 } 385 # endif /* SO_RCVBUFFORCE */ 386 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 387 (socklen_t)sizeof(rcv)) < 0) { 388 log_err("setsockopt(..., SO_RCVBUF, " 389 "...) failed: %s", sock_strerror(errno)); 390 sock_close(s); 391 *noproto = 0; 392 *inuse = 0; 393 return -1; 394 } 395 /* check if we got the right thing or if system 396 * reduced to some system max. Warn if so */ 397 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 398 &slen) >= 0 && got < rcv/2) { 399 log_warn("so-rcvbuf %u was not granted. " 400 "Got %u. To fix: start with " 401 "root permissions(linux) or sysctl " 402 "bigger net.core.rmem_max(linux) or " 403 "kern.ipc.maxsockbuf(bsd) values.", 404 (unsigned)rcv, (unsigned)got); 405 } 406 # ifdef SO_RCVBUFFORCE 407 } 408 # endif 409 #endif /* SO_RCVBUF */ 410 } 411 /* first do RCVBUF as the receive buffer is more important */ 412 if(snd) { 413 #ifdef SO_SNDBUF 414 int got; 415 socklen_t slen = (socklen_t)sizeof(got); 416 # ifdef SO_SNDBUFFORCE 417 /* Linux specific: try to use root permission to override 418 * system limits on sndbuf. 
The limit is stored in 419 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 420 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 421 (socklen_t)sizeof(snd)) < 0) { 422 if(errno != EPERM) { 423 log_err("setsockopt(..., SO_SNDBUFFORCE, " 424 "...) failed: %s", sock_strerror(errno)); 425 sock_close(s); 426 *noproto = 0; 427 *inuse = 0; 428 return -1; 429 } 430 # endif /* SO_SNDBUFFORCE */ 431 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 432 (socklen_t)sizeof(snd)) < 0) { 433 log_err("setsockopt(..., SO_SNDBUF, " 434 "...) failed: %s", sock_strerror(errno)); 435 sock_close(s); 436 *noproto = 0; 437 *inuse = 0; 438 return -1; 439 } 440 /* check if we got the right thing or if system 441 * reduced to some system max. Warn if so */ 442 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 443 &slen) >= 0 && got < snd/2) { 444 log_warn("so-sndbuf %u was not granted. " 445 "Got %u. To fix: start with " 446 "root permissions(linux) or sysctl " 447 "bigger net.core.wmem_max(linux) or " 448 "kern.ipc.maxsockbuf(bsd) values.", 449 (unsigned)snd, (unsigned)got); 450 } 451 # ifdef SO_SNDBUFFORCE 452 } 453 # endif 454 #endif /* SO_SNDBUF */ 455 } 456 err = set_ip_dscp(s, family, dscp); 457 if(err != NULL) 458 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 459 if(family == AF_INET6) { 460 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 461 int omit6_set = 0; 462 int action; 463 # endif 464 # if defined(IPV6_V6ONLY) 465 if(v6only 466 # ifdef HAVE_SYSTEMD 467 /* Systemd wants to control if the socket is v6 only 468 * or both, with BindIPv6Only=default, ipv6-only or 469 * both in systemd.socket, so it is not set here. */ 470 && !got_fd_from_systemd 471 # endif 472 ) { 473 int val=(v6only==2)?0:1; 474 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 475 (void*)&val, (socklen_t)sizeof(val)) < 0) { 476 log_err("setsockopt(..., IPV6_V6ONLY" 477 ", ...) 
failed: %s", sock_strerror(errno)); 478 sock_close(s); 479 *noproto = 0; 480 *inuse = 0; 481 return -1; 482 } 483 } 484 # endif 485 # if defined(IPV6_USE_MIN_MTU) 486 /* 487 * There is no fragmentation of IPv6 datagrams 488 * during forwarding in the network. Therefore 489 * we do not send UDP datagrams larger than 490 * the minimum IPv6 MTU of 1280 octets. The 491 * EDNS0 message length can be larger if the 492 * network stack supports IPV6_USE_MIN_MTU. 493 */ 494 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 495 (void*)&on, (socklen_t)sizeof(on)) < 0) { 496 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 497 "...) failed: %s", sock_strerror(errno)); 498 sock_close(s); 499 *noproto = 0; 500 *inuse = 0; 501 return -1; 502 } 503 # elif defined(IPV6_MTU) 504 # ifndef USE_WINSOCK 505 /* 506 * On Linux, to send no larger than 1280, the PMTUD is 507 * disabled by default for datagrams anyway, so we set 508 * the MTU to use. 509 */ 510 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 511 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 512 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 513 sock_strerror(errno)); 514 sock_close(s); 515 *noproto = 0; 516 *inuse = 0; 517 return -1; 518 } 519 # elif defined(IPV6_USER_MTU) 520 /* As later versions of the mingw crosscompiler define 521 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 522 * instead which is writable; IPV6_MTU is readonly there. */ 523 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 524 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 525 if (WSAGetLastError() != WSAENOPROTOOPT) { 526 log_err("setsockopt(..., IPV6_USER_MTU, ...) 
failed: %s", 527 wsa_strerror(WSAGetLastError())); 528 sock_close(s); 529 *noproto = 0; 530 *inuse = 0; 531 return -1; 532 } 533 } 534 # endif /* USE_WINSOCK */ 535 # endif /* IPv6 MTU */ 536 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 537 # if defined(IP_PMTUDISC_OMIT) 538 action = IP_PMTUDISC_OMIT; 539 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 540 &action, (socklen_t)sizeof(action)) < 0) { 541 542 if (errno != EINVAL) { 543 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 544 strerror(errno)); 545 sock_close(s); 546 *noproto = 0; 547 *inuse = 0; 548 return -1; 549 } 550 } 551 else 552 { 553 omit6_set = 1; 554 } 555 # endif 556 if (omit6_set == 0) { 557 action = IP_PMTUDISC_DONT; 558 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 559 &action, (socklen_t)sizeof(action)) < 0) { 560 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 561 strerror(errno)); 562 sock_close(s); 563 *noproto = 0; 564 *inuse = 0; 565 return -1; 566 } 567 } 568 # endif /* IPV6_MTU_DISCOVER */ 569 } else if(family == AF_INET) { 570 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 571 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 572 * PMTU information is not accepted, but fragmentation is allowed 573 * if and only if the packet size exceeds the outgoing interface MTU 574 * (and also uses the interface mtu to determine the size of the packets). 575 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 576 * FreeBSD already has same semantics without setting the option. */ 577 int omit_set = 0; 578 int action; 579 # if defined(IP_PMTUDISC_OMIT) 580 action = IP_PMTUDISC_OMIT; 581 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 582 &action, (socklen_t)sizeof(action)) < 0) { 583 584 if (errno != EINVAL) { 585 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) 
failed: %s", 586 strerror(errno)); 587 sock_close(s); 588 *noproto = 0; 589 *inuse = 0; 590 return -1; 591 } 592 } 593 else 594 { 595 omit_set = 1; 596 } 597 # endif 598 if (omit_set == 0) { 599 action = IP_PMTUDISC_DONT; 600 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 601 &action, (socklen_t)sizeof(action)) < 0) { 602 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 603 strerror(errno)); 604 sock_close(s); 605 *noproto = 0; 606 *inuse = 0; 607 return -1; 608 } 609 } 610 # elif defined(IP_DONTFRAG) && !defined(__APPLE__) 611 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 612 * but does not work on that version, so we exclude it */ 613 int off = 0; 614 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 615 &off, (socklen_t)sizeof(off)) < 0) { 616 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s", 617 strerror(errno)); 618 sock_close(s); 619 *noproto = 0; 620 *inuse = 0; 621 return -1; 622 } 623 # endif /* IPv4 MTU */ 624 } 625 if( 626 #ifdef HAVE_SYSTEMD 627 !got_fd_from_systemd && 628 #endif 629 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 630 *noproto = 0; 631 *inuse = 0; 632 #ifndef USE_WINSOCK 633 #ifdef EADDRINUSE 634 *inuse = (errno == EADDRINUSE); 635 /* detect freebsd jail with no ipv6 permission */ 636 if(family==AF_INET6 && errno==EINVAL) 637 *noproto = 1; 638 else if(errno != EADDRINUSE && 639 !(errno == EACCES && verbosity < 4 && !listen) 640 #ifdef EADDRNOTAVAIL 641 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 642 #endif 643 ) { 644 log_err_addr("can't bind socket", strerror(errno), 645 (struct sockaddr_storage*)addr, addrlen); 646 } 647 #endif /* EADDRINUSE */ 648 #else /* USE_WINSOCK */ 649 if(WSAGetLastError() != WSAEADDRINUSE && 650 WSAGetLastError() != WSAEADDRNOTAVAIL && 651 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 652 log_err_addr("can't bind socket", 653 wsa_strerror(WSAGetLastError()), 654 (struct sockaddr_storage*)addr, addrlen); 655 } 656 #endif /* USE_WINSOCK 
*/ 657 sock_close(s); 658 return -1; 659 } 660 if(!fd_set_nonblock(s)) { 661 *noproto = 0; 662 *inuse = 0; 663 sock_close(s); 664 return -1; 665 } 666 return s; 667 } 668 669 int 670 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 671 int* reuseport, int transparent, int mss, int nodelay, int freebind, 672 int use_systemd, int dscp) 673 { 674 int s; 675 char* err; 676 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY) 677 int on = 1; 678 #endif 679 #ifdef HAVE_SYSTEMD 680 int got_fd_from_systemd = 0; 681 #endif 682 #ifdef USE_TCP_FASTOPEN 683 int qlen; 684 #endif 685 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 686 (void)transparent; 687 #endif 688 #if !defined(IP_FREEBIND) 689 (void)freebind; 690 #endif 691 verbose_print_addr(addr); 692 *noproto = 0; 693 #ifdef HAVE_SYSTEMD 694 if (!use_systemd || 695 (use_systemd 696 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1, 697 addr->ai_addr, addr->ai_addrlen, 698 NULL)) == -1)) { 699 #else 700 (void)use_systemd; 701 #endif 702 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 703 #ifndef USE_WINSOCK 704 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 705 *noproto = 1; 706 return -1; 707 } 708 #else 709 if(WSAGetLastError() == WSAEAFNOSUPPORT || 710 WSAGetLastError() == WSAEPROTONOSUPPORT) { 711 *noproto = 1; 712 return -1; 713 } 714 #endif 715 log_err("can't create socket: %s", sock_strerror(errno)); 716 return -1; 717 } 718 if(nodelay) { 719 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY) 720 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on, 721 (socklen_t)sizeof(on)) < 0) { 722 #ifndef USE_WINSOCK 723 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 724 strerror(errno)); 725 #else 726 log_err(" setsockopt(.. TCP_NODELAY ..) 
failed: %s", 727 wsa_strerror(WSAGetLastError())); 728 #endif 729 } 730 #else 731 log_warn(" setsockopt(TCP_NODELAY) unsupported"); 732 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */ 733 } 734 if (mss > 0) { 735 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 736 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss, 737 (socklen_t)sizeof(mss)) < 0) { 738 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s", 739 sock_strerror(errno)); 740 } else { 741 verbose(VERB_ALGO, 742 " tcp socket mss set to %d", mss); 743 } 744 #else 745 log_warn(" setsockopt(TCP_MAXSEG) unsupported"); 746 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 747 } 748 #ifdef HAVE_SYSTEMD 749 } else { 750 got_fd_from_systemd = 1; 751 } 752 #endif 753 #ifdef SO_REUSEADDR 754 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 755 (socklen_t)sizeof(on)) < 0) { 756 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 757 sock_strerror(errno)); 758 sock_close(s); 759 return -1; 760 } 761 #endif /* SO_REUSEADDR */ 762 #ifdef IP_FREEBIND 763 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 764 (socklen_t)sizeof(on)) < 0) { 765 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 766 strerror(errno)); 767 } 768 #endif /* IP_FREEBIND */ 769 #ifdef SO_REUSEPORT 770 /* try to set SO_REUSEPORT so that incoming 771 * connections are distributed evenly among the receiving threads. 772 * Each thread must have its own socket bound to the same port, 773 * with SO_REUSEPORT set on each socket. 774 */ 775 if (reuseport && *reuseport && 776 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 777 (socklen_t)sizeof(on)) < 0) { 778 #ifdef ENOPROTOOPT 779 if(errno != ENOPROTOOPT || verbosity >= 3) 780 log_warn("setsockopt(.. SO_REUSEPORT ..) 
failed: %s", 781 strerror(errno)); 782 #endif 783 /* this option is not essential, we can continue */ 784 *reuseport = 0; 785 } 786 #else 787 (void)reuseport; 788 #endif /* defined(SO_REUSEPORT) */ 789 #if defined(IPV6_V6ONLY) 790 if(addr->ai_family == AF_INET6 && v6only 791 # ifdef HAVE_SYSTEMD 792 /* Systemd wants to control if the socket is v6 only 793 * or both, with BindIPv6Only=default, ipv6-only or 794 * both in systemd.socket, so it is not set here. */ 795 && !got_fd_from_systemd 796 # endif 797 ) { 798 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 799 (void*)&on, (socklen_t)sizeof(on)) < 0) { 800 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 801 sock_strerror(errno)); 802 sock_close(s); 803 return -1; 804 } 805 } 806 #else 807 (void)v6only; 808 #endif /* IPV6_V6ONLY */ 809 #ifdef IP_TRANSPARENT 810 if (transparent && 811 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 812 (socklen_t)sizeof(on)) < 0) { 813 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 814 strerror(errno)); 815 } 816 #elif defined(IP_BINDANY) 817 if (transparent && 818 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 819 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 820 (void*)&on, (socklen_t)sizeof(on)) < 0) { 821 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 822 (addr->ai_family==AF_INET6?"V6":""), strerror(errno)); 823 } 824 #elif defined(SO_BINDANY) 825 if (transparent && 826 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t) 827 sizeof(on)) < 0) { 828 log_warn("setsockopt(.. SO_BINDANY ..) 
failed: %s", 829 strerror(errno)); 830 } 831 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 832 err = set_ip_dscp(s, addr->ai_family, dscp); 833 if(err != NULL) 834 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err); 835 if( 836 #ifdef HAVE_SYSTEMD 837 !got_fd_from_systemd && 838 #endif 839 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { 840 #ifndef USE_WINSOCK 841 /* detect freebsd jail with no ipv6 permission */ 842 if(addr->ai_family==AF_INET6 && errno==EINVAL) 843 *noproto = 1; 844 else { 845 log_err_addr("can't bind socket", strerror(errno), 846 (struct sockaddr_storage*)addr->ai_addr, 847 addr->ai_addrlen); 848 } 849 #else 850 log_err_addr("can't bind socket", 851 wsa_strerror(WSAGetLastError()), 852 (struct sockaddr_storage*)addr->ai_addr, 853 addr->ai_addrlen); 854 #endif 855 sock_close(s); 856 return -1; 857 } 858 if(!fd_set_nonblock(s)) { 859 sock_close(s); 860 return -1; 861 } 862 if(listen(s, TCP_BACKLOG) == -1) { 863 log_err("can't listen: %s", sock_strerror(errno)); 864 sock_close(s); 865 return -1; 866 } 867 #ifdef USE_TCP_FASTOPEN 868 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense 869 against IP spoofing attacks as suggested in RFC7413 */ 870 #ifdef __APPLE__ 871 /* OS X implementation only supports qlen of 1 via this call. Actual 872 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. 
*/ 873 qlen = 1; 874 #else 875 /* 5 is recommended on linux */ 876 qlen = 5; 877 #endif 878 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 879 sizeof(qlen))) == -1 ) { 880 #ifdef ENOPROTOOPT 881 /* squelch ENOPROTOOPT: freebsd server mode with kernel support 882 disabled, except when verbosity enabled for debugging */ 883 if(errno != ENOPROTOOPT || verbosity >= 3) { 884 #endif 885 if(errno == EPERM) { 886 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno)); 887 } else { 888 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno)); 889 } 890 #ifdef ENOPROTOOPT 891 } 892 #endif 893 } 894 #endif 895 return s; 896 } 897 898 char* 899 set_ip_dscp(int socket, int addrfamily, int dscp) 900 { 901 int ds; 902 903 if(dscp == 0) 904 return NULL; 905 ds = dscp << 2; 906 switch(addrfamily) { 907 case AF_INET6: 908 #ifdef IPV6_TCLASS 909 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, 910 sizeof(ds)) < 0) 911 return sock_strerror(errno); 912 break; 913 #else 914 return "IPV6_TCLASS not defined on this system"; 915 #endif 916 default: 917 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0) 918 return sock_strerror(errno); 919 break; 920 } 921 return NULL; 922 } 923 924 int 925 create_local_accept_sock(const char *path, int* noproto, int use_systemd) 926 { 927 #ifdef HAVE_SYSTEMD 928 int ret; 929 930 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1) 931 return ret; 932 else { 933 #endif 934 #ifdef HAVE_SYS_UN_H 935 int s; 936 struct sockaddr_un usock; 937 #ifndef HAVE_SYSTEMD 938 (void)use_systemd; 939 #endif 940 941 verbose(VERB_ALGO, "creating unix socket %s", path); 942 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN 943 /* this member exists on BSDs, not Linux */ 944 usock.sun_len = (unsigned)sizeof(usock); 945 #endif 946 
usock.sun_family = AF_LOCAL; 947 /* length is 92-108, 104 on FreeBSD */ 948 (void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path)); 949 950 if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) { 951 log_err("Cannot create local socket %s (%s)", 952 path, strerror(errno)); 953 return -1; 954 } 955 956 if (unlink(path) && errno != ENOENT) { 957 /* The socket already exists and cannot be removed */ 958 log_err("Cannot remove old local socket %s (%s)", 959 path, strerror(errno)); 960 goto err; 961 } 962 963 if (bind(s, (struct sockaddr *)&usock, 964 (socklen_t)sizeof(struct sockaddr_un)) == -1) { 965 log_err("Cannot bind local socket %s (%s)", 966 path, strerror(errno)); 967 goto err; 968 } 969 970 if (!fd_set_nonblock(s)) { 971 log_err("Cannot set non-blocking mode"); 972 goto err; 973 } 974 975 if (listen(s, TCP_BACKLOG) == -1) { 976 log_err("can't listen: %s", strerror(errno)); 977 goto err; 978 } 979 980 (void)noproto; /*unused*/ 981 return s; 982 983 err: 984 sock_close(s); 985 return -1; 986 987 #ifdef HAVE_SYSTEMD 988 } 989 #endif 990 #else 991 (void)use_systemd; 992 (void)path; 993 log_err("Local sockets are not supported"); 994 *noproto = 1; 995 return -1; 996 #endif 997 } 998 999 1000 /** 1001 * Create socket from getaddrinfo results 1002 */ 1003 static int 1004 make_sock(int stype, const char* ifname, const char* port, 1005 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, 1006 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, 1007 int use_systemd, int dscp, struct unbound_socket* ub_sock) 1008 { 1009 struct addrinfo *res = NULL; 1010 int r, s, inuse, noproto; 1011 hints->ai_socktype = stype; 1012 *noip6 = 0; 1013 if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) { 1014 #ifdef USE_WINSOCK 1015 if(r == EAI_NONAME && hints->ai_family == AF_INET6){ 1016 *noip6 = 1; /* 'Host not found' for IP6 on winXP */ 1017 return -1; 1018 } 1019 #endif 1020 log_err("node %s:%s getaddrinfo: %s %s", 1021 
ifname?ifname:"default", port, gai_strerror(r), 1022 #ifdef EAI_SYSTEM 1023 (r==EAI_SYSTEM?(char*)strerror(errno):"") 1024 #else 1025 "" 1026 #endif 1027 ); 1028 return -1; 1029 } 1030 if(stype == SOCK_DGRAM) { 1031 verbose_print_addr(res); 1032 s = create_udp_sock(res->ai_family, res->ai_socktype, 1033 (struct sockaddr*)res->ai_addr, res->ai_addrlen, 1034 v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, 1035 reuseport, transparent, freebind, use_systemd, dscp); 1036 if(s == -1 && inuse) { 1037 log_err("bind: address already in use"); 1038 } else if(s == -1 && noproto && hints->ai_family == AF_INET6){ 1039 *noip6 = 1; 1040 } 1041 } else { 1042 s = create_tcp_accept_sock(res, v6only, &noproto, reuseport, 1043 transparent, tcp_mss, nodelay, freebind, use_systemd, 1044 dscp); 1045 if(s == -1 && noproto && hints->ai_family == AF_INET6){ 1046 *noip6 = 1; 1047 } 1048 } 1049 1050 ub_sock->addr = res; 1051 ub_sock->s = s; 1052 ub_sock->fam = hints->ai_family; 1053 ub_sock->acl = NULL; 1054 1055 return s; 1056 } 1057 1058 /** make socket and first see if ifname contains port override info */ 1059 static int 1060 make_sock_port(int stype, const char* ifname, const char* port, 1061 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, 1062 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, 1063 int use_systemd, int dscp, struct unbound_socket* ub_sock) 1064 { 1065 char* s = strchr(ifname, '@'); 1066 if(s) { 1067 /* override port with ifspec@port */ 1068 char p[16]; 1069 char newif[128]; 1070 if((size_t)(s-ifname) >= sizeof(newif)) { 1071 log_err("ifname too long: %s", ifname); 1072 *noip6 = 0; 1073 return -1; 1074 } 1075 if(strlen(s+1) >= sizeof(p)) { 1076 log_err("portnumber too long: %s", ifname); 1077 *noip6 = 0; 1078 return -1; 1079 } 1080 (void)strlcpy(newif, ifname, sizeof(newif)); 1081 newif[s-ifname] = 0; 1082 (void)strlcpy(p, s+1, sizeof(p)); 1083 p[strlen(s+1)]=0; 1084 return make_sock(stype, newif, p, hints, v6only, noip6, 
rcv, 1085 snd, reuseport, transparent, tcp_mss, nodelay, freebind, 1086 use_systemd, dscp, ub_sock); 1087 } 1088 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, 1089 reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, 1090 dscp, ub_sock); 1091 } 1092 1093 /** 1094 * Add port to open ports list. 1095 * @param list: list head. changed. 1096 * @param s: fd. 1097 * @param ftype: if fd is UDP. 1098 * @param pp2_enabled: if PROXYv2 is enabled for this port. 1099 * @param ub_sock: socket with address. 1100 * @return false on failure. list in unchanged then. 1101 */ 1102 static int 1103 port_insert(struct listen_port** list, int s, enum listen_type ftype, 1104 int pp2_enabled, struct unbound_socket* ub_sock) 1105 { 1106 struct listen_port* item = (struct listen_port*)malloc( 1107 sizeof(struct listen_port)); 1108 if(!item) 1109 return 0; 1110 item->next = *list; 1111 item->fd = s; 1112 item->ftype = ftype; 1113 item->pp2_enabled = pp2_enabled; 1114 item->socket = ub_sock; 1115 *list = item; 1116 return 1; 1117 } 1118 1119 /** set fd to receive software timestamps */ 1120 static int 1121 set_recvtimestamp(int s) 1122 { 1123 #ifdef HAVE_LINUX_NET_TSTAMP_H 1124 int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; 1125 if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) { 1126 log_err("setsockopt(..., SO_TIMESTAMPNS, ...) 
/**
 * Set the fd to deliver the destination address of received packets.
 * For AF_INET6 this uses IPV6_RECVPKTINFO, else IPV6_PKTINFO; for AF_INET
 * it uses IP_PKTINFO, else IP_RECVDSTADDR. Other families are left alone.
 * @param s: fd.
 * @param family: address family of the socket.
 * @return false if setsockopt failed or if no suitable option exists on
 *	this platform for the family; an error is logged then.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */

	default:
		/* nothing to set for other families */
		break;
	}
	return 1;
}
failed: %s", 1180 strerror(errno)); 1181 return 0; 1182 } 1183 # else 1184 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable " 1185 "interface-automatic or do-ip4 in config"); 1186 return 0; 1187 # endif /* IP_PKTINFO */ 1188 1189 } 1190 return 1; 1191 } 1192 1193 /** see if interface is ssl, its port number == the ssl port number */ 1194 static int 1195 if_is_ssl(const char* ifname, const char* port, int ssl_port, 1196 struct config_strlist* tls_additional_port) 1197 { 1198 struct config_strlist* s; 1199 char* p = strchr(ifname, '@'); 1200 if(!p && atoi(port) == ssl_port) 1201 return 1; 1202 if(p && atoi(p+1) == ssl_port) 1203 return 1; 1204 for(s = tls_additional_port; s; s = s->next) { 1205 if(p && atoi(p+1) == atoi(s->str)) 1206 return 1; 1207 if(!p && atoi(port) == atoi(s->str)) 1208 return 1; 1209 } 1210 return 0; 1211 } 1212 1213 /** 1214 * Helper for ports_open. Creates one interface (or NULL for default). 1215 * @param ifname: The interface ip address. 1216 * @param do_auto: use automatic interface detection. 1217 * If enabled, then ifname must be the wildcard name. 1218 * @param do_udp: if udp should be used. 1219 * @param do_tcp: if tcp should be used. 1220 * @param hints: for getaddrinfo. family and flags have to be set by caller. 1221 * @param port: Port number to use (as string). 1222 * @param list: list of open ports, appended to, changed to point to list head. 1223 * @param rcv: receive buffer size for UDP 1224 * @param snd: send buffer size for UDP 1225 * @param ssl_port: ssl service port number 1226 * @param tls_additional_port: list of additional ssl service port numbers. 1227 * @param https_port: DoH service port number 1228 * @param proxy_protocol_port: list of PROXYv2 port numbers. 1229 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. 1230 * set to false on exit if reuseport failed due to no kernel support. 1231 * @param transparent: set IP_TRANSPARENT socket option. 
1232 * @param tcp_mss: maximum segment size of tcp socket. default if zero. 1233 * @param freebind: set IP_FREEBIND socket option. 1234 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection 1235 * @param use_systemd: if true, fetch sockets from systemd. 1236 * @param dnscrypt_port: dnscrypt service port number 1237 * @param dscp: DSCP to use. 1238 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to 1239 * wait to discard if UDP packets have waited for long in the socket 1240 * buffer. 1241 * @return: returns false on error. 1242 */ 1243 static int 1244 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 1245 struct addrinfo *hints, const char* port, struct listen_port** list, 1246 size_t rcv, size_t snd, int ssl_port, 1247 struct config_strlist* tls_additional_port, int https_port, 1248 struct config_strlist* proxy_protocol_port, 1249 int* reuseport, int transparent, int tcp_mss, int freebind, 1250 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, 1251 int sock_queue_timeout) 1252 { 1253 int s, noip6=0; 1254 int is_https = if_is_https(ifname, port, https_port); 1255 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port); 1256 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); 1257 int nodelay = is_https && http2_nodelay; 1258 struct unbound_socket* ub_sock; 1259 1260 if(!do_udp && !do_tcp) 1261 return 0; 1262 1263 if(is_pp2) { 1264 if(is_dnscrypt) { 1265 fatal_exit("PROXYv2 and DNSCrypt combination not " 1266 "supported!"); 1267 } else if(is_https) { 1268 fatal_exit("PROXYv2 and DoH combination not " 1269 "supported!"); 1270 } 1271 } 1272 1273 if(do_auto) { 1274 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1275 if(!ub_sock) 1276 return 0; 1277 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1278 &noip6, rcv, snd, reuseport, transparent, 1279 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1280 if(ub_sock->addr) 1281 freeaddrinfo(ub_sock->addr); 1282 
free(ub_sock); 1283 if(noip6) { 1284 log_warn("IPv6 protocol not available"); 1285 return 1; 1286 } 1287 return 0; 1288 } 1289 /* getting source addr packet info is highly non-portable */ 1290 if(!set_recvpktinfo(s, hints->ai_family)) { 1291 sock_close(s); 1292 if(ub_sock->addr) 1293 freeaddrinfo(ub_sock->addr); 1294 free(ub_sock); 1295 return 0; 1296 } 1297 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1298 log_warn("socket timestamping is not available"); 1299 } 1300 if(!port_insert(list, s, is_dnscrypt 1301 ?listen_type_udpancil_dnscrypt:listen_type_udpancil, 1302 is_pp2, ub_sock)) { 1303 sock_close(s); 1304 if(ub_sock->addr) 1305 freeaddrinfo(ub_sock->addr); 1306 free(ub_sock); 1307 return 0; 1308 } 1309 } else if(do_udp) { 1310 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1311 if(!ub_sock) 1312 return 0; 1313 /* regular udp socket */ 1314 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1315 &noip6, rcv, snd, reuseport, transparent, 1316 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1317 if(ub_sock->addr) 1318 freeaddrinfo(ub_sock->addr); 1319 free(ub_sock); 1320 if(noip6) { 1321 log_warn("IPv6 protocol not available"); 1322 return 1; 1323 } 1324 return 0; 1325 } 1326 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1327 log_warn("socket timestamping is not available"); 1328 } 1329 if(!port_insert(list, s, is_dnscrypt 1330 ?listen_type_udp_dnscrypt : 1331 (sock_queue_timeout ? 
1332 listen_type_udpancil:listen_type_udp), 1333 is_pp2, ub_sock)) { 1334 sock_close(s); 1335 if(ub_sock->addr) 1336 freeaddrinfo(ub_sock->addr); 1337 free(ub_sock); 1338 return 0; 1339 } 1340 } 1341 if(do_tcp) { 1342 int is_ssl = if_is_ssl(ifname, port, ssl_port, 1343 tls_additional_port); 1344 enum listen_type port_type; 1345 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1346 if(!ub_sock) 1347 return 0; 1348 if(is_ssl) 1349 port_type = listen_type_ssl; 1350 else if(is_https) 1351 port_type = listen_type_http; 1352 else if(is_dnscrypt) 1353 port_type = listen_type_tcp_dnscrypt; 1354 else 1355 port_type = listen_type_tcp; 1356 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 1357 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, 1358 freebind, use_systemd, dscp, ub_sock)) == -1) { 1359 if(ub_sock->addr) 1360 freeaddrinfo(ub_sock->addr); 1361 free(ub_sock); 1362 if(noip6) { 1363 /*log_warn("IPv6 protocol not available");*/ 1364 return 1; 1365 } 1366 return 0; 1367 } 1368 if(is_ssl) 1369 verbose(VERB_ALGO, "setup TCP for SSL service"); 1370 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) { 1371 sock_close(s); 1372 if(ub_sock->addr) 1373 freeaddrinfo(ub_sock->addr); 1374 free(ub_sock); 1375 return 0; 1376 } 1377 } 1378 return 1; 1379 } 1380 1381 /** 1382 * Add items to commpoint list in front. 1383 * @param c: commpoint to add. 1384 * @param front: listen struct. 1385 * @return: false on failure. 
1386 */ 1387 static int 1388 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) 1389 { 1390 struct listen_list* item = (struct listen_list*)malloc( 1391 sizeof(struct listen_list)); 1392 if(!item) 1393 return 0; 1394 item->com = c; 1395 item->next = front->cps; 1396 front->cps = item; 1397 return 1; 1398 } 1399 1400 void listen_setup_locks(void) 1401 { 1402 if(!stream_wait_lock_inited) { 1403 lock_basic_init(&stream_wait_count_lock); 1404 stream_wait_lock_inited = 1; 1405 } 1406 if(!http2_query_buffer_lock_inited) { 1407 lock_basic_init(&http2_query_buffer_count_lock); 1408 http2_query_buffer_lock_inited = 1; 1409 } 1410 if(!http2_response_buffer_lock_inited) { 1411 lock_basic_init(&http2_response_buffer_count_lock); 1412 http2_response_buffer_lock_inited = 1; 1413 } 1414 } 1415 1416 void listen_desetup_locks(void) 1417 { 1418 if(stream_wait_lock_inited) { 1419 stream_wait_lock_inited = 0; 1420 lock_basic_destroy(&stream_wait_count_lock); 1421 } 1422 if(http2_query_buffer_lock_inited) { 1423 http2_query_buffer_lock_inited = 0; 1424 lock_basic_destroy(&http2_query_buffer_count_lock); 1425 } 1426 if(http2_response_buffer_lock_inited) { 1427 http2_response_buffer_lock_inited = 0; 1428 lock_basic_destroy(&http2_response_buffer_count_lock); 1429 } 1430 } 1431 1432 struct listen_dnsport* 1433 listen_create(struct comm_base* base, struct listen_port* ports, 1434 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, 1435 int harden_large_queries, uint32_t http_max_streams, 1436 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, 1437 void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, 1438 void *cb_arg) 1439 { 1440 struct listen_dnsport* front = (struct listen_dnsport*) 1441 malloc(sizeof(struct listen_dnsport)); 1442 if(!front) 1443 return NULL; 1444 front->cps = NULL; 1445 front->udp_buff = sldns_buffer_new(bufsize); 1446 #ifdef USE_DNSCRYPT 1447 front->dnscrypt_udp_buff = NULL; 1448 #endif 1449 if(!front->udp_buff) 
{ 1450 free(front); 1451 return NULL; 1452 } 1453 1454 /* create comm points as needed */ 1455 while(ports) { 1456 struct comm_point* cp = NULL; 1457 if(ports->ftype == listen_type_udp || 1458 ports->ftype == listen_type_udp_dnscrypt) { 1459 cp = comm_point_create_udp(base, ports->fd, 1460 front->udp_buff, ports->pp2_enabled, cb, 1461 cb_arg, ports->socket); 1462 } else if(ports->ftype == listen_type_tcp || 1463 ports->ftype == listen_type_tcp_dnscrypt) { 1464 cp = comm_point_create_tcp(base, ports->fd, 1465 tcp_accept_count, tcp_idle_timeout, 1466 harden_large_queries, 0, NULL, 1467 tcp_conn_limit, bufsize, front->udp_buff, 1468 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1469 ports->socket); 1470 } else if(ports->ftype == listen_type_ssl || 1471 ports->ftype == listen_type_http) { 1472 cp = comm_point_create_tcp(base, ports->fd, 1473 tcp_accept_count, tcp_idle_timeout, 1474 harden_large_queries, 1475 http_max_streams, http_endpoint, 1476 tcp_conn_limit, bufsize, front->udp_buff, 1477 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1478 ports->socket); 1479 if(ports->ftype == listen_type_http) { 1480 if(!sslctx && !http_notls) { 1481 log_warn("HTTPS port configured, but " 1482 "no TLS tls-service-key or " 1483 "tls-service-pem set"); 1484 } 1485 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 1486 if(!http_notls) { 1487 log_warn("Unbound is not compiled " 1488 "with an OpenSSL version " 1489 "supporting ALPN " 1490 "(OpenSSL >= 1.0.2). This " 1491 "is required to use " 1492 "DNS-over-HTTPS"); 1493 } 1494 #endif 1495 #ifndef HAVE_NGHTTP2_NGHTTP2_H 1496 log_warn("Unbound is not compiled with " 1497 "nghttp2. 
This is required to use " 1498 "DNS-over-HTTPS."); 1499 #endif 1500 } 1501 } else if(ports->ftype == listen_type_udpancil || 1502 ports->ftype == listen_type_udpancil_dnscrypt) { 1503 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 1504 cp = comm_point_create_udp_ancil(base, ports->fd, 1505 front->udp_buff, ports->pp2_enabled, cb, 1506 cb_arg, ports->socket); 1507 #else 1508 log_warn("This system does not support UDP ancilliary data."); 1509 #endif 1510 } 1511 if(!cp) { 1512 log_err("can't create commpoint"); 1513 listen_delete(front); 1514 return NULL; 1515 } 1516 if((http_notls && ports->ftype == listen_type_http) || 1517 (ports->ftype == listen_type_tcp) || 1518 (ports->ftype == listen_type_udp) || 1519 (ports->ftype == listen_type_udpancil) || 1520 (ports->ftype == listen_type_tcp_dnscrypt) || 1521 (ports->ftype == listen_type_udp_dnscrypt) || 1522 (ports->ftype == listen_type_udpancil_dnscrypt)) 1523 cp->ssl = NULL; 1524 else 1525 cp->ssl = sslctx; 1526 cp->dtenv = dtenv; 1527 cp->do_not_close = 1; 1528 #ifdef USE_DNSCRYPT 1529 if (ports->ftype == listen_type_udp_dnscrypt || 1530 ports->ftype == listen_type_tcp_dnscrypt || 1531 ports->ftype == listen_type_udpancil_dnscrypt) { 1532 cp->dnscrypt = 1; 1533 cp->dnscrypt_buffer = sldns_buffer_new(bufsize); 1534 if(!cp->dnscrypt_buffer) { 1535 log_err("can't alloc dnscrypt_buffer"); 1536 comm_point_delete(cp); 1537 listen_delete(front); 1538 return NULL; 1539 } 1540 front->dnscrypt_udp_buff = cp->dnscrypt_buffer; 1541 } 1542 #endif 1543 if(!listen_cp_insert(cp, front)) { 1544 log_err("malloc failed"); 1545 comm_point_delete(cp); 1546 listen_delete(front); 1547 return NULL; 1548 } 1549 ports = ports->next; 1550 } 1551 if(!front->cps) { 1552 log_err("Could not open sockets to accept queries."); 1553 listen_delete(front); 1554 return NULL; 1555 } 1556 1557 return front; 1558 } 1559 1560 void 1561 listen_list_delete(struct listen_list* list) 1562 { 1563 struct listen_list *p = list, *pn; 1564 
while(p) { 1565 pn = p->next; 1566 comm_point_delete(p->com); 1567 free(p); 1568 p = pn; 1569 } 1570 } 1571 1572 void 1573 listen_delete(struct listen_dnsport* front) 1574 { 1575 if(!front) 1576 return; 1577 listen_list_delete(front->cps); 1578 #ifdef USE_DNSCRYPT 1579 if(front->dnscrypt_udp_buff && 1580 front->udp_buff != front->dnscrypt_udp_buff) { 1581 sldns_buffer_free(front->dnscrypt_udp_buff); 1582 } 1583 #endif 1584 sldns_buffer_free(front->udp_buff); 1585 free(front); 1586 } 1587 1588 #ifdef HAVE_GETIFADDRS 1589 static int 1590 resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size) 1591 { 1592 struct ifaddrs *ifa; 1593 void *tmpbuf; 1594 int last_ip_addresses_size = *ip_addresses_size; 1595 1596 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) { 1597 sa_family_t family; 1598 const char* atsign; 1599 #ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */ 1600 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1]; 1601 #else 1602 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1]; 1603 #endif 1604 1605 if((atsign=strrchr(search_ifa, '@')) != NULL) { 1606 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa) 1607 || strncmp(ifa->ifa_name, search_ifa, 1608 atsign-search_ifa) != 0) 1609 continue; 1610 } else { 1611 if(strcmp(ifa->ifa_name, search_ifa) != 0) 1612 continue; 1613 atsign = ""; 1614 } 1615 1616 if(ifa->ifa_addr == NULL) 1617 continue; 1618 1619 family = ifa->ifa_addr->sa_family; 1620 if(family == AF_INET) { 1621 char a4[INET_ADDRSTRLEN + 1]; 1622 struct sockaddr_in *in4 = (struct sockaddr_in *) 1623 ifa->ifa_addr; 1624 if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) { 1625 log_err("inet_ntop failed"); 1626 return 0; 1627 } 1628 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1629 a4, atsign); 1630 } 1631 #ifdef INET6 1632 else if(family == AF_INET6) { 1633 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) 1634 ifa->ifa_addr; 1635 char a6[INET6_ADDRSTRLEN + 1]; 1636 char 
if_index_name[IF_NAMESIZE + 1]; 1637 if_index_name[0] = 0; 1638 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) { 1639 log_err("inet_ntop failed"); 1640 return 0; 1641 } 1642 (void)if_indextoname(in6->sin6_scope_id, 1643 (char *)if_index_name); 1644 if (strlen(if_index_name) != 0) { 1645 snprintf(addr_buf, sizeof(addr_buf), 1646 "%s%%%s%s", a6, if_index_name, atsign); 1647 } else { 1648 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1649 a6, atsign); 1650 } 1651 } 1652 #endif 1653 else { 1654 continue; 1655 } 1656 verbose(4, "interface %s has address %s", search_ifa, addr_buf); 1657 1658 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1659 if(!tmpbuf) { 1660 log_err("realloc failed: out of memory"); 1661 return 0; 1662 } else { 1663 *ip_addresses = tmpbuf; 1664 } 1665 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf); 1666 if(!(*ip_addresses)[*ip_addresses_size]) { 1667 log_err("strdup failed: out of memory"); 1668 return 0; 1669 } 1670 (*ip_addresses_size)++; 1671 } 1672 1673 if (*ip_addresses_size == last_ip_addresses_size) { 1674 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1675 if(!tmpbuf) { 1676 log_err("realloc failed: out of memory"); 1677 return 0; 1678 } else { 1679 *ip_addresses = tmpbuf; 1680 } 1681 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa); 1682 if(!(*ip_addresses)[*ip_addresses_size]) { 1683 log_err("strdup failed: out of memory"); 1684 return 0; 1685 } 1686 (*ip_addresses_size)++; 1687 } 1688 return 1; 1689 } 1690 #endif /* HAVE_GETIFADDRS */ 1691 1692 int resolve_interface_names(char** ifs, int num_ifs, 1693 struct config_strlist* list, char*** resif, int* num_resif) 1694 { 1695 #ifdef HAVE_GETIFADDRS 1696 struct ifaddrs *addrs = NULL; 1697 if(num_ifs == 0 && list == NULL) { 1698 *resif = NULL; 1699 *num_resif = 0; 1700 return 1; 1701 } 1702 if(getifaddrs(&addrs) == -1) { 1703 log_err("failed to list interfaces: getifaddrs: %s", 1704 strerror(errno)); 1705 
freeifaddrs(addrs); 1706 return 0; 1707 } 1708 if(ifs) { 1709 int i; 1710 for(i=0; i<num_ifs; i++) { 1711 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { 1712 freeifaddrs(addrs); 1713 config_del_strarray(*resif, *num_resif); 1714 *resif = NULL; 1715 *num_resif = 0; 1716 return 0; 1717 } 1718 } 1719 } 1720 if(list) { 1721 struct config_strlist* p; 1722 for(p = list; p; p = p->next) { 1723 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { 1724 freeifaddrs(addrs); 1725 config_del_strarray(*resif, *num_resif); 1726 *resif = NULL; 1727 *num_resif = 0; 1728 return 0; 1729 } 1730 } 1731 } 1732 freeifaddrs(addrs); 1733 return 1; 1734 #else 1735 struct config_strlist* p; 1736 if(num_ifs == 0 && list == NULL) { 1737 *resif = NULL; 1738 *num_resif = 0; 1739 return 1; 1740 } 1741 *num_resif = num_ifs; 1742 for(p = list; p; p = p->next) { 1743 (*num_resif)++; 1744 } 1745 *resif = calloc(*num_resif, sizeof(**resif)); 1746 if(!*resif) { 1747 log_err("out of memory"); 1748 return 0; 1749 } 1750 if(ifs) { 1751 int i; 1752 for(i=0; i<num_ifs; i++) { 1753 (*resif)[i] = strdup(ifs[i]); 1754 if(!((*resif)[i])) { 1755 log_err("out of memory"); 1756 config_del_strarray(*resif, *num_resif); 1757 *resif = NULL; 1758 *num_resif = 0; 1759 return 0; 1760 } 1761 } 1762 } 1763 if(list) { 1764 int idx = num_ifs; 1765 for(p = list; p; p = p->next) { 1766 (*resif)[idx] = strdup(p->str); 1767 if(!((*resif)[idx])) { 1768 log_err("out of memory"); 1769 config_del_strarray(*resif, *num_resif); 1770 *resif = NULL; 1771 *num_resif = 0; 1772 return 0; 1773 } 1774 idx++; 1775 } 1776 } 1777 return 1; 1778 #endif /* HAVE_GETIFADDRS */ 1779 } 1780 1781 struct listen_port* 1782 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, 1783 int* reuseport) 1784 { 1785 struct listen_port* list = NULL; 1786 struct addrinfo hints; 1787 int i, do_ip4, do_ip6; 1788 int do_tcp, do_auto; 1789 char portbuf[32]; 1790 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port); 1791 do_ip4 = 
cfg->do_ip4; 1792 do_ip6 = cfg->do_ip6; 1793 do_tcp = cfg->do_tcp; 1794 do_auto = cfg->if_automatic && cfg->do_udp; 1795 if(cfg->incoming_num_tcp == 0) 1796 do_tcp = 0; 1797 1798 /* getaddrinfo */ 1799 memset(&hints, 0, sizeof(hints)); 1800 hints.ai_flags = AI_PASSIVE; 1801 /* no name lookups on our listening ports */ 1802 if(num_ifs > 0) 1803 hints.ai_flags |= AI_NUMERICHOST; 1804 hints.ai_family = AF_UNSPEC; 1805 #ifndef INET6 1806 do_ip6 = 0; 1807 #endif 1808 if(!do_ip4 && !do_ip6) { 1809 return NULL; 1810 } 1811 /* create ip4 and ip6 ports so that return addresses are nice. */ 1812 if(do_auto || num_ifs == 0) { 1813 if(do_auto && cfg->if_automatic_ports && 1814 cfg->if_automatic_ports[0]!=0) { 1815 char* now = cfg->if_automatic_ports; 1816 while(now && *now) { 1817 char* after; 1818 int extraport; 1819 while(isspace((unsigned char)*now)) 1820 now++; 1821 if(!*now) 1822 break; 1823 after = now; 1824 extraport = (int)strtol(now, &after, 10); 1825 if(extraport < 0 || extraport > 65535) { 1826 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1827 listening_ports_free(list); 1828 return NULL; 1829 } 1830 if(extraport == 0 && now == after) { 1831 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1832 listening_ports_free(list); 1833 return NULL; 1834 } 1835 now = after; 1836 snprintf(portbuf, sizeof(portbuf), "%d", extraport); 1837 if(do_ip6) { 1838 hints.ai_family = AF_INET6; 1839 if(!ports_create_if("::0", 1840 do_auto, cfg->do_udp, do_tcp, 1841 &hints, portbuf, &list, 1842 cfg->so_rcvbuf, cfg->so_sndbuf, 1843 cfg->ssl_port, cfg->tls_additional_port, 1844 cfg->https_port, 1845 cfg->proxy_protocol_port, 1846 reuseport, cfg->ip_transparent, 1847 cfg->tcp_mss, cfg->ip_freebind, 1848 cfg->http_nodelay, cfg->use_systemd, 1849 cfg->dnscrypt_port, cfg->ip_dscp, 
cfg->sock_queue_timeout)) { 1850 listening_ports_free(list); 1851 return NULL; 1852 } 1853 } 1854 if(do_ip4) { 1855 hints.ai_family = AF_INET; 1856 if(!ports_create_if("0.0.0.0", 1857 do_auto, cfg->do_udp, do_tcp, 1858 &hints, portbuf, &list, 1859 cfg->so_rcvbuf, cfg->so_sndbuf, 1860 cfg->ssl_port, cfg->tls_additional_port, 1861 cfg->https_port, 1862 cfg->proxy_protocol_port, 1863 reuseport, cfg->ip_transparent, 1864 cfg->tcp_mss, cfg->ip_freebind, 1865 cfg->http_nodelay, cfg->use_systemd, 1866 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1867 listening_ports_free(list); 1868 return NULL; 1869 } 1870 } 1871 } 1872 return list; 1873 } 1874 if(do_ip6) { 1875 hints.ai_family = AF_INET6; 1876 if(!ports_create_if(do_auto?"::0":"::1", 1877 do_auto, cfg->do_udp, do_tcp, 1878 &hints, portbuf, &list, 1879 cfg->so_rcvbuf, cfg->so_sndbuf, 1880 cfg->ssl_port, cfg->tls_additional_port, 1881 cfg->https_port, cfg->proxy_protocol_port, 1882 reuseport, cfg->ip_transparent, 1883 cfg->tcp_mss, cfg->ip_freebind, 1884 cfg->http_nodelay, cfg->use_systemd, 1885 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1886 listening_ports_free(list); 1887 return NULL; 1888 } 1889 } 1890 if(do_ip4) { 1891 hints.ai_family = AF_INET; 1892 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 1893 do_auto, cfg->do_udp, do_tcp, 1894 &hints, portbuf, &list, 1895 cfg->so_rcvbuf, cfg->so_sndbuf, 1896 cfg->ssl_port, cfg->tls_additional_port, 1897 cfg->https_port, cfg->proxy_protocol_port, 1898 reuseport, cfg->ip_transparent, 1899 cfg->tcp_mss, cfg->ip_freebind, 1900 cfg->http_nodelay, cfg->use_systemd, 1901 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1902 listening_ports_free(list); 1903 return NULL; 1904 } 1905 } 1906 } else for(i = 0; i<num_ifs; i++) { 1907 if(str_is_ip6(ifs[i])) { 1908 if(!do_ip6) 1909 continue; 1910 hints.ai_family = AF_INET6; 1911 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1912 do_tcp, &hints, portbuf, &list, 1913 cfg->so_rcvbuf, 
cfg->so_sndbuf, 1914 cfg->ssl_port, cfg->tls_additional_port, 1915 cfg->https_port, cfg->proxy_protocol_port, 1916 reuseport, cfg->ip_transparent, 1917 cfg->tcp_mss, cfg->ip_freebind, 1918 cfg->http_nodelay, cfg->use_systemd, 1919 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1920 listening_ports_free(list); 1921 return NULL; 1922 } 1923 } else { 1924 if(!do_ip4) 1925 continue; 1926 hints.ai_family = AF_INET; 1927 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1928 do_tcp, &hints, portbuf, &list, 1929 cfg->so_rcvbuf, cfg->so_sndbuf, 1930 cfg->ssl_port, cfg->tls_additional_port, 1931 cfg->https_port, cfg->proxy_protocol_port, 1932 reuseport, cfg->ip_transparent, 1933 cfg->tcp_mss, cfg->ip_freebind, 1934 cfg->http_nodelay, cfg->use_systemd, 1935 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1936 listening_ports_free(list); 1937 return NULL; 1938 } 1939 } 1940 } 1941 1942 return list; 1943 } 1944 1945 void listening_ports_free(struct listen_port* list) 1946 { 1947 struct listen_port* nx; 1948 while(list) { 1949 nx = list->next; 1950 if(list->fd != -1) { 1951 sock_close(list->fd); 1952 } 1953 /* rc_ports don't have ub_socket */ 1954 if(list->socket) { 1955 if(list->socket->addr) 1956 freeaddrinfo(list->socket->addr); 1957 free(list->socket); 1958 } 1959 free(list); 1960 list = nx; 1961 } 1962 } 1963 1964 size_t listen_get_mem(struct listen_dnsport* listen) 1965 { 1966 struct listen_list* p; 1967 size_t s = sizeof(*listen) + sizeof(*listen->base) + 1968 sizeof(*listen->udp_buff) + 1969 sldns_buffer_capacity(listen->udp_buff); 1970 #ifdef USE_DNSCRYPT 1971 s += sizeof(*listen->dnscrypt_udp_buff); 1972 if(listen->udp_buff != listen->dnscrypt_udp_buff){ 1973 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff); 1974 } 1975 #endif 1976 for(p = listen->cps; p; p = p->next) { 1977 s += sizeof(*p); 1978 s += comm_point_get_mem(p->com); 1979 } 1980 return s; 1981 } 1982 1983 void listen_stop_accept(struct listen_dnsport* listen) 1984 { 1985 /* do 
not stop the ones that have no tcp_free list 1986 * (they have already stopped listening) */ 1987 struct listen_list* p; 1988 for(p=listen->cps; p; p=p->next) { 1989 if(p->com->type == comm_tcp_accept && 1990 p->com->tcp_free != NULL) { 1991 comm_point_stop_listening(p->com); 1992 } 1993 } 1994 } 1995 1996 void listen_start_accept(struct listen_dnsport* listen) 1997 { 1998 /* do not start the ones that have no tcp_free list, it is no 1999 * use to listen to them because they have no free tcp handlers */ 2000 struct listen_list* p; 2001 for(p=listen->cps; p; p=p->next) { 2002 if(p->com->type == comm_tcp_accept && 2003 p->com->tcp_free != NULL) { 2004 comm_point_start_listening(p->com, -1, -1); 2005 } 2006 } 2007 } 2008 2009 struct tcp_req_info* 2010 tcp_req_info_create(struct sldns_buffer* spoolbuf) 2011 { 2012 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req)); 2013 if(!req) { 2014 log_err("malloc failure for new stream outoforder processing structure"); 2015 return NULL; 2016 } 2017 memset(req, 0, sizeof(*req)); 2018 req->spool_buffer = spoolbuf; 2019 return req; 2020 } 2021 2022 void 2023 tcp_req_info_delete(struct tcp_req_info* req) 2024 { 2025 if(!req) return; 2026 tcp_req_info_clear(req); 2027 /* cp is pointer back to commpoint that owns this struct and 2028 * called delete on us */ 2029 /* spool_buffer is shared udp buffer, not deleted here */ 2030 free(req); 2031 } 2032 2033 void tcp_req_info_clear(struct tcp_req_info* req) 2034 { 2035 struct tcp_req_open_item* open, *nopen; 2036 struct tcp_req_done_item* item, *nitem; 2037 if(!req) return; 2038 2039 /* free outstanding request mesh reply entries */ 2040 open = req->open_req_list; 2041 while(open) { 2042 nopen = open->next; 2043 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp); 2044 free(open); 2045 open = nopen; 2046 } 2047 req->open_req_list = NULL; 2048 req->num_open_req = 0; 2049 2050 /* free pending writable result packets */ 2051 item = req->done_req_list; 2052 
while(item) {
		nitem = item->next;
		lock_basic_lock(&stream_wait_count_lock);
		stream_wait_count -= (sizeof(struct tcp_req_done_item)
			+item->len);
		lock_basic_unlock(&stream_wait_count_lock);
		free(item->buf);
		free(item);
		item = nitem;
	}
	req->done_req_list = NULL;
	req->num_done_req = 0;
	req->read_is_closed = 0;
}

/**
 * Remove every entry for mesh state m from the open request list.
 * Matching entries are unlinked from req->open_req_list and freed, and
 * req->num_open_req is decremented once per removed entry.
 * Does nothing when req or m is NULL.
 * @param req: the tcp request info that holds the open request list.
 * @param m: the mesh state to look for.
 */
void
tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
{
	struct tcp_req_open_item* open, *prev = NULL;
	if(!req || !m) return;
	open = req->open_req_list;
	while(open) {
		if(open->mesh_state == m) {
			struct tcp_req_open_item* next;
			/* unlink: from the predecessor, or from the list
			 * head when this is the first entry */
			if(prev) prev->next = open->next;
			else req->open_req_list = open->next;
			/* caller has to manage the mesh state reply entry */
			next = open->next;
			free(open);
			req->num_open_req --;

			/* prev is left as it is: the entry after it was the
			 * one that has just been removed */
			open = next;
			continue;
		}
		prev = open;
		open = open->next;
	}
}

/**
 * Setup listening for read or write.
 * Nothing is changed while tcp_byte_count is nonzero. Otherwise the comm
 * point is stopped and started again: for writing if tcp_is_reading is
 * not set, else for reading if the read side is not closed (read_again is
 * set as well), else with comm_point_listen_for_rw(cp, 0, 0).
 * @param req: the tcp request info with the comm point to set up.
 */
static void
tcp_req_info_setup_listen(struct tcp_req_info* req)
{
	int wr = 0;
	int rd = 0;

	if(req->cp->tcp_byte_count != 0) {
		/* cannot change, halfway through */
		return;
	}

	if(!req->cp->tcp_is_reading)
		wr = 1;
	if(!req->read_is_closed)
		rd = 1;

	/* writing takes precedence over reading */
	if(wr) {
		req->cp->tcp_is_reading = 0;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
	} else if(rd) {
		req->cp->tcp_is_reading = 1;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		/* and also read it (from SSL stack buffers), so
		 * no event read event is expected since the remainder of
		 * the TLS frame is sitting in the buffers. */
		req->read_again = 1;
	} else {
		/* not writing, and the read side is closed */
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		comm_point_listen_for_rw(req->cp, 0, 0);
	}
}

/**
 * Remove first item from list of pending results.
 * The size of the item is subtracted from stream_wait_count and
 * num_done_req is decremented. The item is not freed here.
 * @param req: the tcp request info, the done list must not be empty.
 * @return the unlinked item.
 */
static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info* req)
{
	struct tcp_req_done_item* item;
	log_assert(req->num_done_req > 0 && req->done_req_list);
	item = req->done_req_list;
	lock_basic_lock(&stream_wait_count_lock);
	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
	lock_basic_unlock(&stream_wait_count_lock);
	req->done_req_list = req->done_req_list->next;
	req->num_done_req --;
	return item;
}

/**
 * Send given buffer and setup to write.
 * Copies len bytes from buf into the comm point buffer, flips that buffer
 * and marks the comm point as writing. The listening state is not changed.
 * @param req: the tcp request info.
 * @param buf: the data to copy.
 * @param len: number of bytes in buf.
 */
static void
tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
	size_t len)
{
	sldns_buffer_clear(req->cp->buffer);
	sldns_buffer_write(req->cp->buffer, buf, len);
	sldns_buffer_flip(req->cp->buffer);

	req->cp->tcp_is_reading = 0; /* we are now writing */
}

/**
 * Pick up the next result and start writing it to the channel.
 * Does nothing when no results are pending. The popped item and its data
 * are freed after the data has been copied into the comm point buffer.
 * @param req: the tcp request info.
 */
static void
tcp_req_pickup_next_result(struct tcp_req_info* req)
{
	if(req->num_done_req > 0) {
		/* unlist the done item from the list of pending results */
		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
		tcp_req_info_start_write_buf(req, item->buf, item->len);
		free(item->buf);
		free(item);
	}
}

/**
 * The read channel has closed.
 * @param req: the tcp request info.
 * @return 0 if there are no open and no done requests left, 1 after the
 *	comm point has been set up to write a pending result or to wait for
 *	the open requests.
 */
int
tcp_req_info_handle_read_close(struct tcp_req_info* req)
{
	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
	/* reset byte count for (potential) partial read */
	req->cp->tcp_byte_count = 0;
	/* if we still have results to write, pick up next and write it */
	if(req->num_done_req != 0) {
		tcp_req_pickup_next_result(req);
		tcp_req_info_setup_listen(req);
		return 1;
	}
	/* if nothing to do, this closes the connection */
	if(req->num_open_req == 0 && req->num_done_req == 0)
		return 0;
	/* otherwise, we must be waiting for dns resolve, wait with timeout */
	req->read_is_closed = 1;
	tcp_req_info_setup_listen(req);
	return 1;
}

/**
 * A write on the channel has finished.
 * Drops the reply when nothing is left to write and the read side is
 * closed. Otherwise picks up the next pending result, if any, and sets up
 * listening again.
 * @param req: the tcp request info.
 */
void
tcp_req_info_handle_writedone(struct tcp_req_info* req)
{
	/* back to reading state, we finished this write event */
	sldns_buffer_clear(req->cp->buffer);
	if(req->num_done_req == 0 && req->read_is_closed) {
		/* no more to write and nothing to read, close it */
		comm_point_drop_reply(&req->cp->repinfo);
		return;
	}
	req->cp->tcp_is_reading = 1;
	/* see if another result needs writing */
	tcp_req_pickup_next_result(req);

	/* see if there is more to write, if not stop_listening for writing */
	/* see if new requests are allowed, if so, start_listening
	 * for reading */
	tcp_req_info_setup_listen(req);
}

/**
 * A request has been read completely into the comm point buffer.
 * Calls the comm point callback for it. If the callback returns nonzero,
 * or if is_reply was set while the callback was running, the comm point is
 * switched to writing. If is_drop was set, nothing more is done. Otherwise
 * the next pending result, if any, is picked up and listening is set up
 * again.
 * @param req: the tcp request info.
 */
void
tcp_req_info_handle_readdone(struct tcp_req_info* req)
{
	struct comm_point* c = req->cp;

	/* we want to read up several requests, unless there are
	 * pending answers */

	req->is_drop = 0;
	req->is_reply = 0;
	/* flag that the callback is running, tcp_req_info_send_reply
	 * checks it */
	req->in_worker_handle = 1;
	sldns_buffer_set_limit(req->spool_buffer, 0);
	/* handle the current request */
	/* this calls the worker handle request routine that could give
	 * a cache response, or localdata response, or drop the reply,
	 * or schedule a mesh entry for later */
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
		req->in_worker_handle = 0;
		/* there is an answer, put it up.  It is already in the
		 * c->buffer, just send it. */
		/* since we were just reading a query, the channel is
		 * clear to write to */
	send_it:
		/* also jumped to from below, when is_reply is set */
		c->tcp_is_reading = 0;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
		return;
	}
	req->in_worker_handle = 0;
	/* it should be waiting in the mesh for recursion.
	 * If mesh failed to add a new entry and called commpoint_drop_reply.
	 * Then the mesh state has been cleared. */
	if(req->is_drop) {
		/* the reply has been dropped, stream has been closed. */
		return;
	}
	/* If mesh failed(mallocfail) and called commpoint_send_reply with
	 * something like servfail then we pick up that reply below. */
	if(req->is_reply) {
		goto send_it;
	}

	sldns_buffer_clear(c->buffer);
	/* if pending answers, pick up an answer and start sending it */
	tcp_req_pickup_next_result(req);

	/* if answers pending, start sending answers */
	/* read more requests if we can have more requests */
	tcp_req_info_setup_listen(req);
}

/**
 * Add a mesh state to the open request list, at the front.
 * @param req: the tcp request info.
 * @param mesh: the mesh area, stored in the new entry.
 * @param m: the mesh state, stored in the new entry.
 * @return 0 on malloc failure, 1 on success.
 */
int
tcp_req_info_add_meshstate(struct tcp_req_info* req,
	struct mesh_area* mesh, struct mesh_state* m)
{
	struct tcp_req_open_item* item;
	log_assert(req && mesh && m);
	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
	if(!item) return 0;
	item->next = req->open_req_list;
	item->mesh = mesh;
	item->mesh_state = m;
	req->open_req_list = item;
	req->num_open_req++;
	return 1;
}

/** Add a result to the result list.  At the end.
*/ 2282 static int 2283 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2284 { 2285 struct tcp_req_done_item* last = NULL; 2286 struct tcp_req_done_item* item; 2287 size_t space; 2288 2289 /* see if we have space */ 2290 space = sizeof(struct tcp_req_done_item) + len; 2291 lock_basic_lock(&stream_wait_count_lock); 2292 if(stream_wait_count + space > stream_wait_max) { 2293 lock_basic_unlock(&stream_wait_count_lock); 2294 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2295 return 0; 2296 } 2297 stream_wait_count += space; 2298 lock_basic_unlock(&stream_wait_count_lock); 2299 2300 /* find last element */ 2301 last = req->done_req_list; 2302 while(last && last->next) 2303 last = last->next; 2304 2305 /* create new element */ 2306 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2307 if(!item) { 2308 log_err("malloc failure, for stream result list"); 2309 return 0; 2310 } 2311 item->next = NULL; 2312 item->len = len; 2313 item->buf = memdup(buf, len); 2314 if(!item->buf) { 2315 free(item); 2316 log_err("malloc failure, adding reply to stream result list"); 2317 return 0; 2318 } 2319 2320 /* link in */ 2321 if(last) last->next = item; 2322 else req->done_req_list = item; 2323 req->num_done_req++; 2324 return 1; 2325 } 2326 2327 void 2328 tcp_req_info_send_reply(struct tcp_req_info* req) 2329 { 2330 if(req->in_worker_handle) { 2331 /* reply from mesh is in the spool_buffer */ 2332 /* copy now, so that the spool buffer is free for other tasks 2333 * before the callback is done */ 2334 sldns_buffer_clear(req->cp->buffer); 2335 sldns_buffer_write(req->cp->buffer, 2336 sldns_buffer_begin(req->spool_buffer), 2337 sldns_buffer_limit(req->spool_buffer)); 2338 sldns_buffer_flip(req->cp->buffer); 2339 req->is_reply = 1; 2340 return; 2341 } 2342 /* now that the query has been handled, that mesh_reply entry 2343 * should be removed, from the tcp_req_info list, 2344 * the mesh state cleanup removes then with 
region_cleanup and 2345 * replies_sent true. */ 2346 /* see if we can send it straight away (we are not doing 2347 * anything else). If so, copy to buffer and start */ 2348 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2349 /* buffer is free, and was ready to read new query into, 2350 * but we are now going to use it to send this answer */ 2351 tcp_req_info_start_write_buf(req, 2352 sldns_buffer_begin(req->spool_buffer), 2353 sldns_buffer_limit(req->spool_buffer)); 2354 /* switch to listen to write events */ 2355 comm_point_stop_listening(req->cp); 2356 comm_point_start_listening(req->cp, -1, 2357 adjusted_tcp_timeout(req->cp)); 2358 return; 2359 } 2360 /* queue up the answer behind the others already pending */ 2361 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2362 sldns_buffer_limit(req->spool_buffer))) { 2363 /* drop the connection, we are out of resources */ 2364 comm_point_drop_reply(&req->cp->repinfo); 2365 } 2366 } 2367 2368 size_t tcp_req_info_get_stream_buffer_size(void) 2369 { 2370 size_t s; 2371 if(!stream_wait_lock_inited) 2372 return stream_wait_count; 2373 lock_basic_lock(&stream_wait_count_lock); 2374 s = stream_wait_count; 2375 lock_basic_unlock(&stream_wait_count_lock); 2376 return s; 2377 } 2378 2379 size_t http2_get_query_buffer_size(void) 2380 { 2381 size_t s; 2382 if(!http2_query_buffer_lock_inited) 2383 return http2_query_buffer_count; 2384 lock_basic_lock(&http2_query_buffer_count_lock); 2385 s = http2_query_buffer_count; 2386 lock_basic_unlock(&http2_query_buffer_count_lock); 2387 return s; 2388 } 2389 2390 size_t http2_get_response_buffer_size(void) 2391 { 2392 size_t s; 2393 if(!http2_response_buffer_lock_inited) 2394 return http2_response_buffer_count; 2395 lock_basic_lock(&http2_response_buffer_count_lock); 2396 s = http2_response_buffer_count; 2397 lock_basic_unlock(&http2_response_buffer_count_lock); 2398 return s; 2399 } 2400 2401 #ifdef HAVE_NGHTTP2 2402 /** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session */
static ssize_t http2_submit_response_read_callback(
	nghttp2_session* ATTR_UNUSED(session),
	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
{
	struct http2_session* sess = source->ptr;
	struct http2_stream* stream;
	size_t avail, ncopy;

	stream = nghttp2_session_get_stream_user_data(sess->session,
		stream_id);
	if(!stream) {
		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
			"stream");
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}
	if(!stream->rbuffer || sldns_buffer_remaining(stream->rbuffer) == 0) {
		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
			"available in rbuffer");
		/* rbuffer will be free'd in frame close cb */
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}

	/* copy the smallest of: the space offered, what is left in the
	 * rbuffer, and what fits in the return type */
	avail = sldns_buffer_remaining(stream->rbuffer);
	ncopy = (length > avail) ? avail : length;
	if(ncopy > SSIZE_MAX)
		ncopy = SSIZE_MAX; /* will probably never happen */

	memcpy(buf, sldns_buffer_current(stream->rbuffer), ncopy);
	sldns_buffer_skip(stream->rbuffer, ncopy);

	if(sldns_buffer_remaining(stream->rbuffer) != 0)
		return ncopy;

	/* that was the last part: signal the end of the data, and give the
	 * buffer and its share of the response buffer counter back */
	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
	lock_basic_lock(&http2_response_buffer_count_lock);
	http2_response_buffer_count -= sldns_buffer_capacity(stream->rbuffer);
	lock_basic_unlock(&http2_response_buffer_count_lock);
	sldns_buffer_free(stream->rbuffer);
	stream->rbuffer = NULL;
	return ncopy;
}

/**
 * Send RST_STREAM frame for stream.
2448 * @param h2_session: http2 session to submit frame to 2449 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2450 * @return 0 on error, 1 otherwise 2451 */ 2452 static int http2_submit_rst_stream(struct http2_session* h2_session, 2453 struct http2_stream* h2_stream) 2454 { 2455 int ret = nghttp2_submit_rst_stream(h2_session->session, 2456 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2457 NGHTTP2_INTERNAL_ERROR); 2458 if(ret) { 2459 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2460 "error: %s", nghttp2_strerror(ret)); 2461 return 0; 2462 } 2463 return 1; 2464 } 2465 2466 /** 2467 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2468 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2469 * might be used before this will be sent out. 2470 * @param h2_session: http2 session, containing c->buffer which contains answer 2471 * @return 0 on error, 1 otherwise 2472 */ 2473 int http2_submit_dns_response(struct http2_session* h2_session) 2474 { 2475 int ret; 2476 nghttp2_data_provider data_prd; 2477 char status[4]; 2478 nghttp2_nv headers[3]; 2479 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2480 size_t rlen; 2481 char rlen_str[32]; 2482 2483 if(h2_stream->rbuffer) { 2484 log_err("http2 submit response error: rbuffer already " 2485 "exists"); 2486 return 0; 2487 } 2488 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2489 log_err("http2 submit response error: c->buffer not complete"); 2490 return 0; 2491 } 2492 2493 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2494 verbose(VERB_QUERY, "http2: submit response error: " 2495 "invalid status"); 2496 return 0; 2497 } 2498 2499 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2500 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2501 2502 lock_basic_lock(&http2_response_buffer_count_lock); 2503 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2504 
lock_basic_unlock(&http2_response_buffer_count_lock); 2505 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2506 "in https-response-buffer-size"); 2507 return http2_submit_rst_stream(h2_session, h2_stream); 2508 } 2509 http2_response_buffer_count += rlen; 2510 lock_basic_unlock(&http2_response_buffer_count_lock); 2511 2512 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2513 lock_basic_lock(&http2_response_buffer_count_lock); 2514 http2_response_buffer_count -= rlen; 2515 lock_basic_unlock(&http2_response_buffer_count_lock); 2516 log_err("http2 submit response error: malloc failure"); 2517 return 0; 2518 } 2519 2520 headers[0].name = (uint8_t*)":status"; 2521 headers[0].namelen = 7; 2522 headers[0].value = (uint8_t*)status; 2523 headers[0].valuelen = 3; 2524 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2525 2526 headers[1].name = (uint8_t*)"content-type"; 2527 headers[1].namelen = 12; 2528 headers[1].value = (uint8_t*)"application/dns-message"; 2529 headers[1].valuelen = 23; 2530 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2531 2532 headers[2].name = (uint8_t*)"content-length"; 2533 headers[2].namelen = 14; 2534 headers[2].value = (uint8_t*)rlen_str; 2535 headers[2].valuelen = strlen(rlen_str); 2536 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2537 2538 sldns_buffer_write(h2_stream->rbuffer, 2539 sldns_buffer_current(h2_session->c->buffer), 2540 sldns_buffer_remaining(h2_session->c->buffer)); 2541 sldns_buffer_flip(h2_stream->rbuffer); 2542 2543 data_prd.source.ptr = h2_session; 2544 data_prd.read_callback = http2_submit_response_read_callback; 2545 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2546 headers, 3, &data_prd); 2547 if(ret) { 2548 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2549 "error: %s", nghttp2_strerror(ret)); 2550 return 0; 2551 } 2552 return 1; 2553 } 2554 #else 2555 int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2556 { 2557 return 0; 2558 } 2559 #endif 2560 2561 #ifdef HAVE_NGHTTP2 2562 /** HTTP 
status to descriptive string */ 2563 static char* http_status_to_str(enum http_status s) 2564 { 2565 switch(s) { 2566 case HTTP_STATUS_OK: 2567 return "OK"; 2568 case HTTP_STATUS_BAD_REQUEST: 2569 return "Bad Request"; 2570 case HTTP_STATUS_NOT_FOUND: 2571 return "Not Found"; 2572 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2573 return "Payload Too Large"; 2574 case HTTP_STATUS_URI_TOO_LONG: 2575 return "URI Too Long"; 2576 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2577 return "Unsupported Media Type"; 2578 case HTTP_STATUS_NOT_IMPLEMENTED: 2579 return "Not Implemented"; 2580 } 2581 return "Status Unknown"; 2582 } 2583 2584 /** nghttp2 callback. Used to copy error message to nghttp2 session */ 2585 static ssize_t http2_submit_error_read_callback( 2586 nghttp2_session* ATTR_UNUSED(session), 2587 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2588 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2589 { 2590 struct http2_stream* h2_stream; 2591 struct http2_session* h2_session = source->ptr; 2592 char* msg; 2593 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2594 h2_session->session, stream_id))) { 2595 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2596 "stream"); 2597 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2598 } 2599 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2600 msg = http_status_to_str(h2_stream->status); 2601 if(length < strlen(msg)) 2602 return 0; /* not worth trying over multiple frames */ 2603 memcpy(buf, msg, strlen(msg)); 2604 return strlen(msg); 2605 2606 } 2607 2608 /** 2609 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2610 * sending out. Message body will contain descriptive string for HTTP status. 
2611 * @param h2_session: http2 session to submit to 2612 * @param h2_stream: http2 stream containing HTTP status to use for error 2613 * @return 0 on error, 1 otherwise 2614 */ 2615 static int http2_submit_error(struct http2_session* h2_session, 2616 struct http2_stream* h2_stream) 2617 { 2618 int ret; 2619 char status[4]; 2620 nghttp2_data_provider data_prd; 2621 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2622 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2623 verbose(VERB_QUERY, "http2: submit error failed, " 2624 "invalid status"); 2625 return 0; 2626 } 2627 headers[0].name = (uint8_t*)":status"; 2628 headers[0].namelen = 7; 2629 headers[0].value = (uint8_t*)status; 2630 headers[0].valuelen = 3; 2631 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2632 2633 data_prd.source.ptr = h2_session; 2634 data_prd.read_callback = http2_submit_error_read_callback; 2635 2636 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2637 headers, 1, &data_prd); 2638 if(ret) { 2639 verbose(VERB_QUERY, "http2: submit error failed, " 2640 "error: %s", nghttp2_strerror(ret)); 2641 return 0; 2642 } 2643 return 1; 2644 } 2645 2646 /** 2647 * Start query handling. Query is stored in the stream, and will be free'd here. 2648 * @param h2_session: http2 session, containing comm point 2649 * @param h2_stream: stream containing buffered query 2650 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2651 * reply available (yet). 
2652 */ 2653 static int http2_query_read_done(struct http2_session* h2_session, 2654 struct http2_stream* h2_stream) 2655 { 2656 log_assert(h2_stream->qbuffer); 2657 2658 if(h2_session->c->h2_stream) { 2659 verbose(VERB_ALGO, "http2_query_read_done failure: shared " 2660 "buffer already assigned to stream"); 2661 return -1; 2662 } 2663 2664 /* the c->buffer might be used by mesh_send_reply and no be cleard 2665 * need to be cleared before use */ 2666 sldns_buffer_clear(h2_session->c->buffer); 2667 if(sldns_buffer_remaining(h2_session->c->buffer) < 2668 sldns_buffer_remaining(h2_stream->qbuffer)) { 2669 /* qbuffer will be free'd in frame close cb */ 2670 sldns_buffer_clear(h2_session->c->buffer); 2671 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit " 2672 "qbuffer in c->buffer"); 2673 return -1; 2674 } 2675 2676 sldns_buffer_write(h2_session->c->buffer, 2677 sldns_buffer_current(h2_stream->qbuffer), 2678 sldns_buffer_remaining(h2_stream->qbuffer)); 2679 2680 lock_basic_lock(&http2_query_buffer_count_lock); 2681 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer); 2682 lock_basic_unlock(&http2_query_buffer_count_lock); 2683 sldns_buffer_free(h2_stream->qbuffer); 2684 h2_stream->qbuffer = NULL; 2685 2686 sldns_buffer_flip(h2_session->c->buffer); 2687 h2_session->c->h2_stream = h2_stream; 2688 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback)); 2689 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg, 2690 NETEVENT_NOERROR, &h2_session->c->repinfo)) { 2691 return 1; /* answer in c->buffer */ 2692 } 2693 sldns_buffer_clear(h2_session->c->buffer); 2694 h2_session->c->h2_stream = NULL; 2695 return 0; /* mesh state added, or dropped */ 2696 } 2697 2698 /** nghttp2 callback. Used to check if the received frame indicates the end of a 2699 * stream. Gather collected request data and start query handling. 
*/ 2700 static int http2_req_frame_recv_cb(nghttp2_session* session, 2701 const nghttp2_frame* frame, void* cb_arg) 2702 { 2703 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2704 struct http2_stream* h2_stream; 2705 int query_read_done; 2706 2707 if((frame->hd.type != NGHTTP2_DATA && 2708 frame->hd.type != NGHTTP2_HEADERS) || 2709 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) { 2710 return 0; 2711 } 2712 2713 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2714 session, frame->hd.stream_id))) 2715 return 0; 2716 2717 if(h2_stream->invalid_endpoint) { 2718 h2_stream->status = HTTP_STATUS_NOT_FOUND; 2719 goto submit_http_error; 2720 } 2721 2722 if(h2_stream->invalid_content_type) { 2723 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE; 2724 goto submit_http_error; 2725 } 2726 2727 if(h2_stream->http_method != HTTP_METHOD_GET && 2728 h2_stream->http_method != HTTP_METHOD_POST) { 2729 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED; 2730 goto submit_http_error; 2731 } 2732 2733 if(h2_stream->query_too_large) { 2734 if(h2_stream->http_method == HTTP_METHOD_POST) 2735 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE; 2736 else 2737 h2_stream->status = HTTP_STATUS_URI_TOO_LONG; 2738 goto submit_http_error; 2739 } 2740 2741 if(!h2_stream->qbuffer) { 2742 h2_stream->status = HTTP_STATUS_BAD_REQUEST; 2743 goto submit_http_error; 2744 } 2745 2746 if(h2_stream->status) { 2747 submit_http_error: 2748 verbose(VERB_QUERY, "http2 request invalid, returning :status=" 2749 "%d", h2_stream->status); 2750 if(!http2_submit_error(h2_session, h2_stream)) { 2751 return NGHTTP2_ERR_CALLBACK_FAILURE; 2752 } 2753 return 0; 2754 } 2755 h2_stream->status = HTTP_STATUS_OK; 2756 2757 sldns_buffer_flip(h2_stream->qbuffer); 2758 h2_session->postpone_drop = 1; 2759 query_read_done = http2_query_read_done(h2_session, h2_stream); 2760 if(query_read_done < 0) 2761 return NGHTTP2_ERR_CALLBACK_FAILURE; 2762 else if(!query_read_done) { 2763 if(h2_session->is_drop) { 2764 
/* connection needs to be closed. Return failure to make 2765 * sure no other action are taken anymore on comm point. 2766 * failure will result in reclaiming (and closing) 2767 * of comm point. */ 2768 verbose(VERB_QUERY, "http2 query dropped in worker cb"); 2769 h2_session->postpone_drop = 0; 2770 return NGHTTP2_ERR_CALLBACK_FAILURE; 2771 } 2772 /* nothing to submit right now, query added to mesh. */ 2773 h2_session->postpone_drop = 0; 2774 return 0; 2775 } 2776 if(!http2_submit_dns_response(h2_session)) { 2777 sldns_buffer_clear(h2_session->c->buffer); 2778 h2_session->c->h2_stream = NULL; 2779 return NGHTTP2_ERR_CALLBACK_FAILURE; 2780 } 2781 verbose(VERB_QUERY, "http2 query submitted to session"); 2782 sldns_buffer_clear(h2_session->c->buffer); 2783 h2_session->c->h2_stream = NULL; 2784 return 0; 2785 } 2786 2787 /** nghttp2 callback. Used to detect start of new streams. */ 2788 static int http2_req_begin_headers_cb(nghttp2_session* session, 2789 const nghttp2_frame* frame, void* cb_arg) 2790 { 2791 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2792 struct http2_stream* h2_stream; 2793 int ret; 2794 if(frame->hd.type != NGHTTP2_HEADERS || 2795 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2796 /* only interested in request headers */ 2797 return 0; 2798 } 2799 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { 2800 log_err("malloc failure while creating http2 stream"); 2801 return NGHTTP2_ERR_CALLBACK_FAILURE; 2802 } 2803 http2_session_add_stream(h2_session, h2_stream); 2804 ret = nghttp2_session_set_stream_user_data(session, 2805 frame->hd.stream_id, h2_stream); 2806 if(ret) { 2807 /* stream does not exist */ 2808 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2809 "error: %s", nghttp2_strerror(ret)); 2810 return NGHTTP2_ERR_CALLBACK_FAILURE; 2811 } 2812 2813 return 0; 2814 } 2815 2816 /** 2817 * base64url decode, store in qbuffer 2818 * @param h2_session: http2 session 2819 * @param h2_stream: http2 stream 2820 * 
@param start: start of the base64 string 2821 * @param length: length of the base64 string 2822 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2823 * buffer will be NULL is unparseble. 2824 */ 2825 static int http2_buffer_uri_query(struct http2_session* h2_session, 2826 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2827 { 2828 size_t expectb64len; 2829 int b64len; 2830 if(h2_stream->http_method == HTTP_METHOD_POST) 2831 return 1; 2832 if(length == 0) 2833 return 1; 2834 if(h2_stream->qbuffer) { 2835 verbose(VERB_ALGO, "http2_req_header fail, " 2836 "qbuffer already set"); 2837 return 0; 2838 } 2839 2840 /* calculate size, might be a bit bigger than the real 2841 * decoded buffer size */ 2842 expectb64len = sldns_b64_pton_calculate_size(length); 2843 log_assert(expectb64len > 0); 2844 if(expectb64len > 2845 h2_session->c->http2_stream_max_qbuffer_size) { 2846 h2_stream->query_too_large = 1; 2847 return 1; 2848 } 2849 2850 lock_basic_lock(&http2_query_buffer_count_lock); 2851 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2852 lock_basic_unlock(&http2_query_buffer_count_lock); 2853 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2854 "in http2-query-buffer-size"); 2855 return http2_submit_rst_stream(h2_session, h2_stream); 2856 } 2857 http2_query_buffer_count += expectb64len; 2858 lock_basic_unlock(&http2_query_buffer_count_lock); 2859 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2860 lock_basic_lock(&http2_query_buffer_count_lock); 2861 http2_query_buffer_count -= expectb64len; 2862 lock_basic_unlock(&http2_query_buffer_count_lock); 2863 log_err("http2_req_header fail, qbuffer " 2864 "malloc failure"); 2865 return 0; 2866 } 2867 2868 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2869 char buf[65536+4]; 2870 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2871 /* copy to the scratch buffer temporarily to terminate the 2872 * string 
with a zero */ 2873 if(length+1 > sizeof(buf)) { 2874 /* too long */ 2875 lock_basic_lock(&http2_query_buffer_count_lock); 2876 http2_query_buffer_count -= expectb64len; 2877 lock_basic_unlock(&http2_query_buffer_count_lock); 2878 sldns_buffer_free(h2_stream->qbuffer); 2879 h2_stream->qbuffer = NULL; 2880 return 1; 2881 } 2882 memmove(buf, start, length); 2883 buf[length] = 0; 2884 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2885 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2886 lock_basic_lock(&http2_query_buffer_count_lock); 2887 http2_query_buffer_count -= expectb64len; 2888 lock_basic_unlock(&http2_query_buffer_count_lock); 2889 sldns_buffer_free(h2_stream->qbuffer); 2890 h2_stream->qbuffer = NULL; 2891 return 1; 2892 } 2893 } else { 2894 if(!(b64len = sldns_b64url_pton( 2895 (char const *)start, length, 2896 sldns_buffer_current(h2_stream->qbuffer), 2897 expectb64len)) || b64len < 0) { 2898 lock_basic_lock(&http2_query_buffer_count_lock); 2899 http2_query_buffer_count -= expectb64len; 2900 lock_basic_unlock(&http2_query_buffer_count_lock); 2901 sldns_buffer_free(h2_stream->qbuffer); 2902 h2_stream->qbuffer = NULL; 2903 /* return without error, method can be an 2904 * unknown POST */ 2905 return 1; 2906 } 2907 } 2908 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 2909 return 1; 2910 } 2911 2912 /** nghttp2 callback. Used to parse headers from HEADER frames. 
*/ 2913 static int http2_req_header_cb(nghttp2_session* session, 2914 const nghttp2_frame* frame, const uint8_t* name, size_t namelen, 2915 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags), 2916 void* cb_arg) 2917 { 2918 struct http2_stream* h2_stream = NULL; 2919 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2920 /* nghttp2 deals with CONTINUATION frames and provides them as part of 2921 * the HEADER */ 2922 if(frame->hd.type != NGHTTP2_HEADERS || 2923 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2924 /* only interested in request headers */ 2925 return 0; 2926 } 2927 if(!(h2_stream = nghttp2_session_get_stream_user_data(session, 2928 frame->hd.stream_id))) 2929 return 0; 2930 2931 /* earlier checks already indicate we can stop handling this query */ 2932 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED || 2933 h2_stream->invalid_content_type || 2934 h2_stream->invalid_endpoint) 2935 return 0; 2936 2937 2938 /* nghttp2 performs some sanity checks in the headers, including: 2939 * name and value are guaranteed to be null terminated 2940 * name is guaranteed to be lowercase 2941 * content-length value is guaranteed to contain digits 2942 */ 2943 2944 if(!h2_stream->http_method && namelen == 7 && 2945 memcmp(":method", name, namelen) == 0) { 2946 /* Case insensitive check on :method value to be on the safe 2947 * side. I failed to find text about case sensitivity in specs. 
2948 */ 2949 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0) 2950 h2_stream->http_method = HTTP_METHOD_GET; 2951 else if(valuelen == 4 && 2952 strcasecmp("POST", (const char*)value) == 0) { 2953 h2_stream->http_method = HTTP_METHOD_POST; 2954 if(h2_stream->qbuffer) { 2955 /* POST method uses query from DATA frames */ 2956 lock_basic_lock(&http2_query_buffer_count_lock); 2957 http2_query_buffer_count -= 2958 sldns_buffer_capacity(h2_stream->qbuffer); 2959 lock_basic_unlock(&http2_query_buffer_count_lock); 2960 sldns_buffer_free(h2_stream->qbuffer); 2961 h2_stream->qbuffer = NULL; 2962 } 2963 } else 2964 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED; 2965 return 0; 2966 } 2967 if(namelen == 5 && memcmp(":path", name, namelen) == 0) { 2968 /* :path may contain DNS query, depending on method. Method might 2969 * not be known yet here, so check after finishing receiving 2970 * stream. */ 2971 #define HTTP_QUERY_PARAM "?dns=" 2972 size_t el = strlen(h2_session->c->http_endpoint); 2973 size_t qpl = strlen(HTTP_QUERY_PARAM); 2974 2975 if(valuelen < el || memcmp(h2_session->c->http_endpoint, 2976 value, el) != 0) { 2977 h2_stream->invalid_endpoint = 1; 2978 return 0; 2979 } 2980 /* larger than endpoint only allowed if it is for the query 2981 * parameter */ 2982 if(valuelen <= el+qpl || 2983 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) { 2984 if(valuelen != el) 2985 h2_stream->invalid_endpoint = 1; 2986 return 0; 2987 } 2988 2989 if(!http2_buffer_uri_query(h2_session, h2_stream, 2990 value+(el+qpl), valuelen-(el+qpl))) { 2991 return NGHTTP2_ERR_CALLBACK_FAILURE; 2992 } 2993 return 0; 2994 } 2995 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST, 2996 * and not needed when using GET. Don't enfore. 2997 * If set only allow lowercase "application/dns-message". 2998 * 2999 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST 3000 * be able to handle "application/dns-message". 
Since that is the only 3001 * content-type supported we can ignore the accept header. 3002 */ 3003 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) { 3004 if(valuelen != 23 || memcmp("application/dns-message", value, 3005 valuelen) != 0) { 3006 h2_stream->invalid_content_type = 1; 3007 } 3008 } 3009 3010 /* Only interested in content-lentg for POST (on not yet known) method. 3011 */ 3012 if((!h2_stream->http_method || 3013 h2_stream->http_method == HTTP_METHOD_POST) && 3014 !h2_stream->content_length && namelen == 14 && 3015 memcmp("content-length", name, namelen) == 0) { 3016 if(valuelen > 5) { 3017 h2_stream->query_too_large = 1; 3018 return 0; 3019 } 3020 /* guaranteed to only contain digits and be null terminated */ 3021 h2_stream->content_length = atoi((const char*)value); 3022 if(h2_stream->content_length > 3023 h2_session->c->http2_stream_max_qbuffer_size) { 3024 h2_stream->query_too_large = 1; 3025 return 0; 3026 } 3027 } 3028 return 0; 3029 } 3030 3031 /** nghttp2 callback. Used to get data from DATA frames, which can contain 3032 * queries in POST requests. */ 3033 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), 3034 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data, 3035 size_t len, void* cb_arg) 3036 { 3037 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3038 struct http2_stream* h2_stream; 3039 size_t qlen = 0; 3040 3041 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3042 h2_session->session, stream_id))) { 3043 return 0; 3044 } 3045 3046 if(h2_stream->query_too_large) 3047 return 0; 3048 3049 if(!h2_stream->qbuffer) { 3050 if(h2_stream->content_length) { 3051 if(h2_stream->content_length < len) 3052 /* getting more data in DATA frame than 3053 * advertised in content-length header. 
*/ 3054 return NGHTTP2_ERR_CALLBACK_FAILURE; 3055 qlen = h2_stream->content_length; 3056 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) { 3057 /* setting this to msg-buffer-size can result in a lot 3058 * of memory consuption. Most queries should fit in a 3059 * single DATA frame, and most POST queries will 3060 * contain content-length which does not impose this 3061 * limit. */ 3062 qlen = len; 3063 } 3064 } 3065 if(!h2_stream->qbuffer && qlen) { 3066 lock_basic_lock(&http2_query_buffer_count_lock); 3067 if(http2_query_buffer_count + qlen > http2_query_buffer_max) { 3068 lock_basic_unlock(&http2_query_buffer_count_lock); 3069 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 3070 "in http2-query-buffer-size"); 3071 return http2_submit_rst_stream(h2_session, h2_stream); 3072 } 3073 http2_query_buffer_count += qlen; 3074 lock_basic_unlock(&http2_query_buffer_count_lock); 3075 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) { 3076 lock_basic_lock(&http2_query_buffer_count_lock); 3077 http2_query_buffer_count -= qlen; 3078 lock_basic_unlock(&http2_query_buffer_count_lock); 3079 } 3080 } 3081 3082 if(!h2_stream->qbuffer || 3083 sldns_buffer_remaining(h2_stream->qbuffer) < len) { 3084 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough " 3085 "buffer space for POST query. 
Can happen on multi " 3086 "frame requests without content-length header"); 3087 h2_stream->query_too_large = 1; 3088 return 0; 3089 } 3090 3091 sldns_buffer_write(h2_stream->qbuffer, data, len); 3092 3093 return 0; 3094 } 3095 3096 void http2_req_stream_clear(struct http2_stream* h2_stream) 3097 { 3098 if(h2_stream->qbuffer) { 3099 lock_basic_lock(&http2_query_buffer_count_lock); 3100 http2_query_buffer_count -= 3101 sldns_buffer_capacity(h2_stream->qbuffer); 3102 lock_basic_unlock(&http2_query_buffer_count_lock); 3103 sldns_buffer_free(h2_stream->qbuffer); 3104 h2_stream->qbuffer = NULL; 3105 } 3106 if(h2_stream->rbuffer) { 3107 lock_basic_lock(&http2_response_buffer_count_lock); 3108 http2_response_buffer_count -= 3109 sldns_buffer_capacity(h2_stream->rbuffer); 3110 lock_basic_unlock(&http2_response_buffer_count_lock); 3111 sldns_buffer_free(h2_stream->rbuffer); 3112 h2_stream->rbuffer = NULL; 3113 } 3114 } 3115 3116 nghttp2_session_callbacks* http2_req_callbacks_create(void) 3117 { 3118 nghttp2_session_callbacks *callbacks; 3119 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) { 3120 log_err("failed to initialize nghttp2 callback"); 3121 return NULL; 3122 } 3123 /* reception of header block started, used to create h2_stream */ 3124 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks, 3125 http2_req_begin_headers_cb); 3126 /* complete frame received, used to get data from stream if frame 3127 * has end stream flag, and start processing query */ 3128 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, 3129 http2_req_frame_recv_cb); 3130 /* get request info from headers */ 3131 nghttp2_session_callbacks_set_on_header_callback(callbacks, 3132 http2_req_header_cb); 3133 /* get data from DATA frames, containing POST query */ 3134 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, 3135 http2_req_data_chunk_recv_cb); 3136 3137 /* generic HTTP2 callbacks */ 3138 
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb); 3139 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb); 3140 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, 3141 http2_stream_close_cb); 3142 3143 return callbacks; 3144 } 3145 #endif /* HAVE_NGHTTP2 */ 3146