1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 124 125 #define UDP_KTR_STRING "inp=%p" 126 #define UDP_KTR_ARGS struct inpcb *inp 127 128 #ifndef KTR_UDP 129 #define KTR_UDP KTR_ALL 130 #endif 131 132 KTR_INFO_MASTER(udp); 133 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 136 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 137 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 138 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 139 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 140 141 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 142 143 /* 144 * UDP protocol implementation. 145 * Per RFC 768, August, 1980. 146 */ 147 #ifndef COMPAT_42 148 static int udpcksum = 1; 149 #else 150 static int udpcksum = 0; /* XXX */ 151 #endif 152 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 153 &udpcksum, 0, "Enable checksumming of UDP packets"); 154 155 int log_in_vain = 0; 156 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 157 &log_in_vain, 0, "Log all incoming UDP packets"); 158 159 static int blackhole = 0; 160 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 161 &blackhole, 0, "Do not send port unreachables for refused connects"); 162 163 static int strict_mcast_mship = 1; 164 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 165 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 166 167 int udp_sosend_async = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 169 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 170 171 int udp_sosend_prepend = 1; 172 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 173 &udp_sosend_prepend, 0, 174 "Prepend enough space for proto and link header in pru_send"); 175 176 static int udp_reuseport_ext = 1; 177 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 178 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 179 180 struct inpcbinfo udbinfo[MAXCPU]; 181 182 #ifndef UDBHASHSIZE 183 #define UDBHASHSIZE 16 184 #endif 185 186 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 187 188 #ifdef INET6 189 struct udp_in6 { 190 struct sockaddr_in6 uin6_sin; 191 u_char uin6_init_done : 1; 192 }; 193 struct udp_ip6 { 194 struct ip6_hdr uip6_ip6; 195 u_char uip6_init_done : 1; 196 }; 197 #else 198 struct udp_in6; 199 struct udp_ip6; 200 #endif /* INET6 */ 201 202 static void udp_append (struct inpcb *last, struct ip *ip, 203 struct mbuf *n, int off, struct sockaddr_in *udp_in, 204 struct udp_in6 *, struct udp_ip6 *); 205 #ifdef INET6 206 static void ip_2_ip6_hdr (struct ip6_hdr *ip6, struct ip *ip); 207 #endif 208 209 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 210 struct sockaddr_in *if_sin); 211 212 static boolean_t udp_inswildcardhash(struct inpcb *inp, 213 struct netmsg_base *msg, int error); 214 static void udp_remwildcardhash(struct inpcb *inp); 215 216 void 217 udp_init(void) 218 { 219 struct inpcbportinfo *portinfo; 220 int cpu; 221 222 portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB, 223 M_WAITOK); 224 225 for (cpu = 0; cpu < ncpus2; cpu++) { 226 struct inpcbinfo *uicb = &udbinfo[cpu]; 227 228 /* 229 * NOTE: 230 * UDP pcb list, wildcard hash table and localgroup hash 231 * table are shared. 232 */ 233 in_pcbinfo_init(uicb, cpu, TRUE); 234 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 235 236 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, TRUE, cpu); 237 uicb->portinfo = portinfo; 238 uicb->portinfo_mask = ncpus2_mask; 239 240 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 241 &uicb->wildcardhashmask); 242 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 243 &uicb->localgrphashmask); 244 245 uicb->ipi_size = sizeof(struct inpcb); 246 } 247 248 /* 249 * Initialize UDP statistics counters for each CPU. 250 */ 251 for (cpu = 0; cpu < ncpus; ++cpu) 252 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 253 } 254 255 static int 256 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 257 { 258 int cpu, error = 0; 259 260 for (cpu = 0; cpu < ncpus; ++cpu) { 261 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 262 sizeof(struct udpstat)))) 263 break; 264 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 265 sizeof(struct udpstat)))) 266 break; 267 } 268 269 return (error); 270 } 271 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 272 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 273 274 void 275 udp_ctloutput(netmsg_t msg) 276 { 277 struct socket *so = msg->base.nm_so; 278 struct sockopt *sopt = msg->ctloutput.nm_sopt; 279 struct inpcb *inp = so->so_pcb; 280 281 if (sopt->sopt_level == IPPROTO_IP) { 282 switch (sopt->sopt_name) { 283 case IP_MULTICAST_IF: 284 case IP_MULTICAST_VIF: 285 case IP_MULTICAST_TTL: 286 case IP_MULTICAST_LOOP: 287 case IP_ADD_MEMBERSHIP: 288 case IP_DROP_MEMBERSHIP: 289 if (&curthread->td_msgport != netisr_cpuport(0)) { 290 /* 291 * This pr_ctloutput msg will be forwarded 292 * to netisr0 to run; we can't do direct 293 * detaching anymore. 294 */ 295 inp->inp_flags &= ~INP_DIRECT_DETACH; 296 } 297 break; 298 } 299 } 300 return ip_ctloutput(msg); 301 } 302 303 /* 304 * Check multicast packets to make sure they are only sent to sockets with 305 * multicast memberships for the packet's destination address and arrival 306 * interface. Multicast packets to multicast-unaware sockets are also 307 * disallowed. 308 * 309 * Returns 0 if the packet is acceptable, -1 if it is not. 310 */ 311 static __inline int 312 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 313 const struct mbuf *m) 314 { 315 const struct ip_moptions *mopt; 316 int mshipno; 317 318 if (strict_mcast_mship == 0 || 319 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 320 return (0); 321 } 322 323 KASSERT(&curthread->td_msgport == netisr_cpuport(0), 324 ("multicast input not in netisr0")); 325 326 mopt = inp->inp_moptions; 327 if (mopt == NULL) 328 return (-1); 329 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 330 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 331 332 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 333 m->m_pkthdr.rcvif == maddr->inm_ifp) { 334 return (0); 335 } 336 } 337 return (-1); 338 } 339 340 struct udp_mcast_arg { 341 struct inpcb *inp; 342 struct inpcb *last; 343 struct ip *ip; 344 struct mbuf *m; 345 int iphlen; 346 struct sockaddr_in *udp_in; 347 #ifdef INET6 348 struct udp_in6 *udp_in6; 349 struct udp_ip6 *udp_ip6; 350 #endif 351 }; 352 353 static int 354 udp_mcast_input(struct udp_mcast_arg *arg) 355 { 356 struct inpcb *inp = arg->inp; 357 struct inpcb *last = arg->last; 358 struct ip *ip = arg->ip; 359 struct mbuf *m = arg->m; 360 361 if (check_multicast_membership(ip, inp, m) < 0) 362 return ERESTART; /* caller continue */ 363 364 if (last != NULL) { 365 struct mbuf *n; 366 367 #ifdef IPSEC 368 /* check AH/ESP integrity. */ 369 if (ipsec4_in_reject_so(m, last->inp_socket)) 370 ipsecstat.in_polvio++; 371 /* do not inject data to pcb */ 372 else 373 #endif /*IPSEC*/ 374 #ifdef FAST_IPSEC 375 /* check AH/ESP integrity. */ 376 if (ipsec4_in_reject(m, last)) 377 ; 378 else 379 #endif /*FAST_IPSEC*/ 380 if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL) 381 udp_append(last, ip, n, 382 arg->iphlen + sizeof(struct udphdr), 383 arg->udp_in, 384 #ifdef INET6 385 arg->udp_in6, arg->udp_ip6 386 #else 387 NULL, NULL 388 #endif 389 ); 390 } 391 arg->last = last = inp; 392 393 /* 394 * Don't look for additional matches if this one does 395 * not have either the SO_REUSEPORT or SO_REUSEADDR 396 * socket options set. This heuristic avoids searching 397 * through all pcbs in the common case of a non-shared 398 * port. It * assumes that an application will never 399 * clear these options after setting them. 400 */ 401 if (!(last->inp_socket->so_options & 402 (SO_REUSEPORT | SO_REUSEADDR))) 403 return EJUSTRETURN; /* caller stop */ 404 return 0; 405 } 406 407 int 408 udp_input(struct mbuf **mp, int *offp, int proto) 409 { 410 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 411 #ifdef INET6 412 struct udp_in6 udp_in6 = { 413 { sizeof udp_in6.uin6_sin, AF_INET6 }, 0 414 }; 415 struct udp_ip6 udp_ip6; 416 #endif 417 418 int iphlen; 419 struct ip *ip; 420 struct udphdr *uh; 421 struct inpcb *inp; 422 struct mbuf *m; 423 struct mbuf *opts = NULL; 424 int len, off; 425 struct ip save_ip; 426 struct sockaddr *append_sa; 427 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 428 429 off = *offp; 430 m = *mp; 431 *mp = NULL; 432 433 iphlen = off; 434 udp_stat.udps_ipackets++; 435 436 /* 437 * Strip IP options, if any; should skip this, 438 * make available to user, and use on returned packets, 439 * but we don't yet have a way to check the checksum 440 * with options still present. 441 */ 442 if (iphlen > sizeof(struct ip)) { 443 ip_stripoptions(m); 444 iphlen = sizeof(struct ip); 445 } 446 447 /* 448 * IP and UDP headers are together in first mbuf. 449 * Already checked and pulled up in ip_demux(). 450 */ 451 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 452 ("UDP header not in one mbuf")); 453 454 ip = mtod(m, struct ip *); 455 uh = (struct udphdr *)((caddr_t)ip + iphlen); 456 457 /* destination port of 0 is illegal, based on RFC768. */ 458 if (uh->uh_dport == 0) 459 goto bad; 460 461 /* 462 * Make mbuf data length reflect UDP length. 463 * If not enough data to reflect UDP length, drop. 464 */ 465 len = ntohs((u_short)uh->uh_ulen); 466 if (ip->ip_len != len) { 467 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 468 udp_stat.udps_badlen++; 469 goto bad; 470 } 471 m_adj(m, len - ip->ip_len); 472 /* ip->ip_len = len; */ 473 } 474 /* 475 * Save a copy of the IP header in case we want restore it 476 * for sending an ICMP error message in response. 477 */ 478 save_ip = *ip; 479 480 /* 481 * Checksum extended UDP header and data. 482 */ 483 if (uh->uh_sum) { 484 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 485 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 486 uh->uh_sum = m->m_pkthdr.csum_data; 487 else 488 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 489 ip->ip_dst.s_addr, htonl((u_short)len + 490 m->m_pkthdr.csum_data + IPPROTO_UDP)); 491 uh->uh_sum ^= 0xffff; 492 } else { 493 char b[9]; 494 495 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 496 bzero(((struct ipovly *)ip)->ih_x1, 9); 497 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 498 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 499 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 500 } 501 if (uh->uh_sum) { 502 udp_stat.udps_badsum++; 503 m_freem(m); 504 return(IPPROTO_DONE); 505 } 506 } else 507 udp_stat.udps_nosum++; 508 509 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 510 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 511 struct inpcbhead *connhead; 512 struct inpcontainer *ic, *ic_marker; 513 struct inpcontainerhead *ichead; 514 struct udp_mcast_arg arg; 515 struct inpcb *last; 516 int error; 517 518 /* 519 * Deliver a multicast or broadcast datagram to *all* sockets 520 * for which the local and remote addresses and ports match 521 * those of the incoming datagram. This allows more than 522 * one process to receive multi/broadcasts on the same port. 523 * (This really ought to be done for unicast datagrams as 524 * well, but that would cause problems with existing 525 * applications that open both address-specific sockets and 526 * a wildcard socket listening to the same port -- they would 527 * end up receiving duplicates of every unicast datagram. 528 * Those applications open the multiple sockets to overcome an 529 * inadequacy of the UDP socket interface, but for backwards 530 * compatibility we avoid the problem here rather than 531 * fixing the interface. Maybe 4.5BSD will remedy this?) 532 */ 533 534 /* 535 * Construct sockaddr format source address. 536 */ 537 udp_in.sin_port = uh->uh_sport; 538 udp_in.sin_addr = ip->ip_src; 539 arg.udp_in = &udp_in; 540 /* 541 * Locate pcb(s) for datagram. 542 * (Algorithm copied from raw_intr().) 543 */ 544 last = NULL; 545 #ifdef INET6 546 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; 547 arg.udp_in6 = &udp_in6; 548 arg.udp_ip6 = &udp_ip6; 549 #endif 550 arg.iphlen = iphlen; 551 552 connhead = &pcbinfo->hashbase[ 553 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 554 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 555 LIST_FOREACH(inp, connhead, inp_hash) { 556 #ifdef INET6 557 if (!(inp->inp_vflag & INP_IPV4)) 558 continue; 559 #endif 560 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 561 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 562 inp->inp_fport != uh->uh_sport || 563 inp->inp_lport != uh->uh_dport) 564 continue; 565 566 arg.inp = inp; 567 arg.last = last; 568 arg.ip = ip; 569 arg.m = m; 570 571 error = udp_mcast_input(&arg); 572 if (error == ERESTART) 573 continue; 574 last = arg.last; 575 576 if (error == EJUSTRETURN) 577 goto done; 578 } 579 580 ichead = &pcbinfo->wildcardhashbase[ 581 INP_PCBWILDCARDHASH(uh->uh_dport, 582 pcbinfo->wildcardhashmask)]; 583 ic_marker = in_pcbcontainer_marker(mycpuid); 584 585 GET_PCBINFO_TOKEN(pcbinfo); 586 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 587 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 588 LIST_REMOVE(ic_marker, ic_list); 589 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 590 591 inp = ic->ic_inp; 592 if (inp->inp_flags & INP_PLACEMARKER) 593 continue; 594 #ifdef INET6 595 if (!(inp->inp_vflag & INP_IPV4)) 596 continue; 597 #endif 598 if (inp->inp_lport != uh->uh_dport) 599 continue; 600 if (inp->inp_laddr.s_addr != INADDR_ANY && 601 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 602 continue; 603 604 arg.inp = inp; 605 arg.last = last; 606 arg.ip = ip; 607 arg.m = m; 608 609 error = udp_mcast_input(&arg); 610 if (error == ERESTART) 611 continue; 612 last = arg.last; 613 614 if (error == EJUSTRETURN) 615 break; 616 } 617 LIST_REMOVE(ic_marker, ic_list); 618 REL_PCBINFO_TOKEN(pcbinfo); 619 done: 620 if (last == NULL) { 621 /* 622 * No matching pcb found; discard datagram. 623 * (No need to send an ICMP Port Unreachable 624 * for a broadcast or multicast datgram.) 625 */ 626 udp_stat.udps_noportbcast++; 627 goto bad; 628 } 629 #ifdef IPSEC 630 /* check AH/ESP integrity. */ 631 if (ipsec4_in_reject_so(m, last->inp_socket)) { 632 ipsecstat.in_polvio++; 633 goto bad; 634 } 635 #endif /*IPSEC*/ 636 #ifdef FAST_IPSEC 637 /* check AH/ESP integrity. */ 638 if (ipsec4_in_reject(m, last)) 639 goto bad; 640 #endif /*FAST_IPSEC*/ 641 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 642 &udp_in, 643 #ifdef INET6 644 &udp_in6, &udp_ip6 645 #else 646 NULL, NULL 647 #endif 648 ); 649 return(IPPROTO_DONE); 650 } 651 /* 652 * Locate pcb for datagram. 653 */ 654 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 655 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 656 udp_reuseport_ext ? m : NULL); 657 if (inp == NULL) { 658 if (log_in_vain) { 659 char buf[sizeof "aaa.bbb.ccc.ddd"]; 660 661 strcpy(buf, inet_ntoa(ip->ip_dst)); 662 log(LOG_INFO, 663 "Connection attempt to UDP %s:%d from %s:%d\n", 664 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 665 ntohs(uh->uh_sport)); 666 } 667 udp_stat.udps_noport++; 668 if (m->m_flags & (M_BCAST | M_MCAST)) { 669 udp_stat.udps_noportbcast++; 670 goto bad; 671 } 672 if (blackhole) 673 goto bad; 674 #ifdef ICMP_BANDLIM 675 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 676 goto bad; 677 #endif 678 *ip = save_ip; 679 ip->ip_len += iphlen; 680 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 681 return(IPPROTO_DONE); 682 } 683 #ifdef IPSEC 684 if (ipsec4_in_reject_so(m, inp->inp_socket)) { 685 ipsecstat.in_polvio++; 686 goto bad; 687 } 688 #endif /*IPSEC*/ 689 #ifdef FAST_IPSEC 690 if (ipsec4_in_reject(m, inp)) 691 goto bad; 692 #endif /*FAST_IPSEC*/ 693 /* 694 * Check the minimum TTL for socket. 695 */ 696 if (ip->ip_ttl < inp->inp_ip_minttl) 697 goto bad; 698 699 /* 700 * Construct sockaddr format source address. 701 * Stuff source address and datagram in user buffer. 702 */ 703 udp_in.sin_port = uh->uh_sport; 704 udp_in.sin_addr = ip->ip_src; 705 if ((inp->inp_flags & INP_CONTROLOPTS) || 706 (inp->inp_socket->so_options & SO_TIMESTAMP)) { 707 #ifdef INET6 708 if (inp->inp_vflag & INP_IPV6) { 709 int savedflags; 710 711 ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); 712 savedflags = inp->inp_flags; 713 inp->inp_flags &= ~INP_UNMAPPABLEOPTS; 714 ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m); 715 inp->inp_flags = savedflags; 716 } else 717 #endif 718 ip_savecontrol(inp, &opts, ip, m); 719 } 720 m_adj(m, iphlen + sizeof(struct udphdr)); 721 #ifdef INET6 722 if (inp->inp_vflag & INP_IPV6) { 723 in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); 724 append_sa = (struct sockaddr *)&udp_in6; 725 } else 726 #endif 727 append_sa = (struct sockaddr *)&udp_in; 728 729 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 730 if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) { 731 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 732 udp_stat.udps_fullsock++; 733 goto bad; 734 } 735 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 736 sorwakeup(inp->inp_socket); 737 return(IPPROTO_DONE); 738 bad: 739 m_freem(m); 740 if (opts) 741 m_freem(opts); 742 return(IPPROTO_DONE); 743 } 744 745 #ifdef INET6 746 static void 747 ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip) 748 { 749 bzero(ip6, sizeof *ip6); 750 751 ip6->ip6_vfc = IPV6_VERSION; 752 ip6->ip6_plen = ip->ip_len; 753 ip6->ip6_nxt = ip->ip_p; 754 ip6->ip6_hlim = ip->ip_ttl; 755 ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] = 756 IPV6_ADDR_INT32_SMP; 757 ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr; 758 ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; 759 } 760 #endif 761 762 /* 763 * subroutine of udp_input(), mainly for source code readability. 764 * caller must properly init udp_ip6 and udp_in6 beforehand. 765 */ 766 static void 767 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 768 struct sockaddr_in *udp_in, 769 struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6) 770 { 771 struct sockaddr *append_sa; 772 struct mbuf *opts = NULL; 773 int ret; 774 775 if (last->inp_flags & INP_CONTROLOPTS || 776 last->inp_socket->so_options & SO_TIMESTAMP) { 777 #ifdef INET6 778 if (last->inp_vflag & INP_IPV6) { 779 int savedflags; 780 781 if (udp_ip6->uip6_init_done == 0) { 782 ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip); 783 udp_ip6->uip6_init_done = 1; 784 } 785 savedflags = last->inp_flags; 786 last->inp_flags &= ~INP_UNMAPPABLEOPTS; 787 ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n); 788 last->inp_flags = savedflags; 789 } else 790 #endif 791 ip_savecontrol(last, &opts, ip, n); 792 } 793 #ifdef INET6 794 if (last->inp_vflag & INP_IPV6) { 795 if (udp_in6->uin6_init_done == 0) { 796 in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin); 797 udp_in6->uin6_init_done = 1; 798 } 799 append_sa = (struct sockaddr *)&udp_in6->uin6_sin; 800 } else 801 #endif 802 append_sa = (struct sockaddr *)udp_in; 803 m_adj(n, off); 804 805 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 806 ret = ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts); 807 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 808 if (ret == 0) { 809 m_freem(n); 810 if (opts) 811 m_freem(opts); 812 udp_stat.udps_fullsock++; 813 } else { 814 sorwakeup(last->inp_socket); 815 } 816 } 817 818 /* 819 * Notify a udp user of an asynchronous error; 820 * just wake up so that he can collect error status. 821 */ 822 void 823 udp_notify(struct inpcb *inp, int error) 824 { 825 inp->inp_socket->so_error = error; 826 sorwakeup(inp->inp_socket); 827 sowwakeup(inp->inp_socket); 828 } 829 830 struct netmsg_udp_notify { 831 struct netmsg_base base; 832 void (*nm_notify)(struct inpcb *, int); 833 struct in_addr nm_faddr; 834 int nm_arg; 835 }; 836 837 static void 838 udp_notifyall_oncpu(netmsg_t msg) 839 { 840 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 841 int nextcpu, cpu = mycpuid; 842 843 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 844 845 nextcpu = cpu + 1; 846 if (nextcpu < ncpus2) 847 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 848 else 849 lwkt_replymsg(&nm->base.lmsg, 0); 850 } 851 852 void 853 udp_ctlinput(netmsg_t msg) 854 { 855 struct sockaddr *sa = msg->ctlinput.nm_arg; 856 struct ip *ip = msg->ctlinput.nm_extra; 857 int cmd = msg->ctlinput.nm_cmd; 858 struct udphdr *uh; 859 void (*notify) (struct inpcb *, int) = udp_notify; 860 struct in_addr faddr; 861 struct inpcb *inp; 862 863 faddr = ((struct sockaddr_in *)sa)->sin_addr; 864 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 865 goto done; 866 867 if (PRC_IS_REDIRECT(cmd)) { 868 ip = NULL; 869 notify = in_rtchange; 870 } else if (cmd == PRC_HOSTDEAD) { 871 ip = NULL; 872 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 873 goto done; 874 } 875 876 if (ip) { 877 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 878 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 879 ip->ip_src, uh->uh_sport, 0, NULL); 880 if (inp != NULL && inp->inp_socket != NULL) 881 (*notify)(inp, inetctlerrmap[cmd]); 882 } else { 883 struct netmsg_udp_notify *nm; 884 885 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 886 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 887 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 888 0, udp_notifyall_oncpu); 889 nm->nm_faddr = faddr; 890 nm->nm_arg = inetctlerrmap[cmd]; 891 nm->nm_notify = notify; 892 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 893 } 894 done: 895 lwkt_replymsg(&msg->lmsg, 0); 896 } 897 898 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 899 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 900 901 static int 902 udp_getcred(SYSCTL_HANDLER_ARGS) 903 { 904 struct sockaddr_in addrs[2]; 905 struct ucred cred0, *cred = NULL; 906 struct inpcb *inp; 907 int error, cpu, origcpu; 908 909 error = priv_check(req->td, PRIV_ROOT); 910 if (error) 911 return (error); 912 error = SYSCTL_IN(req, addrs, sizeof addrs); 913 if (error) 914 return (error); 915 916 origcpu = mycpuid; 917 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 918 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 919 920 lwkt_migratecpu(cpu); 921 922 inp = in_pcblookup_hash(&udbinfo[cpu], 923 addrs[1].sin_addr, addrs[1].sin_port, 924 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 925 if (inp == NULL || inp->inp_socket == NULL) { 926 error = ENOENT; 927 } else if (inp->inp_socket->so_cred != NULL) { 928 cred0 = *(inp->inp_socket->so_cred); 929 cred = &cred0; 930 } 931 932 lwkt_migratecpu(origcpu); 933 934 if (error) 935 return error; 936 937 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 938 } 939 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 940 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 941 942 static void 943 udp_send_redispatch(netmsg_t msg) 944 { 945 struct mbuf *m = msg->send.nm_m; 946 int pru_flags = msg->send.nm_flags; 947 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 948 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 949 int flags = msg->send.nm_priv; /* ip_output flags */ 950 int error; 951 952 logudp(redisp_ipout_beg, inp); 953 954 /* 955 * - Don't use inp route cache. It should only be used in the 956 * inp owner netisr. 957 * - Access to inp_moptions should be safe, since multicast UDP 958 * datagrams are redispatched to netisr0 and inp_moptions is 959 * changed only in netisr0. 960 */ 961 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 962 if ((pru_flags & PRUS_NOREPLY) == 0) 963 lwkt_replymsg(&msg->send.base.lmsg, error); 964 965 if (m_opt != NULL) { 966 /* Free saved ip options, if any */ 967 m_freem(m_opt); 968 } 969 970 logudp(redisp_ipout_end, inp); 971 } 972 973 static void 974 udp_send(netmsg_t msg) 975 { 976 struct socket *so = msg->send.base.nm_so; 977 struct mbuf *m = msg->send.nm_m; 978 struct sockaddr *dstaddr = msg->send.nm_addr; 979 int pru_flags = msg->send.nm_flags; 980 struct inpcb *inp = so->so_pcb; 981 struct thread *td = msg->send.nm_td; 982 int flags; 983 984 struct udpiphdr *ui; 985 int len = m->m_pkthdr.len; 986 struct sockaddr_in *sin; /* really is initialized before use */ 987 int error = 0, cpu; 988 989 KKASSERT(msg->send.nm_control == NULL); 990 991 logudp(send_beg, inp); 992 993 if (inp == NULL) { 994 error = EINVAL; 995 goto release; 996 } 997 998 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 999 error = EMSGSIZE; 1000 goto release; 1001 } 1002 1003 if (inp->inp_lport == 0) { /* unbound socket */ 1004 boolean_t forwarded; 1005 1006 error = in_pcbbind(inp, NULL, td); 1007 if (error) 1008 goto release; 1009 1010 /* 1011 * Need to call udp_send again, after this inpcb is 1012 * inserted into wildcard hash table. 1013 */ 1014 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 1015 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 1016 if (forwarded) { 1017 /* 1018 * The message is further forwarded, so we are 1019 * done here. 1020 */ 1021 logudp(send_inswildcard, inp); 1022 return; 1023 } 1024 } 1025 1026 if (dstaddr != NULL) { /* destination address specified */ 1027 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1028 /* already connected */ 1029 error = EISCONN; 1030 goto release; 1031 } 1032 sin = (struct sockaddr_in *)dstaddr; 1033 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 1034 error = EAFNOSUPPORT; /* IPv6 only jail */ 1035 goto release; 1036 } 1037 } else { 1038 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1039 /* no destination specified and not already connected */ 1040 error = ENOTCONN; 1041 goto release; 1042 } 1043 sin = NULL; 1044 } 1045 1046 /* 1047 * Calculate data length and get a mbuf 1048 * for UDP and IP headers. 1049 */ 1050 M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT); 1051 if (m == NULL) { 1052 error = ENOBUFS; 1053 goto release; 1054 } 1055 1056 /* 1057 * Fill in mbuf with extended UDP header 1058 * and addresses and length put into network format. 1059 */ 1060 ui = mtod(m, struct udpiphdr *); 1061 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1062 ui->ui_pr = IPPROTO_UDP; 1063 1064 /* 1065 * Set destination address. 1066 */ 1067 if (dstaddr != NULL) { /* use specified destination */ 1068 ui->ui_dst = sin->sin_addr; 1069 ui->ui_dport = sin->sin_port; 1070 } else { /* use connected destination */ 1071 ui->ui_dst = inp->inp_faddr; 1072 ui->ui_dport = inp->inp_fport; 1073 } 1074 1075 /* 1076 * Set source address. 1077 */ 1078 if (inp->inp_laddr.s_addr == INADDR_ANY || 1079 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1080 struct sockaddr_in *if_sin; 1081 1082 if (dstaddr == NULL) { 1083 /* 1084 * connect() had (or should have) failed because 1085 * the interface had no IP address, but the 1086 * application proceeded to call send() anyways. 1087 */ 1088 error = ENOTCONN; 1089 goto release; 1090 } 1091 1092 /* Look up outgoing interface. */ 1093 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1094 if (error) 1095 goto release; 1096 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1097 } else { 1098 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1099 } 1100 ui->ui_sport = inp->inp_lport; 1101 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1102 1103 /* 1104 * Release the original thread, since it is no longer used 1105 */ 1106 if (pru_flags & PRUS_HELDTD) { 1107 lwkt_rele(td); 1108 pru_flags &= ~PRUS_HELDTD; 1109 } 1110 /* 1111 * Free the dest address, since it is no longer needed 1112 */ 1113 if (pru_flags & PRUS_FREEADDR) { 1114 kfree(dstaddr, M_SONAME); 1115 pru_flags &= ~PRUS_FREEADDR; 1116 } 1117 1118 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1119 1120 /* 1121 * Set up checksum and output datagram. 1122 */ 1123 if (udpcksum) { 1124 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1125 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1126 m->m_pkthdr.csum_flags = CSUM_UDP; 1127 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1128 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1129 } else { 1130 ui->ui_sum = 0; 1131 } 1132 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1133 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1134 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1135 udp_stat.udps_opackets++; 1136 1137 flags = IP_DEBUGROUTE | 1138 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1139 if (pru_flags & PRUS_DONTROUTE) 1140 flags |= SO_DONTROUTE; 1141 1142 if (inp->inp_flags & INP_CONNECTED) { 1143 /* 1144 * For connected socket, this datagram has already 1145 * been in the correct netisr; no need to rehash. 1146 */ 1147 goto sendit; 1148 } 1149 1150 cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, 1151 ui->ui_src.s_addr, ui->ui_sport); 1152 if (cpu != mycpuid) { 1153 struct mbuf *m_opt = NULL; 1154 struct netmsg_pru_send *smsg; 1155 struct lwkt_port *port = netisr_cpuport(cpu); 1156 1157 /* 1158 * Not on the CPU that matches this UDP datagram hash; 1159 * redispatch to the correct CPU to do the ip_output(). 1160 */ 1161 if (inp->inp_options != NULL) { 1162 /* 1163 * If there are ip options, then save a copy, 1164 * since accessing inp_options on other CPUs' 1165 * is not safe. 1166 * 1167 * XXX optimize this? 1168 */ 1169 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1170 MB_WAIT); 1171 } 1172 if ((pru_flags & PRUS_NOREPLY) == 0) { 1173 /* 1174 * Change some parts of the original netmsg and 1175 * forward it to the target netisr. 1176 * 1177 * NOTE: so_port MUST NOT be checked in the target 1178 * netisr. 1179 */ 1180 smsg = &msg->send; 1181 smsg->nm_priv = flags; /* ip_output flags */ 1182 smsg->nm_m = m; 1183 smsg->nm_control = m_opt; /* XXX save ipopt */ 1184 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1185 smsg->base.nm_dispatch = udp_send_redispatch; 1186 lwkt_forwardmsg(port, &smsg->base.lmsg); 1187 } else { 1188 /* 1189 * Recreate the netmsg, since the original mbuf 1190 * could have been changed. And send it to the 1191 * target netisr. 1192 * 1193 * NOTE: so_port MUST NOT be checked in the target 1194 * netisr. 1195 */ 1196 smsg = &m->m_hdr.mh_sndmsg; 1197 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1198 MSGF_IGNSOPORT, udp_send_redispatch); 1199 smsg->nm_priv = flags; /* ip_output flags */ 1200 smsg->nm_flags = pru_flags; 1201 smsg->nm_m = m; 1202 smsg->nm_control = m_opt; /* XXX save ipopt */ 1203 lwkt_sendmsg(port, &smsg->base.lmsg); 1204 } 1205 1206 /* This UDP datagram is redispatched; done */ 1207 logudp(send_redisp, inp); 1208 return; 1209 } 1210 1211 sendit: 1212 logudp(send_ipout, inp); 1213 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1214 inp->inp_moptions, inp); 1215 m = NULL; 1216 1217 release: 1218 if (m != NULL) 1219 m_freem(m); 1220 1221 if (pru_flags & PRUS_HELDTD) 1222 lwkt_rele(td); 1223 if (pru_flags & PRUS_FREEADDR) 1224 kfree(dstaddr, M_SONAME); 1225 if ((pru_flags & PRUS_NOREPLY) == 0) 1226 lwkt_replymsg(&msg->send.base.lmsg, error); 1227 1228 logudp(send_end, inp); 1229 } 1230 1231 u_long udp_sendspace = 9216; /* really max datagram size */ 1232 /* 40 1K datagrams */ 1233 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1234 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1235 1236 u_long udp_recvspace = 40 * (1024 + 1237 #ifdef INET6 1238 sizeof(struct sockaddr_in6) 1239 #else 1240 sizeof(struct sockaddr_in) 1241 #endif 1242 ); 1243 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1244 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1245 1246 /* 1247 * This should never happen, since UDP socket does not support 1248 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 1249 */ 1250 static void 1251 udp_abort(netmsg_t msg __unused) 1252 { 1253 panic("udp_abort is called"); 1254 } 1255 1256 static void 1257 udp_attach(netmsg_t msg) 1258 { 1259 struct socket *so = msg->attach.base.nm_so; 1260 struct pru_attach_info *ai = msg->attach.nm_ai; 1261 struct inpcb *inp; 1262 int error; 1263 1264 inp = so->so_pcb; 1265 if (inp != NULL) { 1266 error = EINVAL; 1267 goto out; 1268 } 1269 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1270 if (error) 1271 goto out; 1272 1273 error = in_pcballoc(so, &udbinfo[mycpuid]); 1274 if (error) 1275 goto out; 1276 1277 inp = (struct inpcb *)so->so_pcb; 1278 inp->inp_flags |= INP_DIRECT_DETACH; 1279 inp->inp_vflag |= INP_IPV4; 1280 inp->inp_ip_ttl = ip_defttl; 1281 error = 0; 1282 out: 1283 lwkt_replymsg(&msg->attach.base.lmsg, error); 1284 } 1285 1286 static void 1287 udp_inswildcard_replymsg(netmsg_t msg) 1288 { 1289 lwkt_msg_t lmsg = &msg->lmsg; 1290 1291 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1292 udp_send(msg); 1293 /* msg is replied by udp_send() */ 1294 } else { 1295 lwkt_replymsg(lmsg, lmsg->ms_error); 1296 } 1297 } 1298 1299 static void 1300 udp_soreuseport_dispatch(netmsg_t msg) 1301 { 1302 /* This inpcb has already been in the wildcard hash. */ 1303 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1304 udp_inswildcard_replymsg(msg); 1305 } 1306 1307 static void 1308 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1309 { 1310 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1311 } 1312 1313 static boolean_t 1314 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1315 { 1316 int cpu; 1317 1318 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1319 ("not on owner cpu")); 1320 1321 in_pcbinswildcardhash(inp); 1322 for (cpu = 0; cpu < ncpus2; ++cpu) { 1323 if (cpu == mycpuid) { 1324 /* 1325 * This inpcb has been inserted by the above 1326 * in_pcbinswildcardhash(). 1327 */ 1328 continue; 1329 } 1330 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1331 } 1332 1333 if (inp->inp_socket->so_options & SO_REUSEPORT) { 1334 /* 1335 * For SO_REUSEPORT socket, redistribute it based on its 1336 * local group index. 1337 */ 1338 cpu = inp->inp_lgrpindex & ncpus2_mask; 1339 if (cpu != mycpuid) { 1340 struct lwkt_port *port = netisr_cpuport(cpu); 1341 lwkt_msg_t lmsg = &msg->lmsg; 1342 1343 /* 1344 * We are moving the protocol processing port the 1345 * socket is on, we have to unlink here and re-link 1346 * on the target cpu (this inpcb is still left in 1347 * the wildcard hash). 1348 */ 1349 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1350 msg->nm_dispatch = udp_soreuseport_dispatch; 1351 1352 /* 1353 * See the related comment in tcp_usrreq.c 1354 * tcp_connect() 1355 */ 1356 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1357 lwkt_forwardmsg(port, lmsg); 1358 return TRUE; /* forwarded */ 1359 } 1360 } 1361 return FALSE; 1362 } 1363 1364 static void 1365 udp_inswildcardhash_dispatch(netmsg_t msg) 1366 { 1367 struct inpcb *inp = msg->base.nm_so->so_pcb; 1368 boolean_t forwarded; 1369 1370 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1371 KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid, 1372 ("not target cpu")); 1373 1374 in_pcblink(inp, &udbinfo[mycpuid]); 1375 1376 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1377 if (forwarded) { 1378 /* The message is further forwarded, so we are done here. */ 1379 return; 1380 } 1381 udp_inswildcard_replymsg(msg); 1382 } 1383 1384 static boolean_t 1385 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1386 { 1387 lwkt_msg_t lmsg = &msg->lmsg; 1388 int cpu; 1389 1390 ASSERT_INP_NOTINHASH(inp); 1391 1392 /* This inpcb could no longer be directly detached */ 1393 inp->inp_flags &= ~INP_DIRECT_DETACH; 1394 1395 /* 1396 * Always clear the route cache, so we don't need to 1397 * worry about any owner CPU changes later. 1398 */ 1399 in_pcbresetroute(inp); 1400 1401 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1402 cpu = ntohs(inp->inp_lport) & ncpus2_mask; 1403 1404 lmsg->ms_error = error; 1405 if (cpu != mycpuid) { 1406 struct lwkt_port *port = netisr_cpuport(cpu); 1407 1408 /* 1409 * We are moving the protocol processing port the socket 1410 * is on, we have to unlink here and re-link on the 1411 * target cpu. 1412 */ 1413 in_pcbunlink(inp, &udbinfo[mycpuid]); 1414 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1415 1416 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1417 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1418 lwkt_forwardmsg(port, lmsg); 1419 return TRUE; /* forwarded */ 1420 } 1421 1422 return udp_inswildcardhash_oncpu(inp, msg); 1423 } 1424 1425 static void 1426 udp_bind(netmsg_t msg) 1427 { 1428 struct socket *so = msg->bind.base.nm_so; 1429 struct inpcb *inp; 1430 int error; 1431 1432 inp = so->so_pcb; 1433 if (inp) { 1434 struct sockaddr *nam = msg->bind.nm_nam; 1435 struct thread *td = msg->bind.nm_td; 1436 1437 error = in_pcbbind(inp, nam, td); 1438 if (error == 0) { 1439 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1440 boolean_t forwarded; 1441 1442 if (sin->sin_addr.s_addr != INADDR_ANY) 1443 inp->inp_flags |= INP_WASBOUND_NOTANY; 1444 1445 forwarded = udp_inswildcardhash(inp, 1446 &msg->bind.base, 0); 1447 if (forwarded) { 1448 /* 1449 * The message is further forwarded, so 1450 * we are done here. 1451 */ 1452 return; 1453 } 1454 } 1455 } else { 1456 error = EINVAL; 1457 } 1458 lwkt_replymsg(&msg->bind.base.lmsg, error); 1459 } 1460 1461 static void 1462 udp_connect(netmsg_t msg) 1463 { 1464 struct socket *so = msg->connect.base.nm_so; 1465 struct sockaddr *nam = msg->connect.nm_nam; 1466 struct thread *td = msg->connect.nm_td; 1467 struct inpcb *inp; 1468 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1469 struct sockaddr_in *if_sin; 1470 struct lwkt_port *port; 1471 int error; 1472 1473 KKASSERT(msg->connect.nm_m == NULL); 1474 1475 inp = so->so_pcb; 1476 if (inp == NULL) { 1477 error = EINVAL; 1478 goto out; 1479 } 1480 1481 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1482 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1483 in_pcblink(inp, &udbinfo[mycpuid]); 1484 } 1485 1486 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1487 error = EISCONN; 1488 goto out; 1489 } 1490 error = 0; 1491 1492 /* 1493 * Bind if we have to 1494 */ 1495 if (inp->inp_lport == 0) { 1496 error = in_pcbbind(inp, NULL, td); 1497 if (error) 1498 goto out; 1499 } 1500 1501 /* 1502 * Calculate the correct protocol processing thread. The connect 1503 * operation must run there. 1504 */ 1505 error = in_pcbladdr(inp, nam, &if_sin, td); 1506 if (error) 1507 goto out; 1508 if (!prison_remote_ip(td, nam)) { 1509 error = EAFNOSUPPORT; /* IPv6 only jail */ 1510 goto out; 1511 } 1512 1513 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1514 inp->inp_laddr.s_addr != INADDR_ANY ? 1515 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1516 if (port != &curthread->td_msgport) { 1517 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1518 int nm_flags = PRUC_RECONNECT; 1519 1520 /* 1521 * in_pcbladdr() may have allocated a route entry for us 1522 * on the current CPU, but we need a route entry on the 1523 * inpcb's owner CPU, so free it here. 1524 */ 1525 in_pcbresetroute(inp); 1526 1527 if (inp->inp_flags & INP_WILDCARD) { 1528 /* 1529 * Remove this inpcb from the wildcard hash before 1530 * the socket's msgport changes. 1531 */ 1532 udp_remwildcardhash(inp); 1533 } 1534 1535 /* 1536 * We are moving the protocol processing port the socket 1537 * is on, we have to unlink here and re-link on the 1538 * target cpu. 1539 */ 1540 in_pcbunlink(inp, &udbinfo[mycpuid]); 1541 msg->connect.nm_flags |= nm_flags; 1542 1543 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1544 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1545 lwkt_forwardmsg(port, lmsg); 1546 /* msg invalid now */ 1547 return; 1548 } 1549 error = udp_connect_oncpu(inp, sin, if_sin); 1550 out: 1551 if (error && inp != NULL && inp->inp_lport != 0 && 1552 (inp->inp_flags & INP_WILDCARD) == 0) { 1553 boolean_t forwarded; 1554 1555 /* Connect failed; put it to wildcard hash. */ 1556 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1557 error); 1558 if (forwarded) { 1559 /* 1560 * The message is further forwarded, so we are done 1561 * here. 1562 */ 1563 return; 1564 } 1565 } 1566 lwkt_replymsg(&msg->connect.base.lmsg, error); 1567 } 1568 1569 static void 1570 udp_remwildcardhash(struct inpcb *inp) 1571 { 1572 int cpu; 1573 1574 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1575 ("not on owner cpu")); 1576 1577 for (cpu = 0; cpu < ncpus2; ++cpu) { 1578 if (cpu == mycpuid) { 1579 /* 1580 * This inpcb will be removed by the later 1581 * in_pcbremwildcardhash(). 1582 */ 1583 continue; 1584 } 1585 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1586 } 1587 in_pcbremwildcardhash(inp); 1588 } 1589 1590 static int 1591 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1592 struct sockaddr_in *if_sin) 1593 { 1594 struct socket *so = inp->inp_socket; 1595 struct inpcb *oinp; 1596 1597 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1598 sin->sin_addr, sin->sin_port, 1599 inp->inp_laddr.s_addr != INADDR_ANY ? 1600 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1601 if (oinp != NULL) 1602 return EADDRINUSE; 1603 1604 /* 1605 * No more errors can occur, finish adjusting the socket 1606 * and change the processing port to reflect the connected 1607 * socket. Once set we can no longer safely mess with the 1608 * socket. 1609 */ 1610 1611 if (inp->inp_flags & INP_WILDCARD) 1612 udp_remwildcardhash(inp); 1613 1614 if (inp->inp_laddr.s_addr == INADDR_ANY) 1615 inp->inp_laddr = if_sin->sin_addr; 1616 inp->inp_faddr = sin->sin_addr; 1617 inp->inp_fport = sin->sin_port; 1618 in_pcbinsconnhash(inp); 1619 1620 soisconnected(so); 1621 1622 return 0; 1623 } 1624 1625 static void 1626 udp_detach2(struct socket *so) 1627 { 1628 in_pcbdetach(so->so_pcb); 1629 sodiscard(so); 1630 sofree(so); 1631 } 1632 1633 static void 1634 udp_detach_final_dispatch(netmsg_t msg) 1635 { 1636 udp_detach2(msg->base.nm_so); 1637 } 1638 1639 static void 1640 udp_detach_oncpu_dispatch(netmsg_t msg) 1641 { 1642 struct netmsg_base *clomsg = &msg->base; 1643 struct socket *so = clomsg->nm_so; 1644 struct inpcb *inp = so->so_pcb; 1645 struct thread *td = curthread; 1646 int nextcpu, cpuid = mycpuid; 1647 1648 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1649 1650 if (inp->inp_flags & INP_WILDCARD) { 1651 /* 1652 * This inp will be removed on the inp's 1653 * owner CPU later, so don't do it now. 1654 */ 1655 if (&td->td_msgport != so->so_port) 1656 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1657 } 1658 1659 if (cpuid == 0) { 1660 /* 1661 * Free and clear multicast socket option, 1662 * which is only accessed in netisr0. 1663 */ 1664 ip_freemoptions(inp->inp_moptions); 1665 inp->inp_moptions = NULL; 1666 } 1667 1668 nextcpu = cpuid + 1; 1669 if (nextcpu < ncpus2) { 1670 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1671 } else { 1672 /* 1673 * No one could see this inpcb now; destroy this 1674 * inpcb in its owner netisr. 1675 */ 1676 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1677 udp_detach_final_dispatch); 1678 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1679 } 1680 } 1681 1682 static void 1683 udp_detach(netmsg_t msg) 1684 { 1685 struct socket *so = msg->detach.base.nm_so; 1686 struct netmsg_base *clomsg; 1687 struct inpcb *inp; 1688 1689 inp = so->so_pcb; 1690 if (inp == NULL) { 1691 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1692 return; 1693 } 1694 1695 /* 1696 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1697 * sofree() later. 1698 */ 1699 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1700 1701 if (ncpus2 == 1) { 1702 /* Only one CPU, detach the inpcb directly. */ 1703 udp_detach2(so); 1704 return; 1705 } 1706 1707 /* 1708 * Remove this inpcb from the inpcb list first, so that 1709 * no one could find this inpcb from the inpcb list. 1710 */ 1711 in_pcbofflist(inp); 1712 1713 if (inp->inp_flags & INP_DIRECT_DETACH) { 1714 /* 1715 * Direct detaching is allowed 1716 */ 1717 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1718 ("in the wildcardhash")); 1719 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1720 udp_detach2(so); 1721 return; 1722 } 1723 1724 /* 1725 * Go through netisrs which process UDP to make sure 1726 * no one could find this inpcb anymore. 1727 */ 1728 clomsg = &so->so_clomsg; 1729 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1730 udp_detach_oncpu_dispatch); 1731 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1732 } 1733 1734 static void 1735 udp_disconnect(netmsg_t msg) 1736 { 1737 struct socket *so = msg->disconnect.base.nm_so; 1738 struct inpcb *inp; 1739 boolean_t forwarded; 1740 int error = 0; 1741 1742 inp = so->so_pcb; 1743 if (inp == NULL) { 1744 error = EINVAL; 1745 goto out; 1746 } 1747 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1748 error = ENOTCONN; 1749 goto out; 1750 } 1751 1752 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1753 1754 in_pcbdisconnect(inp); 1755 1756 /* 1757 * Follow traditional BSD behavior and retain the local port 1758 * binding. But, fix the old misbehavior of overwriting any 1759 * previously bound local address. 1760 */ 1761 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1762 inp->inp_laddr.s_addr = INADDR_ANY; 1763 1764 if (so->so_state & SS_ISCLOSING) { 1765 /* 1766 * If this socket is being closed, there is no need 1767 * to put this socket back into wildcard hash table. 1768 */ 1769 error = 0; 1770 goto out; 1771 } 1772 1773 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1774 if (forwarded) { 1775 /* 1776 * The message is further forwarded, so we are done 1777 * here. 1778 */ 1779 return; 1780 } 1781 out: 1782 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1783 } 1784 1785 void 1786 udp_shutdown(netmsg_t msg) 1787 { 1788 struct socket *so = msg->shutdown.base.nm_so; 1789 struct inpcb *inp; 1790 int error; 1791 1792 inp = so->so_pcb; 1793 if (inp) { 1794 socantsendmore(so); 1795 error = 0; 1796 } else { 1797 error = EINVAL; 1798 } 1799 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1800 } 1801 1802 struct pr_usrreqs udp_usrreqs = { 1803 .pru_abort = udp_abort, 1804 .pru_accept = pr_generic_notsupp, 1805 .pru_attach = udp_attach, 1806 .pru_bind = udp_bind, 1807 .pru_connect = udp_connect, 1808 .pru_connect2 = pr_generic_notsupp, 1809 .pru_control = in_control_dispatch, 1810 .pru_detach = udp_detach, 1811 .pru_disconnect = udp_disconnect, 1812 .pru_listen = pr_generic_notsupp, 1813 .pru_peeraddr = in_setpeeraddr_dispatch, 1814 .pru_rcvd = pr_generic_notsupp, 1815 .pru_rcvoob = pr_generic_notsupp, 1816 .pru_send = udp_send, 1817 .pru_sense = pru_sense_null, 1818 .pru_shutdown = udp_shutdown, 1819 .pru_sockaddr = in_setsockaddr_dispatch, 1820 .pru_sosend = sosendudp, 1821 .pru_soreceive = soreceive 1822 }; 1823