1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 124 125 #define UDP_KTR_STRING "inp=%p" 126 #define UDP_KTR_ARGS struct inpcb *inp 127 128 #ifndef KTR_UDP 129 #define KTR_UDP KTR_ALL 130 #endif 131 132 KTR_INFO_MASTER(udp); 133 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 136 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 137 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 138 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 139 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 140 141 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 142 143 /* 144 * UDP protocol implementation. 145 * Per RFC 768, August, 1980. 146 */ 147 #ifndef COMPAT_42 148 static int udpcksum = 1; 149 #else 150 static int udpcksum = 0; /* XXX */ 151 #endif 152 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 153 &udpcksum, 0, "Enable checksumming of UDP packets"); 154 155 int log_in_vain = 0; 156 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 157 &log_in_vain, 0, "Log all incoming UDP packets"); 158 159 static int blackhole = 0; 160 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 161 &blackhole, 0, "Do not send port unreachables for refused connects"); 162 163 static int strict_mcast_mship = 1; 164 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 165 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 166 167 int udp_sosend_async = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 169 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 170 171 int udp_sosend_prepend = 1; 172 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 173 &udp_sosend_prepend, 0, 174 "Prepend enough space for proto and link header in pru_send"); 175 176 static int udp_reuseport_ext = 1; 177 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 178 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 179 180 struct inpcbinfo udbinfo[MAXCPU]; 181 182 #ifndef UDBHASHSIZE 183 #define UDBHASHSIZE 16 184 #endif 185 CTASSERT((UDBHASHSIZE & (UDBHASHSIZE - 1)) == 0); 186 187 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 188 189 static void udp_append(struct inpcb *last, struct ip *ip, 190 struct mbuf *n, int off, struct sockaddr_in *udp_in); 191 192 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 193 struct sockaddr_in *if_sin, uint16_t hash); 194 195 static boolean_t udp_inswildcardhash(struct inpcb *inp, 196 struct netmsg_base *msg, int error); 197 static void udp_remwildcardhash(struct inpcb *inp); 198 199 static __inline int 200 udp_lportcpu(short lport) 201 { 202 return (ntohs(lport) % netisr_ncpus); 203 } 204 205 void 206 udp_init(void) 207 { 208 struct inpcbportinfo *portinfo; 209 int cpu; 210 211 portinfo = kmalloc_cachealign(sizeof(*portinfo) * netisr_ncpus, M_PCB, 212 M_WAITOK); 213 214 for (cpu = 0; cpu < netisr_ncpus; cpu++) { 215 struct inpcbinfo *uicb = &udbinfo[cpu]; 216 217 /* 218 * NOTE: 219 * UDP pcb list, wildcard hash table and localgroup hash 220 * table are shared. 221 */ 222 in_pcbinfo_init(uicb, cpu, TRUE); 223 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 224 225 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, cpu); 226 in_pcbportinfo_set(uicb, portinfo, netisr_ncpus); 227 228 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 229 &uicb->wildcardhashmask); 230 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 231 &uicb->localgrphashmask); 232 233 uicb->ipi_size = sizeof(struct inpcb); 234 } 235 236 /* 237 * Initialize UDP statistics counters for each CPU. 238 */ 239 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 240 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 241 } 242 243 static int 244 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 245 { 246 int cpu, error = 0; 247 248 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 249 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 250 sizeof(struct udpstat)))) 251 break; 252 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 253 sizeof(struct udpstat)))) 254 break; 255 } 256 257 return (error); 258 } 259 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 260 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 261 262 void 263 udp_ctloutput(netmsg_t msg) 264 { 265 struct socket *so = msg->base.nm_so; 266 struct sockopt *sopt = msg->ctloutput.nm_sopt; 267 struct inpcb *inp = so->so_pcb; 268 269 if (inp == NULL) { 270 lwkt_replymsg(&msg->lmsg, EINVAL); 271 return; 272 } 273 274 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 275 switch (sopt->sopt_name) { 276 case IP_MULTICAST_IF: 277 case IP_MULTICAST_VIF: 278 case IP_MULTICAST_TTL: 279 case IP_MULTICAST_LOOP: 280 case IP_ADD_MEMBERSHIP: 281 case IP_DROP_MEMBERSHIP: 282 /* 283 * This pr_ctloutput msg will be forwarded 284 * to netisr0 to run; we can't do direct 285 * detaching anymore. 286 * 287 * NOTE: 288 * Don't optimize for the sockets whose 289 * current so_port is netisr0's msgport. 290 * These sockets could be connect(2)'ed 291 * later and the so_port will be changed. 292 */ 293 inp->inp_flags &= ~INP_DIRECT_DETACH; 294 break; 295 } 296 } 297 return ip_ctloutput(msg); 298 } 299 300 /* 301 * Check multicast packets to make sure they are only sent to sockets with 302 * multicast memberships for the packet's destination address and arrival 303 * interface. Multicast packets to multicast-unaware sockets are also 304 * disallowed. 305 * 306 * Returns 0 if the packet is acceptable, -1 if it is not. 307 */ 308 static __inline int 309 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 310 const struct mbuf *m) 311 { 312 const struct ip_moptions *mopt; 313 int mshipno; 314 315 if (strict_mcast_mship == 0 || 316 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 317 return (0); 318 } 319 320 ASSERT_NETISR0; 321 322 mopt = inp->inp_moptions; 323 if (mopt == NULL) 324 return (-1); 325 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 326 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 327 328 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 329 m->m_pkthdr.rcvif == maddr->inm_ifp) { 330 return (0); 331 } 332 } 333 return (-1); 334 } 335 336 struct udp_mcast_arg { 337 struct inpcb *inp; 338 struct inpcb *last; 339 struct ip *ip; 340 struct mbuf *m; 341 int iphlen; 342 struct sockaddr_in *udp_in; 343 }; 344 345 static int 346 udp_mcast_input(struct udp_mcast_arg *arg) 347 { 348 struct inpcb *inp = arg->inp; 349 struct inpcb *last = arg->last; 350 struct ip *ip = arg->ip; 351 struct mbuf *m = arg->m; 352 353 if (check_multicast_membership(ip, inp, m) < 0) 354 return ERESTART; /* caller continue */ 355 356 if (last != NULL) { 357 struct mbuf *n; 358 359 #ifdef IPSEC 360 /* check AH/ESP integrity. */ 361 if (ipsec4_in_reject_so(m, last->inp_socket)) 362 ipsecstat.in_polvio++; 363 /* do not inject data to pcb */ 364 else 365 #endif /*IPSEC*/ 366 #ifdef FAST_IPSEC 367 /* check AH/ESP integrity. */ 368 if (ipsec4_in_reject(m, last)) 369 ; 370 else 371 #endif /*FAST_IPSEC*/ 372 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 373 udp_append(last, ip, n, 374 arg->iphlen + sizeof(struct udphdr), 375 arg->udp_in); 376 } 377 arg->last = last = inp; 378 379 /* 380 * Don't look for additional matches if this one does 381 * not have either the SO_REUSEPORT or SO_REUSEADDR 382 * socket options set. This heuristic avoids searching 383 * through all pcbs in the common case of a non-shared 384 * port. It * assumes that an application will never 385 * clear these options after setting them. 386 */ 387 if (!(last->inp_socket->so_options & 388 (SO_REUSEPORT | SO_REUSEADDR))) 389 return EJUSTRETURN; /* caller stop */ 390 return 0; 391 } 392 393 int 394 udp_input(struct mbuf **mp, int *offp, int proto) 395 { 396 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 397 int iphlen; 398 struct ip *ip; 399 struct udphdr *uh; 400 struct inpcb *inp; 401 struct mbuf *m; 402 struct mbuf *opts = NULL; 403 int len, off; 404 struct ip save_ip; 405 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 406 407 off = *offp; 408 m = *mp; 409 *mp = NULL; 410 411 iphlen = off; 412 udp_stat.udps_ipackets++; 413 414 /* 415 * Strip IP options, if any; should skip this, 416 * make available to user, and use on returned packets, 417 * but we don't yet have a way to check the checksum 418 * with options still present. 419 */ 420 if (iphlen > sizeof(struct ip)) { 421 ip_stripoptions(m); 422 iphlen = sizeof(struct ip); 423 } 424 425 /* 426 * IP and UDP headers are together in first mbuf. 427 * Already checked and pulled up in ip_demux(). 428 */ 429 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 430 ("UDP header not in one mbuf")); 431 432 ip = mtod(m, struct ip *); 433 uh = (struct udphdr *)((caddr_t)ip + iphlen); 434 435 /* destination port of 0 is illegal, based on RFC768. */ 436 if (uh->uh_dport == 0) 437 goto bad; 438 439 /* 440 * Make mbuf data length reflect UDP length. 441 * If not enough data to reflect UDP length, drop. 442 */ 443 len = ntohs((u_short)uh->uh_ulen); 444 if (ip->ip_len != len) { 445 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 446 udp_stat.udps_badlen++; 447 goto bad; 448 } 449 m_adj(m, len - ip->ip_len); 450 /* ip->ip_len = len; */ 451 } 452 /* 453 * Save a copy of the IP header in case we want restore it 454 * for sending an ICMP error message in response. 455 */ 456 save_ip = *ip; 457 458 /* 459 * Checksum extended UDP header and data. 460 */ 461 if (uh->uh_sum) { 462 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 463 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 464 uh->uh_sum = m->m_pkthdr.csum_data; 465 else 466 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 467 ip->ip_dst.s_addr, htonl((u_short)len + 468 m->m_pkthdr.csum_data + IPPROTO_UDP)); 469 uh->uh_sum ^= 0xffff; 470 } else { 471 char b[9]; 472 473 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 474 bzero(((struct ipovly *)ip)->ih_x1, 9); 475 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 476 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 477 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 478 } 479 if (uh->uh_sum) { 480 udp_stat.udps_badsum++; 481 m_freem(m); 482 return(IPPROTO_DONE); 483 } 484 } else 485 udp_stat.udps_nosum++; 486 487 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 488 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 489 struct inpcbhead *connhead; 490 struct inpcontainer *ic, *ic_marker; 491 struct inpcontainerhead *ichead; 492 struct udp_mcast_arg arg; 493 struct inpcb *last; 494 int error; 495 496 /* 497 * Deliver a multicast or broadcast datagram to *all* sockets 498 * for which the local and remote addresses and ports match 499 * those of the incoming datagram. This allows more than 500 * one process to receive multi/broadcasts on the same port. 501 * (This really ought to be done for unicast datagrams as 502 * well, but that would cause problems with existing 503 * applications that open both address-specific sockets and 504 * a wildcard socket listening to the same port -- they would 505 * end up receiving duplicates of every unicast datagram. 506 * Those applications open the multiple sockets to overcome an 507 * inadequacy of the UDP socket interface, but for backwards 508 * compatibility we avoid the problem here rather than 509 * fixing the interface. Maybe 4.5BSD will remedy this?) 510 */ 511 512 /* 513 * Construct sockaddr format source address. 514 */ 515 udp_in.sin_port = uh->uh_sport; 516 udp_in.sin_addr = ip->ip_src; 517 arg.udp_in = &udp_in; 518 /* 519 * Locate pcb(s) for datagram. 520 * (Algorithm copied from raw_intr().) 521 */ 522 last = NULL; 523 arg.iphlen = iphlen; 524 525 connhead = &pcbinfo->hashbase[ 526 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 527 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 528 LIST_FOREACH(inp, connhead, inp_hash) { 529 #ifdef INET6 530 if (!INP_ISIPV4(inp)) 531 continue; 532 #endif 533 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 534 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 535 inp->inp_fport != uh->uh_sport || 536 inp->inp_lport != uh->uh_dport) 537 continue; 538 539 arg.inp = inp; 540 arg.last = last; 541 arg.ip = ip; 542 arg.m = m; 543 544 error = udp_mcast_input(&arg); 545 if (error == ERESTART) 546 continue; 547 last = arg.last; 548 549 if (error == EJUSTRETURN) 550 goto done; 551 } 552 553 ichead = &pcbinfo->wildcardhashbase[ 554 INP_PCBWILDCARDHASH(uh->uh_dport, 555 pcbinfo->wildcardhashmask)]; 556 ic_marker = in_pcbcontainer_marker(); 557 558 GET_PCBINFO_TOKEN(pcbinfo); 559 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 560 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 561 LIST_REMOVE(ic_marker, ic_list); 562 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 563 564 inp = ic->ic_inp; 565 if (inp->inp_flags & INP_PLACEMARKER) 566 continue; 567 #ifdef INET6 568 if (!INP_ISIPV4(inp)) 569 continue; 570 #endif 571 if (inp->inp_lport != uh->uh_dport) 572 continue; 573 if (inp->inp_laddr.s_addr != INADDR_ANY && 574 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 575 continue; 576 577 arg.inp = inp; 578 arg.last = last; 579 arg.ip = ip; 580 arg.m = m; 581 582 error = udp_mcast_input(&arg); 583 if (error == ERESTART) 584 continue; 585 last = arg.last; 586 587 if (error == EJUSTRETURN) 588 break; 589 } 590 LIST_REMOVE(ic_marker, ic_list); 591 REL_PCBINFO_TOKEN(pcbinfo); 592 done: 593 if (last == NULL) { 594 /* 595 * No matching pcb found; discard datagram. 596 * (No need to send an ICMP Port Unreachable 597 * for a broadcast or multicast datgram.) 598 */ 599 udp_stat.udps_noportbcast++; 600 goto bad; 601 } 602 #ifdef IPSEC 603 /* check AH/ESP integrity. */ 604 if (ipsec4_in_reject_so(m, last->inp_socket)) { 605 ipsecstat.in_polvio++; 606 goto bad; 607 } 608 #endif /*IPSEC*/ 609 #ifdef FAST_IPSEC 610 /* check AH/ESP integrity. */ 611 if (ipsec4_in_reject(m, last)) 612 goto bad; 613 #endif /*FAST_IPSEC*/ 614 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 615 &udp_in); 616 return(IPPROTO_DONE); 617 } 618 /* 619 * Locate pcb for datagram. 620 */ 621 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 622 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 623 udp_reuseport_ext ? m : NULL); 624 if (inp == NULL) { 625 if (log_in_vain) { 626 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 627 628 log(LOG_INFO, 629 "Connection attempt to UDP %s:%d from %s:%d\n", 630 kinet_ntoa(ip->ip_dst, dst), ntohs(uh->uh_dport), 631 kinet_ntoa(ip->ip_src, src), ntohs(uh->uh_sport)); 632 } 633 udp_stat.udps_noport++; 634 if (m->m_flags & (M_BCAST | M_MCAST)) { 635 udp_stat.udps_noportbcast++; 636 goto bad; 637 } 638 if (blackhole) 639 goto bad; 640 #ifdef ICMP_BANDLIM 641 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 642 goto bad; 643 #endif 644 *ip = save_ip; 645 ip->ip_len += iphlen; 646 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 647 return(IPPROTO_DONE); 648 } 649 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 650 #ifdef IPSEC 651 if (ipsec4_in_reject_so(m, inp->inp_socket)) { 652 ipsecstat.in_polvio++; 653 goto bad; 654 } 655 #endif /*IPSEC*/ 656 #ifdef FAST_IPSEC 657 if (ipsec4_in_reject(m, inp)) 658 goto bad; 659 #endif /*FAST_IPSEC*/ 660 /* 661 * Check the minimum TTL for socket. 662 */ 663 if (ip->ip_ttl < inp->inp_ip_minttl) 664 goto bad; 665 666 /* 667 * Construct sockaddr format source address. 668 * Stuff source address and datagram in user buffer. 669 */ 670 udp_in.sin_port = uh->uh_sport; 671 udp_in.sin_addr = ip->ip_src; 672 if ((inp->inp_flags & INP_CONTROLOPTS) || 673 (inp->inp_socket->so_options & SO_TIMESTAMP)) 674 ip_savecontrol(inp, &opts, ip, m); 675 m_adj(m, iphlen + sizeof(struct udphdr)); 676 677 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 678 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 679 (struct sockaddr *)&udp_in, m, opts) == 0) { 680 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 681 udp_stat.udps_fullsock++; 682 goto bad; 683 } 684 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 685 sorwakeup(inp->inp_socket); 686 return(IPPROTO_DONE); 687 bad: 688 m_freem(m); 689 if (opts) 690 m_freem(opts); 691 return(IPPROTO_DONE); 692 } 693 694 /* 695 * subroutine of udp_input(), mainly for source code readability. 696 * caller must properly init udp_ip6 and udp_in6 beforehand. 697 */ 698 static void 699 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 700 struct sockaddr_in *udp_in) 701 { 702 struct mbuf *opts = NULL; 703 int ret; 704 705 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 706 707 if (last->inp_flags & INP_CONTROLOPTS || 708 last->inp_socket->so_options & SO_TIMESTAMP) 709 ip_savecontrol(last, &opts, ip, n); 710 m_adj(n, off); 711 712 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 713 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 714 (struct sockaddr *)udp_in, n, opts); 715 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 716 if (ret == 0) { 717 m_freem(n); 718 if (opts) 719 m_freem(opts); 720 udp_stat.udps_fullsock++; 721 } else { 722 sorwakeup(last->inp_socket); 723 } 724 } 725 726 /* 727 * Notify a udp user of an asynchronous error; 728 * just wake up so that he can collect error status. 729 */ 730 void 731 udp_notify(struct inpcb *inp, int error) 732 { 733 inp->inp_socket->so_error = error; 734 sorwakeup(inp->inp_socket); 735 sowwakeup(inp->inp_socket); 736 } 737 738 struct netmsg_udp_notify { 739 struct netmsg_base base; 740 inp_notify_t nm_notify; 741 struct in_addr nm_faddr; 742 int nm_arg; 743 }; 744 745 static void 746 udp_notifyall_oncpu(netmsg_t msg) 747 { 748 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 749 int nextcpu, cpu = mycpuid; 750 751 ASSERT_NETISR_NCPUS(cpu); 752 753 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 754 755 nextcpu = cpu + 1; 756 if (nextcpu < netisr_ncpus) 757 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 758 else 759 lwkt_replymsg(&nm->base.lmsg, 0); 760 } 761 762 inp_notify_t 763 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 764 struct ip **ip0, int *cpuid) 765 { 766 struct in_addr faddr; 767 struct ip *ip = *ip0; 768 inp_notify_t notify = udp_notify; 769 770 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 771 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 772 return NULL; 773 774 if (PRC_IS_REDIRECT(cmd)) { 775 ip = NULL; 776 notify = in_rtchange; 777 } else if (cmd == PRC_HOSTDEAD) { 778 ip = NULL; 779 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 780 return NULL; 781 } 782 783 if (cpuid != NULL) { 784 if (ip == NULL) { 785 /* Go through all effective netisr CPUs. */ 786 *cpuid = netisr_ncpus; 787 } else { 788 const struct udphdr *uh; 789 790 uh = (const struct udphdr *) 791 ((caddr_t)ip + (ip->ip_hl << 2)); 792 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 793 ip->ip_src.s_addr, uh->uh_sport); 794 } 795 } 796 797 *ip0 = ip; 798 return notify; 799 } 800 801 void 802 udp_ctlinput(netmsg_t msg) 803 { 804 struct sockaddr *sa = msg->ctlinput.nm_arg; 805 struct ip *ip = msg->ctlinput.nm_extra; 806 int cmd = msg->ctlinput.nm_cmd, cpuid; 807 inp_notify_t notify; 808 struct in_addr faddr; 809 810 ASSERT_NETISR_NCPUS(mycpuid); 811 812 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 813 if (notify == NULL) 814 goto done; 815 816 faddr = ((struct sockaddr_in *)sa)->sin_addr; 817 if (ip) { 818 const struct udphdr *uh; 819 struct inpcb *inp; 820 821 if (cpuid != mycpuid) 822 goto done; 823 824 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 825 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 826 ip->ip_src, uh->uh_sport, 0, NULL); 827 if (inp != NULL && inp->inp_socket != NULL) 828 notify(inp, inetctlerrmap[cmd]); 829 } else if (msg->ctlinput.nm_direct) { 830 if (cpuid != netisr_ncpus && cpuid != mycpuid) 831 goto done; 832 833 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 834 notify); 835 } else { 836 struct netmsg_udp_notify *nm; 837 838 ASSERT_NETISR0; 839 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 840 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 841 0, udp_notifyall_oncpu); 842 nm->nm_faddr = faddr; 843 nm->nm_arg = inetctlerrmap[cmd]; 844 nm->nm_notify = notify; 845 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 846 } 847 done: 848 lwkt_replymsg(&msg->lmsg, 0); 849 } 850 851 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 852 in_pcblist_ncpus, "S,xinpcb", "List of active UDP sockets"); 853 854 static int 855 udp_getcred(SYSCTL_HANDLER_ARGS) 856 { 857 struct sockaddr_in addrs[2]; 858 struct ucred cred0, *cred = NULL; 859 struct inpcb *inp; 860 int error, cpu, origcpu; 861 862 error = priv_check(req->td, PRIV_ROOT); 863 if (error) 864 return (error); 865 error = SYSCTL_IN(req, addrs, sizeof addrs); 866 if (error) 867 return (error); 868 869 origcpu = mycpuid; 870 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 871 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 872 873 lwkt_migratecpu(cpu); 874 875 inp = in_pcblookup_hash(&udbinfo[cpu], 876 addrs[1].sin_addr, addrs[1].sin_port, 877 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 878 if (inp == NULL || inp->inp_socket == NULL) { 879 error = ENOENT; 880 } else if (inp->inp_socket->so_cred != NULL) { 881 cred0 = *(inp->inp_socket->so_cred); 882 cred = &cred0; 883 } 884 885 lwkt_migratecpu(origcpu); 886 887 if (error) 888 return error; 889 890 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 891 } 892 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 893 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 894 895 static void 896 udp_send_redispatch(netmsg_t msg) 897 { 898 struct mbuf *m = msg->send.nm_m; 899 int pru_flags = msg->send.nm_flags; 900 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 901 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 902 int flags = msg->send.nm_priv; /* ip_output flags */ 903 int error; 904 905 logudp(redisp_ipout_beg, inp); 906 907 /* 908 * - Don't use inp route cache. It should only be used in the 909 * inp owner netisr. 910 * - Access to inp_moptions should be safe, since multicast UDP 911 * datagrams are redispatched to netisr0 and inp_moptions is 912 * changed only in netisr0. 913 */ 914 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 915 if ((pru_flags & PRUS_NOREPLY) == 0) 916 lwkt_replymsg(&msg->send.base.lmsg, error); 917 918 if (m_opt != NULL) { 919 /* Free saved ip options, if any */ 920 m_freem(m_opt); 921 } 922 923 logudp(redisp_ipout_end, inp); 924 } 925 926 static void 927 udp_send(netmsg_t msg) 928 { 929 struct socket *so = msg->send.base.nm_so; 930 struct mbuf *m = msg->send.nm_m; 931 struct sockaddr *dstaddr = msg->send.nm_addr; 932 int pru_flags = msg->send.nm_flags; 933 struct inpcb *inp = so->so_pcb; 934 struct thread *td = msg->send.nm_td; 935 uint16_t hash; 936 int flags; 937 938 struct udpiphdr *ui; 939 int len = m->m_pkthdr.len; 940 struct sockaddr_in *sin; /* really is initialized before use */ 941 int error = 0, cpu; 942 943 KKASSERT(msg->send.nm_control == NULL); 944 945 logudp(send_beg, inp); 946 947 if (inp == NULL) { 948 error = EINVAL; 949 goto release; 950 } 951 952 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 953 error = EMSGSIZE; 954 goto release; 955 } 956 957 if (inp->inp_lport == 0) { /* unbound socket */ 958 boolean_t forwarded; 959 960 error = in_pcbbind(inp, NULL, td); 961 if (error) 962 goto release; 963 964 /* 965 * Need to call udp_send again, after this inpcb is 966 * inserted into wildcard hash table. 967 */ 968 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 969 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 970 if (forwarded) { 971 /* 972 * The message is further forwarded, so we are 973 * done here. 974 */ 975 logudp(send_inswildcard, inp); 976 return; 977 } 978 } 979 980 if (dstaddr != NULL) { /* destination address specified */ 981 if (inp->inp_faddr.s_addr != INADDR_ANY) { 982 /* already connected */ 983 error = EISCONN; 984 goto release; 985 } 986 sin = (struct sockaddr_in *)dstaddr; 987 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 988 error = EAFNOSUPPORT; /* IPv6 only jail */ 989 goto release; 990 } 991 } else { 992 if (inp->inp_faddr.s_addr == INADDR_ANY) { 993 /* no destination specified and not already connected */ 994 error = ENOTCONN; 995 goto release; 996 } 997 sin = NULL; 998 } 999 1000 /* 1001 * Calculate data length and get a mbuf 1002 * for UDP and IP headers. 1003 */ 1004 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 1005 if (m == NULL) { 1006 error = ENOBUFS; 1007 goto release; 1008 } 1009 1010 /* 1011 * Fill in mbuf with extended UDP header 1012 * and addresses and length put into network format. 1013 */ 1014 ui = mtod(m, struct udpiphdr *); 1015 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1016 ui->ui_pr = IPPROTO_UDP; 1017 1018 /* 1019 * Set destination address. 1020 */ 1021 if (dstaddr != NULL) { /* use specified destination */ 1022 ui->ui_dst = sin->sin_addr; 1023 ui->ui_dport = sin->sin_port; 1024 } else { /* use connected destination */ 1025 ui->ui_dst = inp->inp_faddr; 1026 ui->ui_dport = inp->inp_fport; 1027 } 1028 1029 /* 1030 * Set source address. 1031 */ 1032 if (inp->inp_laddr.s_addr == INADDR_ANY || 1033 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1034 struct sockaddr_in *if_sin; 1035 1036 if (dstaddr == NULL) { 1037 /* 1038 * connect() had (or should have) failed because 1039 * the interface had no IP address, but the 1040 * application proceeded to call send() anyways. 1041 */ 1042 error = ENOTCONN; 1043 goto release; 1044 } 1045 1046 /* Look up outgoing interface. */ 1047 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1048 if (error) 1049 goto release; 1050 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1051 } else { 1052 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1053 } 1054 ui->ui_sport = inp->inp_lport; 1055 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1056 1057 /* 1058 * Release the original thread, since it is no longer used 1059 */ 1060 if (pru_flags & PRUS_HELDTD) { 1061 lwkt_rele(td); 1062 pru_flags &= ~PRUS_HELDTD; 1063 } 1064 /* 1065 * Free the dest address, since it is no longer needed 1066 */ 1067 if (pru_flags & PRUS_FREEADDR) { 1068 kfree(dstaddr, M_SONAME); 1069 pru_flags &= ~PRUS_FREEADDR; 1070 } 1071 1072 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1073 1074 /* 1075 * Set up checksum and output datagram. 1076 */ 1077 if (udpcksum) { 1078 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1079 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1080 m->m_pkthdr.csum_flags = CSUM_UDP; 1081 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1082 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1083 } else { 1084 ui->ui_sum = 0; 1085 } 1086 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1087 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1088 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1089 udp_stat.udps_opackets++; 1090 1091 flags = IP_DEBUGROUTE | 1092 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1093 if (pru_flags & PRUS_DONTROUTE) 1094 flags |= SO_DONTROUTE; 1095 1096 if (inp->inp_flags & INP_CONNECTED) { 1097 /* 1098 * For connected socket, this datagram has already 1099 * been in the correct netisr; no need to rehash. 1100 */ 1101 KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); 1102 m_sethash(m, inp->inp_hashval); 1103 goto sendit; 1104 } 1105 1106 hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, 1107 ui->ui_src.s_addr, ui->ui_sport); 1108 m_sethash(m, hash); 1109 1110 cpu = netisr_hashcpu(hash); 1111 if (cpu != mycpuid) { 1112 struct mbuf *m_opt = NULL; 1113 struct netmsg_pru_send *smsg; 1114 struct lwkt_port *port = netisr_cpuport(cpu); 1115 1116 /* 1117 * Not on the CPU that matches this UDP datagram hash; 1118 * redispatch to the correct CPU to do the ip_output(). 1119 */ 1120 if (inp->inp_options != NULL) { 1121 /* 1122 * If there are ip options, then save a copy, 1123 * since accessing inp_options on other CPUs' 1124 * is not safe. 1125 * 1126 * XXX optimize this? 1127 */ 1128 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1129 M_WAITOK); 1130 } 1131 if ((pru_flags & PRUS_NOREPLY) == 0) { 1132 /* 1133 * Change some parts of the original netmsg and 1134 * forward it to the target netisr. 1135 * 1136 * NOTE: so_port MUST NOT be checked in the target 1137 * netisr. 1138 */ 1139 smsg = &msg->send; 1140 smsg->nm_priv = flags; /* ip_output flags */ 1141 smsg->nm_m = m; 1142 smsg->nm_control = m_opt; /* XXX save ipopt */ 1143 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1144 smsg->base.nm_dispatch = udp_send_redispatch; 1145 lwkt_forwardmsg(port, &smsg->base.lmsg); 1146 } else { 1147 /* 1148 * Recreate the netmsg, since the original mbuf 1149 * could have been changed. And send it to the 1150 * target netisr. 1151 * 1152 * NOTE: so_port MUST NOT be checked in the target 1153 * netisr. 1154 */ 1155 smsg = &m->m_hdr.mh_sndmsg; 1156 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1157 MSGF_IGNSOPORT, udp_send_redispatch); 1158 smsg->nm_priv = flags; /* ip_output flags */ 1159 smsg->nm_flags = pru_flags; 1160 smsg->nm_m = m; 1161 smsg->nm_control = m_opt; /* XXX save ipopt */ 1162 lwkt_sendmsg(port, &smsg->base.lmsg); 1163 } 1164 1165 /* This UDP datagram is redispatched; done */ 1166 logudp(send_redisp, inp); 1167 return; 1168 } 1169 1170 sendit: 1171 logudp(send_ipout, inp); 1172 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1173 inp->inp_moptions, inp); 1174 m = NULL; 1175 1176 release: 1177 if (m != NULL) 1178 m_freem(m); 1179 1180 if (pru_flags & PRUS_HELDTD) 1181 lwkt_rele(td); 1182 if (pru_flags & PRUS_FREEADDR) 1183 kfree(dstaddr, M_SONAME); 1184 if ((pru_flags & PRUS_NOREPLY) == 0) 1185 lwkt_replymsg(&msg->send.base.lmsg, error); 1186 1187 logudp(send_end, inp); 1188 } 1189 1190 u_long udp_sendspace = 9216; /* really max datagram size */ 1191 /* 40 1K datagrams */ 1192 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1193 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1194 1195 u_long udp_recvspace = 40 * (1024 + 1196 #ifdef INET6 1197 sizeof(struct sockaddr_in6) 1198 #else 1199 sizeof(struct sockaddr_in) 1200 #endif 1201 ); 1202 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1203 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1204 1205 /* 1206 * This should never happen, since UDP socket does not support 1207 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 1208 */ 1209 static void 1210 udp_abort(netmsg_t msg __unused) 1211 { 1212 panic("udp_abort is called"); 1213 } 1214 1215 static int 1216 udp_preattach(struct socket *so, int proto __unused, struct pru_attach_info *ai) 1217 { 1218 return soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1219 } 1220 1221 static void 1222 udp_attach(netmsg_t msg) 1223 { 1224 struct socket *so = msg->attach.base.nm_so; 1225 struct pru_attach_info *ai = msg->attach.nm_ai; 1226 struct inpcb *inp; 1227 int error; 1228 1229 KASSERT(so->so_pcb == NULL, ("udp socket attached")); 1230 1231 if (ai != NULL) { 1232 error = udp_preattach(so, 0 /* don't care */, ai); 1233 if (error) 1234 goto out; 1235 } else { 1236 /* Post attach; do nothing */ 1237 } 1238 1239 error = in_pcballoc(so, &udbinfo[mycpuid]); 1240 if (error) 1241 goto out; 1242 1243 inp = so->so_pcb; 1244 inp->inp_flags |= INP_DIRECT_DETACH; 1245 inp->inp_ip_ttl = ip_defttl; 1246 error = 0; 1247 out: 1248 lwkt_replymsg(&msg->attach.base.lmsg, error); 1249 } 1250 1251 static void 1252 udp_inswildcard_replymsg(netmsg_t msg) 1253 { 1254 lwkt_msg_t lmsg = &msg->lmsg; 1255 1256 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1257 udp_send(msg); 1258 /* msg is replied by udp_send() */ 1259 } else { 1260 lwkt_replymsg(lmsg, lmsg->ms_error); 1261 } 1262 } 1263 1264 static void 1265 udp_soreuseport_dispatch(netmsg_t msg) 1266 { 1267 /* This inpcb has already been in the wildcard hash. */ 1268 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1269 udp_inswildcard_replymsg(msg); 1270 } 1271 1272 static void 1273 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1274 { 1275 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1276 } 1277 1278 static boolean_t 1279 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1280 { 1281 int cpu; 1282 1283 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1284 ("not on owner cpu")); 1285 1286 in_pcbinswildcardhash(inp); 1287 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1288 if (cpu == mycpuid) { 1289 /* 1290 * This inpcb has been inserted by the above 1291 * in_pcbinswildcardhash(). 1292 */ 1293 continue; 1294 } 1295 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1296 } 1297 1298 /* NOTE: inp_lgrpindex is _not_ assigned in jail. */ 1299 if ((inp->inp_socket->so_options & SO_REUSEPORT) && 1300 inp->inp_lgrpindex >= 0) { 1301 /* 1302 * For SO_REUSEPORT socket, redistribute it based on its 1303 * local group index. 1304 */ 1305 cpu = inp->inp_lgrpindex % netisr_ncpus; 1306 if (cpu != mycpuid) { 1307 struct lwkt_port *port = netisr_cpuport(cpu); 1308 lwkt_msg_t lmsg = &msg->lmsg; 1309 1310 /* 1311 * We are moving the protocol processing port the 1312 * socket is on, we have to unlink here and re-link 1313 * on the target cpu (this inpcb is still left in 1314 * the wildcard hash). 1315 */ 1316 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1317 msg->nm_dispatch = udp_soreuseport_dispatch; 1318 1319 /* 1320 * See the related comment in tcp_usrreq.c 1321 * tcp_connect() 1322 */ 1323 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1324 lwkt_forwardmsg(port, lmsg); 1325 return TRUE; /* forwarded */ 1326 } 1327 } 1328 return FALSE; 1329 } 1330 1331 static void 1332 udp_inswildcardhash_dispatch(netmsg_t msg) 1333 { 1334 struct inpcb *inp = msg->base.nm_so->so_pcb; 1335 boolean_t forwarded; 1336 1337 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1338 KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu")); 1339 1340 in_pcblink(inp, &udbinfo[mycpuid]); 1341 1342 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1343 if (forwarded) { 1344 /* The message is further forwarded, so we are done here. */ 1345 return; 1346 } 1347 udp_inswildcard_replymsg(msg); 1348 } 1349 1350 static boolean_t 1351 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1352 { 1353 lwkt_msg_t lmsg = &msg->lmsg; 1354 int cpu; 1355 1356 ASSERT_INP_NOTINHASH(inp); 1357 1358 /* This inpcb could no longer be directly detached */ 1359 inp->inp_flags &= ~INP_DIRECT_DETACH; 1360 1361 /* 1362 * Always clear the route cache, so we don't need to 1363 * worry about any owner CPU changes later. 1364 */ 1365 in_pcbresetroute(inp); 1366 1367 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1368 cpu = udp_lportcpu(inp->inp_lport); 1369 1370 lmsg->ms_error = error; 1371 if (cpu != mycpuid) { 1372 struct lwkt_port *port = netisr_cpuport(cpu); 1373 1374 /* 1375 * We are moving the protocol processing port the socket 1376 * is on, we have to unlink here and re-link on the 1377 * target cpu. 1378 */ 1379 in_pcbunlink(inp, &udbinfo[mycpuid]); 1380 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1381 1382 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1383 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1384 lwkt_forwardmsg(port, lmsg); 1385 return TRUE; /* forwarded */ 1386 } 1387 1388 return udp_inswildcardhash_oncpu(inp, msg); 1389 } 1390 1391 static void 1392 udp_bind(netmsg_t msg) 1393 { 1394 struct socket *so = msg->bind.base.nm_so; 1395 struct inpcb *inp; 1396 int error; 1397 1398 inp = so->so_pcb; 1399 if (inp) { 1400 struct sockaddr *nam = msg->bind.nm_nam; 1401 struct thread *td = msg->bind.nm_td; 1402 struct sockaddr_in *sin; 1403 lwkt_port_t port; 1404 int cpu; 1405 1406 /* 1407 * Check "already bound" here (in_pcbbind() does the same 1408 * check though), so we don't forward a connected/bound 1409 * socket randomly which would panic in the following 1410 * in_pcbunlink(). 1411 */ 1412 if (inp->inp_lport != 0 || 1413 inp->inp_laddr.s_addr != INADDR_ANY) { 1414 error = EINVAL; /* already bound */ 1415 goto done; 1416 } 1417 1418 if (nam->sa_len != sizeof(*sin)) { 1419 error = EINVAL; 1420 goto done; 1421 } 1422 sin = (struct sockaddr_in *)nam; 1423 1424 cpu = udp_lportcpu(sin->sin_port); 1425 port = netisr_cpuport(cpu); 1426 1427 /* 1428 * See the related comment in tcp_usrreq.c tcp_usr_bind(). 1429 * The exception is that we use local port based netisr 1430 * to serialize in_pcbbind(). 1431 */ 1432 if (&curthread->td_msgport != port) { 1433 lwkt_msg_t lmsg = &msg->bind.base.lmsg; 1434 1435 KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0, 1436 ("already asked to relink")); 1437 1438 in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]); 1439 msg->bind.nm_flags |= PRUB_RELINK; 1440 1441 /* 1442 * See the related comment in tcp_usrreq.c 1443 * tcp_connect(). 1444 */ 1445 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1446 lwkt_forwardmsg(port, lmsg); 1447 /* msg invalid now */ 1448 return; 1449 } 1450 KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu)); 1451 1452 if (msg->bind.nm_flags & PRUB_RELINK) { 1453 msg->bind.nm_flags &= ~PRUB_RELINK; 1454 in_pcblink(so->so_pcb, &udbinfo[mycpuid]); 1455 } 1456 KASSERT(inp->inp_pcbinfo == &udbinfo[cpu], 1457 ("pcbinfo is not udbinfo%d", cpu)); 1458 1459 error = in_pcbbind(inp, nam, td); 1460 if (error == 0) { 1461 boolean_t forwarded; 1462 1463 if (sin->sin_addr.s_addr != INADDR_ANY) 1464 inp->inp_flags |= INP_WASBOUND_NOTANY; 1465 1466 forwarded = udp_inswildcardhash(inp, 1467 &msg->bind.base, 0); 1468 if (forwarded) { 1469 /* 1470 * The message is further forwarded, so 1471 * we are done here. 1472 */ 1473 return; 1474 } 1475 } 1476 } else { 1477 error = EINVAL; 1478 } 1479 done: 1480 lwkt_replymsg(&msg->bind.base.lmsg, error); 1481 } 1482 1483 static int 1484 udp_preconnect(struct socket *so, const struct sockaddr *nam __unused, 1485 struct thread *td __unused) 1486 { 1487 sosetstate(so, SS_ISCONNECTED); /* XXX */ 1488 return 0; 1489 } 1490 1491 static void 1492 udp_connect(netmsg_t msg) 1493 { 1494 struct socket *so = msg->connect.base.nm_so; 1495 struct sockaddr *nam = msg->connect.nm_nam; 1496 struct thread *td = msg->connect.nm_td; 1497 struct inpcb *inp; 1498 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1499 struct sockaddr_in *if_sin; 1500 struct lwkt_port *port; 1501 uint16_t hash; 1502 int error; 1503 1504 KKASSERT(msg->connect.nm_m == NULL); 1505 1506 inp = so->so_pcb; 1507 if (inp == NULL) { 1508 error = EINVAL; 1509 goto out; 1510 } 1511 1512 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1513 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1514 in_pcblink(inp, &udbinfo[mycpuid]); 1515 } 1516 1517 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1518 error = EISCONN; 1519 goto out; 1520 } 1521 error = 0; 1522 1523 /* 1524 * Bind if we have to 1525 */ 1526 if (inp->inp_lport == 0) { 1527 error = in_pcbbind(inp, NULL, td); 1528 if (error) 1529 goto out; 1530 } 1531 1532 /* 1533 * Calculate the correct protocol processing thread. The connect 1534 * operation must run there. 1535 */ 1536 error = in_pcbladdr(inp, nam, &if_sin, td); 1537 if (error) 1538 goto out; 1539 if (!prison_remote_ip(td, nam)) { 1540 error = EAFNOSUPPORT; /* IPv6 only jail */ 1541 goto out; 1542 } 1543 1544 hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, 1545 inp->inp_laddr.s_addr != INADDR_ANY ? 1546 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1547 port = netisr_hashport(hash); 1548 if (port != &curthread->td_msgport) { 1549 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1550 int nm_flags = PRUC_RECONNECT; 1551 1552 /* 1553 * in_pcbladdr() may have allocated a route entry for us 1554 * on the current CPU, but we need a route entry on the 1555 * inpcb's owner CPU, so free it here. 1556 */ 1557 in_pcbresetroute(inp); 1558 1559 if (inp->inp_flags & INP_WILDCARD) { 1560 /* 1561 * Remove this inpcb from the wildcard hash before 1562 * the socket's msgport changes. 1563 */ 1564 udp_remwildcardhash(inp); 1565 } 1566 1567 if (so->so_orig_port == NULL) { 1568 /* 1569 * First time change protocol processing port. 1570 * Save the current port for synchronization upon 1571 * udp_detach. 1572 */ 1573 so->so_orig_port = &curthread->td_msgport; 1574 } else { 1575 /* 1576 * We have changed protocol processing port more 1577 * than once. We could not do direct detach 1578 * anymore, because we lose the track of the 1579 * original protocol processing ports to perform 1580 * synchronization upon udp_detach. This should 1581 * be rare though. 1582 */ 1583 inp->inp_flags &= ~INP_DIRECT_DETACH; 1584 } 1585 1586 /* 1587 * We are moving the protocol processing port the socket 1588 * is on, we have to unlink here and re-link on the 1589 * target cpu. 1590 */ 1591 in_pcbunlink(inp, &udbinfo[mycpuid]); 1592 msg->connect.nm_flags |= nm_flags; 1593 1594 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1595 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1596 lwkt_forwardmsg(port, lmsg); 1597 /* msg invalid now */ 1598 return; 1599 } 1600 error = udp_connect_oncpu(inp, sin, if_sin, hash); 1601 out: 1602 if (msg->connect.nm_flags & PRUC_HELDTD) 1603 lwkt_rele(td); 1604 if (error && (msg->connect.nm_flags & PRUC_ASYNC)) { 1605 if (inp->inp_lport == 0) { 1606 /* 1607 * As long as we have the local port, it is fine 1608 * for connect to fail, e.g. disconnect. 1609 */ 1610 so->so_error = error; 1611 } 1612 soclrstate(so, SS_ISCONNECTED); 1613 /* 1614 * Wake up callers blocked on this socket to make sure 1615 * that they can see this error. 1616 * 1617 * NOTE: 1618 * sodisconnected() can't be used here, which bricks 1619 * sending and receiving. 1620 */ 1621 wakeup(&so->so_timeo); 1622 sowwakeup(so); 1623 sorwakeup(so); 1624 } 1625 if (error && inp != NULL && inp->inp_lport != 0 && 1626 (inp->inp_flags & INP_WILDCARD) == 0) { 1627 boolean_t forwarded; 1628 1629 /* Connect failed; put it to wildcard hash. */ 1630 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1631 error); 1632 if (forwarded) { 1633 /* 1634 * The message is further forwarded, so we are done 1635 * here. 1636 */ 1637 return; 1638 } 1639 } 1640 lwkt_replymsg(&msg->connect.base.lmsg, error); 1641 } 1642 1643 static void 1644 udp_remwildcardhash(struct inpcb *inp) 1645 { 1646 int cpu; 1647 1648 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1649 ("not on owner cpu")); 1650 1651 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1652 if (cpu == mycpuid) { 1653 /* 1654 * This inpcb will be removed by the later 1655 * in_pcbremwildcardhash(). 1656 */ 1657 continue; 1658 } 1659 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1660 } 1661 in_pcbremwildcardhash(inp); 1662 } 1663 1664 static int 1665 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1666 struct sockaddr_in *if_sin, uint16_t hash) 1667 { 1668 struct socket *so = inp->inp_socket; 1669 struct inpcb *oinp; 1670 1671 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1672 sin->sin_addr, sin->sin_port, 1673 inp->inp_laddr.s_addr != INADDR_ANY ? 1674 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1675 if (oinp != NULL) 1676 return EADDRINUSE; 1677 1678 /* 1679 * No more errors can occur, finish adjusting the socket 1680 * and change the processing port to reflect the connected 1681 * socket. Once set we can no longer safely mess with the 1682 * socket. 1683 */ 1684 1685 if (inp->inp_flags & INP_WILDCARD) 1686 udp_remwildcardhash(inp); 1687 1688 if (inp->inp_laddr.s_addr == INADDR_ANY) 1689 inp->inp_laddr = if_sin->sin_addr; 1690 inp->inp_faddr = sin->sin_addr; 1691 inp->inp_fport = sin->sin_port; 1692 in_pcbinsconnhash(inp); 1693 1694 inp->inp_flags |= INP_HASH; 1695 inp->inp_hashval = hash; 1696 1697 soisconnected(so); 1698 1699 return 0; 1700 } 1701 1702 static void 1703 udp_detach2(struct socket *so) 1704 { 1705 in_pcbdetach(so->so_pcb); 1706 sodiscard(so); 1707 sofree(so); 1708 } 1709 1710 static void 1711 udp_detach_final_dispatch(netmsg_t msg) 1712 { 1713 udp_detach2(msg->base.nm_so); 1714 } 1715 1716 static void 1717 udp_detach_oncpu_dispatch(netmsg_t msg) 1718 { 1719 struct netmsg_base *clomsg = &msg->base; 1720 struct socket *so = clomsg->nm_so; 1721 struct inpcb *inp = so->so_pcb; 1722 struct thread *td = curthread; 1723 int nextcpu, cpuid = mycpuid; 1724 1725 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1726 1727 if (inp->inp_flags & INP_WILDCARD) { 1728 /* 1729 * This inp will be removed on the inp's 1730 * owner CPU later, so don't do it now. 1731 */ 1732 if (&td->td_msgport != so->so_port) 1733 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1734 } 1735 1736 if (cpuid == 0) { 1737 /* 1738 * Free and clear multicast socket option, 1739 * which is only accessed in netisr0. 1740 */ 1741 ip_freemoptions(inp->inp_moptions); 1742 inp->inp_moptions = NULL; 1743 } 1744 1745 nextcpu = cpuid + 1; 1746 if (nextcpu < netisr_ncpus) { 1747 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1748 } else { 1749 /* 1750 * No one could see this inpcb now; destroy this 1751 * inpcb in its owner netisr. 1752 */ 1753 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1754 udp_detach_final_dispatch); 1755 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1756 } 1757 } 1758 1759 static void 1760 udp_detach_syncorig_dispatch(netmsg_t msg) 1761 { 1762 struct netmsg_base *clomsg = &msg->base; 1763 struct socket *so = clomsg->nm_so; 1764 1765 /* 1766 * Original protocol processing port is synchronized; 1767 * destroy this inpcb in its owner netisr. 1768 */ 1769 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1770 udp_detach_final_dispatch); 1771 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1772 } 1773 1774 static void 1775 udp_detach(netmsg_t msg) 1776 { 1777 struct socket *so = msg->detach.base.nm_so; 1778 struct netmsg_base *clomsg; 1779 struct inpcb *inp; 1780 1781 inp = so->so_pcb; 1782 if (inp == NULL) { 1783 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1784 return; 1785 } 1786 1787 /* 1788 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1789 * sofree() later. 1790 */ 1791 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1792 1793 if (netisr_ncpus == 1) { 1794 /* Only one CPU, detach the inpcb directly. */ 1795 udp_detach2(so); 1796 return; 1797 } 1798 1799 /* 1800 * Remove this inpcb from the inpcb list first, so that 1801 * no one could find this inpcb from the inpcb list. 1802 */ 1803 in_pcbofflist(inp); 1804 1805 /* 1806 * Remove this inpcb from the local port hash directly 1807 * here, so that its bound local port could be recycled 1808 * timely. 1809 */ 1810 in_pcbremporthash(inp); 1811 1812 if (inp->inp_flags & INP_DIRECT_DETACH) { 1813 /* 1814 * Direct detaching is allowed 1815 */ 1816 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1817 ("in the wildcardhash")); 1818 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1819 if (so->so_orig_port == NULL) { 1820 udp_detach2(so); 1821 } else { 1822 /* 1823 * Protocol processing port changed once, so 1824 * we need to make sure that there are nothing 1825 * left on the original protocol processing 1826 * port before we destroy this socket and inpcb. 1827 * This is more lightweight than going through 1828 * all UDP processing netisrs. 1829 */ 1830 clomsg = &so->so_clomsg; 1831 netmsg_init(clomsg, so, &netisr_apanic_rport, 1832 MSGF_IGNSOPORT, udp_detach_syncorig_dispatch); 1833 lwkt_sendmsg(so->so_orig_port, &clomsg->lmsg); 1834 } 1835 return; 1836 } 1837 1838 /* 1839 * Go through netisrs which process UDP to make sure 1840 * no one could find this inpcb anymore. 1841 */ 1842 clomsg = &so->so_clomsg; 1843 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1844 udp_detach_oncpu_dispatch); 1845 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1846 } 1847 1848 static void 1849 udp_disconnect(netmsg_t msg) 1850 { 1851 struct socket *so = msg->disconnect.base.nm_so; 1852 struct inpcb *inp; 1853 boolean_t forwarded; 1854 int error = 0; 1855 1856 inp = so->so_pcb; 1857 if (inp == NULL) { 1858 error = EINVAL; 1859 goto out; 1860 } 1861 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1862 error = ENOTCONN; 1863 goto out; 1864 } 1865 1866 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1867 1868 in_pcbdisconnect(inp); 1869 inp->inp_flags &= ~INP_HASH; 1870 1871 /* 1872 * Follow traditional BSD behavior and retain the local port 1873 * binding. But, fix the old misbehavior of overwriting any 1874 * previously bound local address. 1875 */ 1876 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1877 inp->inp_laddr.s_addr = INADDR_ANY; 1878 1879 if (so->so_state & SS_ISCLOSING) { 1880 /* 1881 * If this socket is being closed, there is no need 1882 * to put this socket back into wildcard hash table. 1883 */ 1884 error = 0; 1885 goto out; 1886 } 1887 1888 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1889 if (forwarded) { 1890 /* 1891 * The message is further forwarded, so we are done 1892 * here. 1893 */ 1894 return; 1895 } 1896 out: 1897 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1898 } 1899 1900 void 1901 udp_shutdown(netmsg_t msg) 1902 { 1903 struct socket *so = msg->shutdown.base.nm_so; 1904 struct inpcb *inp; 1905 int error; 1906 1907 inp = so->so_pcb; 1908 if (inp) { 1909 socantsendmore(so); 1910 error = 0; 1911 } else { 1912 error = EINVAL; 1913 } 1914 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1915 } 1916 1917 struct pr_usrreqs udp_usrreqs = { 1918 .pru_abort = udp_abort, 1919 .pru_accept = pr_generic_notsupp, 1920 .pru_attach = udp_attach, 1921 .pru_bind = udp_bind, 1922 .pru_connect = udp_connect, 1923 .pru_connect2 = pr_generic_notsupp, 1924 .pru_control = in_control_dispatch, 1925 .pru_detach = udp_detach, 1926 .pru_disconnect = udp_disconnect, 1927 .pru_listen = pr_generic_notsupp, 1928 .pru_peeraddr = in_setpeeraddr_dispatch, 1929 .pru_rcvd = pr_generic_notsupp, 1930 .pru_rcvoob = pr_generic_notsupp, 1931 .pru_send = udp_send, 1932 .pru_sense = pru_sense_null, 1933 .pru_shutdown = udp_shutdown, 1934 .pru_sockaddr = in_setsockaddr_dispatch, 1935 .pru_sosend = sosendudp, 1936 .pru_soreceive = soreceive, 1937 .pru_preconnect = udp_preconnect, 1938 .pru_preattach = udp_preattach 1939 }; 1940