1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
 *
 *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/in_cksum.h>
#include <sys/ktr.h>
#include <sys/jail.h>

#include <sys/socketvar2.h>
#include <sys/serialize.h>

#include <machine/stdarg.h>

#include <net/if.h>
#include <net/route.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/*
 * Message flag set by udp_send() before the inpcb is inserted into the
 * wildcard hash table; udp_inswildcard_replymsg() checks it to decide
 * whether udp_send() has to be called again for the message.
 */
#define MSGF_UDP_SEND		MSGF_PROTO1

/*
 * inpcb flag set in udp_attach() and cleared in udp_ctloutput() once a
 * multicast socket option has been set on the socket.
 */
#define INP_DIRECT_DETACH	INP_FLAG_PROTO2

/* Format string and argument list shared by all UDP KTR trace points. */
#define UDP_KTR_STRING		"inp=%p"
#define UDP_KTR_ARGS		struct inpcb *inp

#ifndef KTR_UDP
#define KTR_UDP			KTR_ALL
#endif

KTR_INFO_MASTER(udp);
KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_inswildcard, 6,
    UDP_KTR_STRING, UDP_KTR_ARGS);

/* Log the UDP trace point "udp_<name>" for the given inpcb. */
#define logudp(name, inp)	KTR_LOG(udp_##name, inp)

/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */

/* Non-zero: udp_send() sets up a checksum for outgoing datagrams. */
#ifndef	COMPAT_42
static int	udpcksum = 1;
#else
static int	udpcksum = 0;		/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "Enable checksumming of UDP packets");

/* Non-zero: udp_input() logs datagrams for which no inpcb was found. */
int	log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/*
 * Non-zero: udp_input() silently drops datagrams for which no inpcb was
 * found instead of answering with an ICMP port unreachable.
 */
static int	blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/* Zero: check_multicast_membership() accepts every datagram. */
static int	strict_mcast_mship = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

/* NOTE(review): not referenced in the visible part of this file. */
int	udp_sosend_async = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
    &udp_sosend_async, 0, "UDP asynchronized pru_send");

/* NOTE(review): not referenced in the visible part of this file. */
int	udp_sosend_prepend = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
    &udp_sosend_prepend, 0,
    "Prepend enough space for proto and link header in pru_send");

/*
 * Non-zero: udp_input() hands the mbuf to in_pcblookup_pkthash() for the
 * unicast inpcb lookup; zero: NULL is passed instead.
 */
static int udp_reuseport_ext = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
    &udp_reuseport_ext, 0, "SO_REUSEPORT extension");

/* UDP inpcb tables, indexed by CPU id; initialized by udp_init(). */
struct	inpcbinfo udbinfo[MAXCPU];

#ifndef UDBHASHSIZE
#define UDBHASHSIZE	16
#endif
CTASSERT(powerof2(UDBHASHSIZE));

/* UDP statistics, indexed by CPU id. */
struct	udpstat udpstat_percpu[MAXCPU] __cachealign;

static void	udp_append(struct inpcb *last, struct ip *ip,
		    struct mbuf *n, int off, struct sockaddr_in *udp_in);

static int	udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin,
		    struct sockaddr_in *if_sin, uint16_t hash);

static boolean_t udp_inswildcardhash(struct inpcb *inp,
		    struct netmsg_base *msg, int error);
188 static void udp_remwildcardhash(struct inpcb *inp); 189 190 static __inline int 191 udp_lportcpu(short lport) 192 { 193 return (ntohs(lport) % netisr_ncpus); 194 } 195 196 void 197 udp_init(void) 198 { 199 struct inpcbportinfo *portinfo; 200 int cpu; 201 202 portinfo = kmalloc(sizeof(*portinfo) * netisr_ncpus, M_PCB, 203 M_WAITOK | M_CACHEALIGN); 204 205 for (cpu = 0; cpu < netisr_ncpus; cpu++) { 206 struct inpcbinfo *uicb = &udbinfo[cpu]; 207 208 /* 209 * NOTE: 210 * UDP pcb list, wildcard hash table and localgroup hash 211 * table are shared. 212 */ 213 in_pcbinfo_init(uicb, cpu, TRUE); 214 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 215 216 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, cpu); 217 in_pcbportinfo_set(uicb, portinfo, netisr_ncpus); 218 219 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 220 &uicb->wildcardhashmask); 221 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 222 &uicb->localgrphashmask); 223 224 uicb->ipi_size = sizeof(struct inpcb); 225 } 226 227 /* 228 * Initialize UDP statistics counters for each CPU. 
229 */ 230 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 231 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 232 } 233 234 static int 235 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 236 { 237 int cpu, error = 0; 238 239 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 240 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 241 sizeof(struct udpstat)))) 242 break; 243 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 244 sizeof(struct udpstat)))) 245 break; 246 } 247 248 return (error); 249 } 250 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 251 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 252 253 void 254 udp_ctloutput(netmsg_t msg) 255 { 256 struct socket *so = msg->base.nm_so; 257 struct sockopt *sopt = msg->ctloutput.nm_sopt; 258 struct inpcb *inp = so->so_pcb; 259 260 if (inp == NULL) { 261 lwkt_replymsg(&msg->lmsg, EINVAL); 262 return; 263 } 264 265 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 266 switch (sopt->sopt_name) { 267 case IP_MULTICAST_IF: 268 case IP_MULTICAST_VIF: 269 case IP_MULTICAST_TTL: 270 case IP_MULTICAST_LOOP: 271 case IP_ADD_MEMBERSHIP: 272 case IP_DROP_MEMBERSHIP: 273 /* 274 * This pr_ctloutput msg will be forwarded 275 * to netisr0 to run; we can't do direct 276 * detaching anymore. 277 * 278 * NOTE: 279 * Don't optimize for the sockets whose 280 * current so_port is netisr0's msgport. 281 * These sockets could be connect(2)'ed 282 * later and the so_port will be changed. 283 */ 284 inp->inp_flags &= ~INP_DIRECT_DETACH; 285 break; 286 } 287 } 288 return ip_ctloutput(msg); 289 } 290 291 /* 292 * Check multicast packets to make sure they are only sent to sockets with 293 * multicast memberships for the packet's destination address and arrival 294 * interface. Multicast packets to multicast-unaware sockets are also 295 * disallowed. 296 * 297 * Returns 0 if the packet is acceptable, -1 if it is not. 
298 */ 299 static __inline int 300 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 301 const struct mbuf *m) 302 { 303 const struct ip_moptions *mopt; 304 int mshipno; 305 306 if (strict_mcast_mship == 0 || 307 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 308 return (0); 309 } 310 311 ASSERT_NETISR0; 312 313 mopt = inp->inp_moptions; 314 if (mopt == NULL) 315 return (-1); 316 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 317 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 318 319 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 320 m->m_pkthdr.rcvif == maddr->inm_ifp) { 321 return (0); 322 } 323 } 324 return (-1); 325 } 326 327 struct udp_mcast_arg { 328 struct inpcb *inp; 329 struct inpcb *last; 330 struct ip *ip; 331 struct mbuf *m; 332 int iphlen; 333 struct sockaddr_in *udp_in; 334 }; 335 336 static int 337 udp_mcast_input(struct udp_mcast_arg *arg) 338 { 339 struct inpcb *inp = arg->inp; 340 struct inpcb *last = arg->last; 341 struct ip *ip = arg->ip; 342 struct mbuf *m = arg->m; 343 344 if (check_multicast_membership(ip, inp, m) < 0) 345 return ERESTART; /* caller continue */ 346 347 if (last != NULL) { 348 struct mbuf *n; 349 350 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 351 udp_append(last, ip, n, 352 arg->iphlen + sizeof(struct udphdr), 353 arg->udp_in); 354 } 355 arg->last = last = inp; 356 357 /* 358 * Don't look for additional matches if this one does 359 * not have either the SO_REUSEPORT or SO_REUSEADDR 360 * socket options set. This heuristic avoids searching 361 * through all pcbs in the common case of a non-shared 362 * port. It * assumes that an application will never 363 * clear these options after setting them. 
364 */ 365 if (!(last->inp_socket->so_options & 366 (SO_REUSEPORT | SO_REUSEADDR))) 367 return EJUSTRETURN; /* caller stop */ 368 return 0; 369 } 370 371 int 372 udp_input(struct mbuf **mp, int *offp, int proto) 373 { 374 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 375 int iphlen; 376 struct ip *ip; 377 struct udphdr *uh; 378 struct inpcb *inp; 379 struct mbuf *m; 380 struct mbuf *opts = NULL; 381 int len, off; 382 struct ip save_ip; 383 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 384 385 off = *offp; 386 m = *mp; 387 *mp = NULL; 388 389 iphlen = off; 390 udp_stat.udps_ipackets++; 391 392 /* 393 * Strip IP options, if any; should skip this, 394 * make available to user, and use on returned packets, 395 * but we don't yet have a way to check the checksum 396 * with options still present. 397 */ 398 if (iphlen > sizeof(struct ip)) { 399 ip_stripoptions(m); 400 iphlen = sizeof(struct ip); 401 } 402 403 /* 404 * IP and UDP headers are together in first mbuf. 405 * Already checked and pulled up in ip_demux(). 406 */ 407 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 408 ("UDP header not in one mbuf")); 409 410 ip = mtod(m, struct ip *); 411 uh = (struct udphdr *)((caddr_t)ip + iphlen); 412 413 /* destination port of 0 is illegal, based on RFC768. */ 414 if (uh->uh_dport == 0) 415 goto bad; 416 417 /* 418 * Make mbuf data length reflect UDP length. 419 * If not enough data to reflect UDP length, drop. 420 */ 421 len = ntohs((u_short)uh->uh_ulen); 422 if (ip->ip_len != len) { 423 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 424 udp_stat.udps_badlen++; 425 goto bad; 426 } 427 m_adj(m, len - ip->ip_len); 428 /* ip->ip_len = len; */ 429 } 430 /* 431 * Save a copy of the IP header in case we want restore it 432 * for sending an ICMP error message in response. 433 */ 434 save_ip = *ip; 435 436 /* 437 * Checksum extended UDP header and data. 
438 */ 439 if (uh->uh_sum) { 440 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 441 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 442 uh->uh_sum = m->m_pkthdr.csum_data; 443 else 444 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 445 ip->ip_dst.s_addr, htonl((u_short)len + 446 m->m_pkthdr.csum_data + IPPROTO_UDP)); 447 uh->uh_sum ^= 0xffff; 448 } else { 449 char b[9]; 450 451 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 452 bzero(((struct ipovly *)ip)->ih_x1, 9); 453 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 454 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 455 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 456 } 457 if (uh->uh_sum) { 458 udp_stat.udps_badsum++; 459 m_freem(m); 460 return(IPPROTO_DONE); 461 } 462 } else 463 udp_stat.udps_nosum++; 464 465 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 466 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 467 struct inpcbhead *connhead; 468 struct inpcontainer *ic, *ic_marker; 469 struct inpcontainerhead *ichead; 470 struct udp_mcast_arg arg; 471 struct inpcb *last; 472 int error; 473 474 /* 475 * Deliver a multicast or broadcast datagram to *all* sockets 476 * for which the local and remote addresses and ports match 477 * those of the incoming datagram. This allows more than 478 * one process to receive multi/broadcasts on the same port. 479 * (This really ought to be done for unicast datagrams as 480 * well, but that would cause problems with existing 481 * applications that open both address-specific sockets and 482 * a wildcard socket listening to the same port -- they would 483 * end up receiving duplicates of every unicast datagram. 484 * Those applications open the multiple sockets to overcome an 485 * inadequacy of the UDP socket interface, but for backwards 486 * compatibility we avoid the problem here rather than 487 * fixing the interface. Maybe 4.5BSD will remedy this?) 488 */ 489 490 /* 491 * Construct sockaddr format source address. 
492 */ 493 udp_in.sin_port = uh->uh_sport; 494 udp_in.sin_addr = ip->ip_src; 495 arg.udp_in = &udp_in; 496 /* 497 * Locate pcb(s) for datagram. 498 * (Algorithm copied from raw_intr().) 499 */ 500 last = NULL; 501 arg.iphlen = iphlen; 502 503 connhead = &pcbinfo->hashbase[ 504 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 505 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 506 LIST_FOREACH(inp, connhead, inp_hash) { 507 #ifdef INET6 508 if (!INP_ISIPV4(inp)) 509 continue; 510 #endif 511 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 512 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 513 inp->inp_fport != uh->uh_sport || 514 inp->inp_lport != uh->uh_dport) 515 continue; 516 517 arg.inp = inp; 518 arg.last = last; 519 arg.ip = ip; 520 arg.m = m; 521 522 error = udp_mcast_input(&arg); 523 if (error == ERESTART) 524 continue; 525 last = arg.last; 526 527 if (error == EJUSTRETURN) 528 goto done; 529 } 530 531 ichead = &pcbinfo->wildcardhashbase[ 532 INP_PCBWILDCARDHASH(uh->uh_dport, 533 pcbinfo->wildcardhashmask)]; 534 ic_marker = in_pcbcontainer_marker(); 535 536 GET_PCBINFO_TOKEN(pcbinfo); 537 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 538 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 539 LIST_REMOVE(ic_marker, ic_list); 540 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 541 542 inp = ic->ic_inp; 543 if (inp->inp_flags & INP_PLACEMARKER) 544 continue; 545 #ifdef INET6 546 if (!INP_ISIPV4(inp)) 547 continue; 548 #endif 549 if (inp->inp_lport != uh->uh_dport) 550 continue; 551 if (inp->inp_laddr.s_addr != INADDR_ANY && 552 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 553 continue; 554 555 arg.inp = inp; 556 arg.last = last; 557 arg.ip = ip; 558 arg.m = m; 559 560 error = udp_mcast_input(&arg); 561 if (error == ERESTART) 562 continue; 563 last = arg.last; 564 565 if (error == EJUSTRETURN) 566 break; 567 } 568 LIST_REMOVE(ic_marker, ic_list); 569 REL_PCBINFO_TOKEN(pcbinfo); 570 done: 571 if (last == NULL) { 572 /* 573 * No matching pcb found; discard datagram. 
574 * (No need to send an ICMP Port Unreachable 575 * for a broadcast or multicast datgram.) 576 */ 577 udp_stat.udps_noportbcast++; 578 goto bad; 579 } 580 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 581 &udp_in); 582 return(IPPROTO_DONE); 583 } 584 /* 585 * Locate pcb for datagram. 586 */ 587 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 588 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 589 udp_reuseport_ext ? m : NULL); 590 if (inp == NULL) { 591 if (log_in_vain) { 592 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 593 594 log(LOG_INFO, 595 "Connection attempt to UDP %s:%d from %s:%d\n", 596 kinet_ntoa(ip->ip_dst, dst), ntohs(uh->uh_dport), 597 kinet_ntoa(ip->ip_src, src), ntohs(uh->uh_sport)); 598 } 599 udp_stat.udps_noport++; 600 if (m->m_flags & (M_BCAST | M_MCAST)) { 601 udp_stat.udps_noportbcast++; 602 goto bad; 603 } 604 if (blackhole) 605 goto bad; 606 #ifdef ICMP_BANDLIM 607 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 608 goto bad; 609 #endif 610 *ip = save_ip; 611 ip->ip_len += iphlen; 612 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 613 return(IPPROTO_DONE); 614 } 615 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 616 /* 617 * Check the minimum TTL for socket. 618 */ 619 if (ip->ip_ttl < inp->inp_ip_minttl) 620 goto bad; 621 622 /* 623 * Construct sockaddr format source address. 624 * Stuff source address and datagram in user buffer. 
625 */ 626 udp_in.sin_port = uh->uh_sport; 627 udp_in.sin_addr = ip->ip_src; 628 if ((inp->inp_flags & INP_CONTROLOPTS) || 629 (inp->inp_socket->so_options & SO_TIMESTAMP)) 630 ip_savecontrol(inp, &opts, ip, m); 631 m_adj(m, iphlen + sizeof(struct udphdr)); 632 633 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 634 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 635 (struct sockaddr *)&udp_in, m, opts) == 0) { 636 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 637 udp_stat.udps_fullsock++; 638 soroverflow(inp->inp_socket); 639 goto bad; 640 } 641 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 642 sorwakeup(inp->inp_socket); 643 return(IPPROTO_DONE); 644 bad: 645 m_freem(m); 646 if (opts) 647 m_freem(opts); 648 return(IPPROTO_DONE); 649 } 650 651 /* 652 * subroutine of udp_input(), mainly for source code readability. 653 * caller must properly init udp_ip6 and udp_in6 beforehand. 654 */ 655 static void 656 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 657 struct sockaddr_in *udp_in) 658 { 659 struct mbuf *opts = NULL; 660 int ret; 661 662 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 663 664 if (last->inp_flags & INP_CONTROLOPTS || 665 last->inp_socket->so_options & SO_TIMESTAMP) 666 ip_savecontrol(last, &opts, ip, n); 667 m_adj(n, off); 668 669 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 670 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 671 (struct sockaddr *)udp_in, n, opts); 672 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 673 if (ret == 0) { 674 m_freem(n); 675 if (opts) 676 m_freem(opts); 677 udp_stat.udps_fullsock++; 678 } else { 679 sorwakeup(last->inp_socket); 680 } 681 } 682 683 /* 684 * Notify a udp user of an asynchronous error; 685 * just wake up so that he can collect error status. 
686 */ 687 void 688 udp_notify(struct inpcb *inp, int error) 689 { 690 inp->inp_socket->so_error = error; 691 sorwakeup(inp->inp_socket); 692 sowwakeup(inp->inp_socket); 693 } 694 695 struct netmsg_udp_notify { 696 struct netmsg_base base; 697 inp_notify_t nm_notify; 698 struct in_addr nm_faddr; 699 int nm_arg; 700 }; 701 702 static void 703 udp_notifyall_oncpu(netmsg_t msg) 704 { 705 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 706 int nextcpu, cpu = mycpuid; 707 708 ASSERT_NETISR_NCPUS(cpu); 709 710 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 711 712 nextcpu = cpu + 1; 713 if (nextcpu < netisr_ncpus) 714 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 715 else 716 lwkt_replymsg(&nm->base.lmsg, 0); 717 } 718 719 inp_notify_t 720 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 721 struct ip **ip0, int *cpuid) 722 { 723 struct in_addr faddr; 724 struct ip *ip = *ip0; 725 inp_notify_t notify = udp_notify; 726 727 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 728 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 729 return NULL; 730 731 if (PRC_IS_REDIRECT(cmd)) { 732 ip = NULL; 733 notify = in_rtchange; 734 } else if (cmd == PRC_HOSTDEAD) { 735 ip = NULL; 736 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 737 return NULL; 738 } 739 740 if (cpuid != NULL) { 741 if (ip == NULL) { 742 /* Go through all effective netisr CPUs. 
*/ 743 *cpuid = netisr_ncpus; 744 } else { 745 const struct udphdr *uh; 746 747 uh = (const struct udphdr *) 748 ((caddr_t)ip + (ip->ip_hl << 2)); 749 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 750 ip->ip_src.s_addr, uh->uh_sport); 751 } 752 } 753 754 *ip0 = ip; 755 return notify; 756 } 757 758 void 759 udp_ctlinput(netmsg_t msg) 760 { 761 struct sockaddr *sa = msg->ctlinput.nm_arg; 762 struct ip *ip = msg->ctlinput.nm_extra; 763 int cmd = msg->ctlinput.nm_cmd, cpuid; 764 inp_notify_t notify; 765 struct in_addr faddr; 766 767 ASSERT_NETISR_NCPUS(mycpuid); 768 769 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 770 if (notify == NULL) 771 goto done; 772 773 faddr = ((struct sockaddr_in *)sa)->sin_addr; 774 if (ip) { 775 const struct udphdr *uh; 776 struct inpcb *inp; 777 778 if (cpuid != mycpuid) 779 goto done; 780 781 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 782 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 783 ip->ip_src, uh->uh_sport, 0, NULL); 784 if (inp != NULL && inp->inp_socket != NULL) 785 notify(inp, inetctlerrmap[cmd]); 786 } else if (msg->ctlinput.nm_direct) { 787 if (cpuid != netisr_ncpus && cpuid != mycpuid) 788 goto done; 789 790 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 791 notify); 792 } else { 793 struct netmsg_udp_notify *nm; 794 795 ASSERT_NETISR0; 796 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 797 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 798 0, udp_notifyall_oncpu); 799 nm->nm_faddr = faddr; 800 nm->nm_arg = inetctlerrmap[cmd]; 801 nm->nm_notify = notify; 802 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 803 } 804 done: 805 lwkt_replymsg(&msg->lmsg, 0); 806 } 807 808 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 809 in_pcblist_ncpus, "S,xinpcb", "List of active UDP sockets"); 810 811 static int 812 udp_getcred(SYSCTL_HANDLER_ARGS) 813 { 814 struct sockaddr_in addrs[2]; 815 struct ucred cred0, *cred = NULL; 816 struct inpcb *inp; 817 
int error, cpu, origcpu; 818 819 error = priv_check(req->td, PRIV_ROOT); 820 if (error) 821 return (error); 822 error = SYSCTL_IN(req, addrs, sizeof addrs); 823 if (error) 824 return (error); 825 826 origcpu = mycpuid; 827 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 828 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 829 830 lwkt_migratecpu(cpu); 831 832 inp = in_pcblookup_hash(&udbinfo[cpu], 833 addrs[1].sin_addr, addrs[1].sin_port, 834 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 835 if (inp == NULL || inp->inp_socket == NULL) { 836 error = ENOENT; 837 } else if (inp->inp_socket->so_cred != NULL) { 838 cred0 = *(inp->inp_socket->so_cred); 839 cred = &cred0; 840 } 841 842 lwkt_migratecpu(origcpu); 843 844 if (error) 845 return error; 846 847 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 848 } 849 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 850 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 851 852 static void 853 udp_send_redispatch(netmsg_t msg) 854 { 855 struct mbuf *m = msg->send.nm_m; 856 int pru_flags = msg->send.nm_flags; 857 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 858 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 859 int flags = msg->send.nm_priv; /* ip_output flags */ 860 int error; 861 862 logudp(redisp_ipout_beg, inp); 863 864 /* 865 * - Don't use inp route cache. It should only be used in the 866 * inp owner netisr. 867 * - Access to inp_moptions should be safe, since multicast UDP 868 * datagrams are redispatched to netisr0 and inp_moptions is 869 * changed only in netisr0. 
870 */ 871 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 872 if ((pru_flags & PRUS_NOREPLY) == 0) 873 lwkt_replymsg(&msg->send.base.lmsg, error); 874 875 if (m_opt != NULL) { 876 /* Free saved ip options, if any */ 877 m_freem(m_opt); 878 } 879 880 logudp(redisp_ipout_end, inp); 881 } 882 883 static void 884 udp_send(netmsg_t msg) 885 { 886 struct socket *so = msg->send.base.nm_so; 887 struct mbuf *m = msg->send.nm_m; 888 struct sockaddr *dstaddr = msg->send.nm_addr; 889 int pru_flags = msg->send.nm_flags; 890 struct inpcb *inp = so->so_pcb; 891 struct thread *td = msg->send.nm_td; 892 uint16_t hash; 893 int flags; 894 895 struct udpiphdr *ui; 896 int len = m->m_pkthdr.len; 897 struct sockaddr_in *sin; /* really is initialized before use */ 898 int error = 0, cpu; 899 900 KKASSERT(msg->send.nm_control == NULL); 901 902 logudp(send_beg, inp); 903 904 if (inp == NULL) { 905 error = EINVAL; 906 goto release; 907 } 908 909 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 910 error = EMSGSIZE; 911 goto release; 912 } 913 914 if (inp->inp_lport == 0) { /* unbound socket */ 915 boolean_t forwarded; 916 917 error = in_pcbbind(inp, NULL, td); 918 if (error) 919 goto release; 920 921 /* 922 * Need to call udp_send again, after this inpcb is 923 * inserted into wildcard hash table. 924 */ 925 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 926 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 927 if (forwarded) { 928 /* 929 * The message is further forwarded, so we are 930 * done here. 
931 */ 932 logudp(send_inswildcard, inp); 933 return; 934 } 935 } 936 937 if (dstaddr != NULL) { /* destination address specified */ 938 if (inp->inp_faddr.s_addr != INADDR_ANY) { 939 /* already connected */ 940 error = EISCONN; 941 goto release; 942 } 943 sin = (struct sockaddr_in *)dstaddr; 944 } else { 945 if (inp->inp_faddr.s_addr == INADDR_ANY) { 946 /* no destination specified and not already connected */ 947 error = ENOTCONN; 948 goto release; 949 } 950 sin = NULL; 951 } 952 953 /* 954 * Calculate data length and get a mbuf 955 * for UDP and IP headers. 956 */ 957 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 958 if (m == NULL) { 959 error = ENOBUFS; 960 goto release; 961 } 962 963 /* 964 * Fill in mbuf with extended UDP header 965 * and addresses and length put into network format. 966 */ 967 ui = mtod(m, struct udpiphdr *); 968 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 969 ui->ui_pr = IPPROTO_UDP; 970 971 /* 972 * Set destination address. 973 */ 974 if (dstaddr != NULL) { /* use specified destination */ 975 ui->ui_dst = sin->sin_addr; 976 ui->ui_dport = sin->sin_port; 977 } else { /* use connected destination */ 978 ui->ui_dst = inp->inp_faddr; 979 ui->ui_dport = inp->inp_fport; 980 } 981 982 /* 983 * Set source address. 984 */ 985 if (inp->inp_laddr.s_addr == INADDR_ANY || 986 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 987 struct sockaddr_in *if_sin; 988 989 if (dstaddr == NULL) { 990 /* 991 * connect() had (or should have) failed because 992 * the interface had no IP address, but the 993 * application proceeded to call send() anyways. 994 */ 995 error = ENOTCONN; 996 goto release; 997 } 998 999 /* Look up outgoing interface. 
*/ 1000 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1001 if (error) 1002 goto release; 1003 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1004 } else { 1005 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1006 } 1007 ui->ui_sport = inp->inp_lport; 1008 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1009 1010 /* 1011 * Release the original thread, since it is no longer used 1012 */ 1013 if (pru_flags & PRUS_HELDTD) { 1014 lwkt_rele(td); 1015 pru_flags &= ~PRUS_HELDTD; 1016 } 1017 /* 1018 * Free the dest address, since it is no longer needed 1019 */ 1020 if (pru_flags & PRUS_FREEADDR) { 1021 kfree(dstaddr, M_SONAME); 1022 pru_flags &= ~PRUS_FREEADDR; 1023 } 1024 1025 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1026 1027 /* 1028 * Set up checksum and output datagram. 1029 */ 1030 if (udpcksum) { 1031 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1032 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1033 m->m_pkthdr.csum_flags = CSUM_UDP; 1034 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1035 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1036 } else { 1037 ui->ui_sum = 0; 1038 } 1039 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1040 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1041 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1042 udp_stat.udps_opackets++; 1043 1044 flags = IP_DEBUGROUTE | 1045 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1046 if (pru_flags & PRUS_DONTROUTE) 1047 flags |= SO_DONTROUTE; 1048 1049 if (inp->inp_flags & INP_CONNECTED) { 1050 /* 1051 * For connected socket, this datagram has already 1052 * been in the correct netisr; no need to rehash. 
1053 */ 1054 KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); 1055 m_sethash(m, inp->inp_hashval); 1056 goto sendit; 1057 } 1058 1059 hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, 1060 ui->ui_src.s_addr, ui->ui_sport); 1061 m_sethash(m, hash); 1062 1063 cpu = netisr_hashcpu(hash); 1064 if (cpu != mycpuid) { 1065 struct mbuf *m_opt = NULL; 1066 struct netmsg_pru_send *smsg; 1067 struct lwkt_port *port = netisr_cpuport(cpu); 1068 1069 /* 1070 * Not on the CPU that matches this UDP datagram hash; 1071 * redispatch to the correct CPU to do the ip_output(). 1072 */ 1073 if (inp->inp_options != NULL) { 1074 /* 1075 * If there are ip options, then save a copy, 1076 * since accessing inp_options on other CPUs' 1077 * is not safe. 1078 * 1079 * XXX optimize this? 1080 */ 1081 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1082 M_WAITOK); 1083 } 1084 if ((pru_flags & PRUS_NOREPLY) == 0) { 1085 /* 1086 * Change some parts of the original netmsg and 1087 * forward it to the target netisr. 1088 * 1089 * NOTE: so_port MUST NOT be checked in the target 1090 * netisr. 1091 */ 1092 smsg = &msg->send; 1093 smsg->nm_priv = flags; /* ip_output flags */ 1094 smsg->nm_m = m; 1095 smsg->nm_control = m_opt; /* XXX save ipopt */ 1096 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1097 smsg->base.nm_dispatch = udp_send_redispatch; 1098 lwkt_forwardmsg(port, &smsg->base.lmsg); 1099 } else { 1100 /* 1101 * Recreate the netmsg, since the original mbuf 1102 * could have been changed. And send it to the 1103 * target netisr. 1104 * 1105 * NOTE: so_port MUST NOT be checked in the target 1106 * netisr. 
1107 */ 1108 smsg = &m->m_hdr.mh_sndmsg; 1109 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1110 MSGF_IGNSOPORT, udp_send_redispatch); 1111 smsg->nm_priv = flags; /* ip_output flags */ 1112 smsg->nm_flags = pru_flags; 1113 smsg->nm_m = m; 1114 smsg->nm_control = m_opt; /* XXX save ipopt */ 1115 lwkt_sendmsg(port, &smsg->base.lmsg); 1116 } 1117 1118 /* This UDP datagram is redispatched; done */ 1119 logudp(send_redisp, inp); 1120 return; 1121 } 1122 1123 sendit: 1124 logudp(send_ipout, inp); 1125 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1126 inp->inp_moptions, inp); 1127 m = NULL; 1128 1129 release: 1130 if (m != NULL) 1131 m_freem(m); 1132 1133 if (pru_flags & PRUS_HELDTD) 1134 lwkt_rele(td); 1135 if (pru_flags & PRUS_FREEADDR) 1136 kfree(dstaddr, M_SONAME); 1137 if ((pru_flags & PRUS_NOREPLY) == 0) 1138 lwkt_replymsg(&msg->send.base.lmsg, error); 1139 1140 logudp(send_end, inp); 1141 } 1142 1143 u_long udp_sendspace = 9216; /* really max datagram size */ 1144 /* 40 1K datagrams */ 1145 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1146 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1147 1148 u_long udp_recvspace = 40 * (1024 + 1149 #ifdef INET6 1150 sizeof(struct sockaddr_in6) 1151 #else 1152 sizeof(struct sockaddr_in) 1153 #endif 1154 ); 1155 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1156 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1157 1158 /* 1159 * This should never happen, since UDP socket does not support 1160 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 
1161 */ 1162 static void 1163 udp_abort(netmsg_t msg __unused) 1164 { 1165 panic("udp_abort is called"); 1166 } 1167 1168 static int 1169 udp_preattach(struct socket *so, int proto __unused, struct pru_attach_info *ai) 1170 { 1171 return soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1172 } 1173 1174 static void 1175 udp_attach(netmsg_t msg) 1176 { 1177 struct socket *so = msg->attach.base.nm_so; 1178 struct pru_attach_info *ai = msg->attach.nm_ai; 1179 struct inpcb *inp; 1180 int error; 1181 1182 KASSERT(so->so_pcb == NULL, ("udp socket attached")); 1183 1184 if (ai != NULL) { 1185 error = udp_preattach(so, 0 /* don't care */, ai); 1186 if (error) 1187 goto out; 1188 } else { 1189 /* Post attach; do nothing */ 1190 } 1191 1192 error = in_pcballoc(so, &udbinfo[mycpuid]); 1193 if (error) 1194 goto out; 1195 1196 inp = so->so_pcb; 1197 inp->inp_flags |= INP_DIRECT_DETACH; 1198 inp->inp_ip_ttl = ip_defttl; 1199 error = 0; 1200 out: 1201 lwkt_replymsg(&msg->attach.base.lmsg, error); 1202 } 1203 1204 static void 1205 udp_inswildcard_replymsg(netmsg_t msg) 1206 { 1207 lwkt_msg_t lmsg = &msg->lmsg; 1208 1209 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1210 udp_send(msg); 1211 /* msg is replied by udp_send() */ 1212 } else { 1213 lwkt_replymsg(lmsg, lmsg->ms_error); 1214 } 1215 } 1216 1217 static void 1218 udp_soreuseport_dispatch(netmsg_t msg) 1219 { 1220 /* This inpcb has already been in the wildcard hash. 
*/ 1221 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1222 udp_inswildcard_replymsg(msg); 1223 } 1224 1225 static void 1226 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1227 { 1228 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1229 } 1230 1231 static boolean_t 1232 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1233 { 1234 int cpu; 1235 1236 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1237 ("not on owner cpu")); 1238 1239 in_pcbinswildcardhash(inp); 1240 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1241 if (cpu == mycpuid) { 1242 /* 1243 * This inpcb has been inserted by the above 1244 * in_pcbinswildcardhash(). 1245 */ 1246 continue; 1247 } 1248 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1249 } 1250 1251 /* NOTE: inp_lgrpindex is _not_ assigned in jail. */ 1252 if ((inp->inp_socket->so_options & SO_REUSEPORT) && 1253 inp->inp_lgrpindex >= 0) { 1254 /* 1255 * For SO_REUSEPORT socket, redistribute it based on its 1256 * local group index. 1257 */ 1258 cpu = inp->inp_lgrpindex % netisr_ncpus; 1259 if (cpu != mycpuid) { 1260 struct lwkt_port *port = netisr_cpuport(cpu); 1261 lwkt_msg_t lmsg = &msg->lmsg; 1262 1263 /* 1264 * We are moving the protocol processing port the 1265 * socket is on, we have to unlink here and re-link 1266 * on the target cpu (this inpcb is still left in 1267 * the wildcard hash). 
1268 */ 1269 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1270 msg->nm_dispatch = udp_soreuseport_dispatch; 1271 1272 /* 1273 * See the related comment in tcp_usrreq.c 1274 * tcp_connect() 1275 */ 1276 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1277 lwkt_forwardmsg(port, lmsg); 1278 return TRUE; /* forwarded */ 1279 } 1280 } 1281 return FALSE; 1282 } 1283 1284 static void 1285 udp_inswildcardhash_dispatch(netmsg_t msg) 1286 { 1287 struct inpcb *inp = msg->base.nm_so->so_pcb; 1288 boolean_t forwarded; 1289 1290 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1291 KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu")); 1292 1293 in_pcblink(inp, &udbinfo[mycpuid]); 1294 1295 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1296 if (forwarded) { 1297 /* The message is further forwarded, so we are done here. */ 1298 return; 1299 } 1300 udp_inswildcard_replymsg(msg); 1301 } 1302 1303 static boolean_t 1304 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1305 { 1306 lwkt_msg_t lmsg = &msg->lmsg; 1307 int cpu; 1308 1309 ASSERT_INP_NOTINHASH(inp); 1310 1311 /* This inpcb could no longer be directly detached */ 1312 inp->inp_flags &= ~INP_DIRECT_DETACH; 1313 1314 /* 1315 * Always clear the route cache, so we don't need to 1316 * worry about any owner CPU changes later. 1317 */ 1318 in_pcbresetroute(inp); 1319 1320 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1321 cpu = udp_lportcpu(inp->inp_lport); 1322 1323 lmsg->ms_error = error; 1324 if (cpu != mycpuid) { 1325 struct lwkt_port *port = netisr_cpuport(cpu); 1326 1327 /* 1328 * We are moving the protocol processing port the socket 1329 * is on, we have to unlink here and re-link on the 1330 * target cpu. 
1331 */ 1332 in_pcbunlink(inp, &udbinfo[mycpuid]); 1333 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1334 1335 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1336 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1337 lwkt_forwardmsg(port, lmsg); 1338 return TRUE; /* forwarded */ 1339 } 1340 1341 return udp_inswildcardhash_oncpu(inp, msg); 1342 } 1343 1344 static void 1345 udp_bind(netmsg_t msg) 1346 { 1347 struct socket *so = msg->bind.base.nm_so; 1348 struct inpcb *inp; 1349 int error; 1350 1351 inp = so->so_pcb; 1352 if (inp) { 1353 struct sockaddr *nam = msg->bind.nm_nam; 1354 struct thread *td = msg->bind.nm_td; 1355 struct sockaddr_in *sin; 1356 lwkt_port_t port; 1357 int cpu; 1358 1359 /* 1360 * Check "already bound" here (in_pcbbind() does the same 1361 * check though), so we don't forward a connected/bound 1362 * socket randomly which would panic in the following 1363 * in_pcbunlink(). 1364 */ 1365 if (inp->inp_lport != 0 || 1366 inp->inp_laddr.s_addr != INADDR_ANY) { 1367 error = EINVAL; /* already bound */ 1368 goto done; 1369 } 1370 1371 if (nam->sa_len != sizeof(*sin)) { 1372 error = EINVAL; 1373 goto done; 1374 } 1375 sin = (struct sockaddr_in *)nam; 1376 1377 cpu = udp_lportcpu(sin->sin_port); 1378 port = netisr_cpuport(cpu); 1379 1380 /* 1381 * See the related comment in tcp_usrreq.c tcp_usr_bind(). 1382 * The exception is that we use local port based netisr 1383 * to serialize in_pcbbind(). 1384 */ 1385 if (&curthread->td_msgport != port) { 1386 lwkt_msg_t lmsg = &msg->bind.base.lmsg; 1387 1388 KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0, 1389 ("already asked to relink")); 1390 1391 in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]); 1392 msg->bind.nm_flags |= PRUB_RELINK; 1393 1394 /* 1395 * See the related comment in tcp_usrreq.c 1396 * tcp_connect(). 
1397 */ 1398 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1399 lwkt_forwardmsg(port, lmsg); 1400 /* msg invalid now */ 1401 return; 1402 } 1403 KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu)); 1404 1405 if (msg->bind.nm_flags & PRUB_RELINK) { 1406 msg->bind.nm_flags &= ~PRUB_RELINK; 1407 in_pcblink(so->so_pcb, &udbinfo[mycpuid]); 1408 } 1409 KASSERT(inp->inp_pcbinfo == &udbinfo[cpu], 1410 ("pcbinfo is not udbinfo%d", cpu)); 1411 1412 error = in_pcbbind(inp, nam, td); 1413 if (error == 0) { 1414 boolean_t forwarded; 1415 1416 if (sin->sin_addr.s_addr != INADDR_ANY) 1417 inp->inp_flags |= INP_WASBOUND_NOTANY; 1418 1419 forwarded = udp_inswildcardhash(inp, 1420 &msg->bind.base, 0); 1421 if (forwarded) { 1422 /* 1423 * The message is further forwarded, so 1424 * we are done here. 1425 */ 1426 return; 1427 } 1428 } 1429 } else { 1430 error = EINVAL; 1431 } 1432 done: 1433 lwkt_replymsg(&msg->bind.base.lmsg, error); 1434 } 1435 1436 static int 1437 udp_preconnect(struct socket *so, const struct sockaddr *nam __unused, 1438 struct thread *td __unused) 1439 { 1440 sosetstate(so, SS_ISCONNECTED); /* XXX */ 1441 return 0; 1442 } 1443 1444 static void 1445 udp_connect(netmsg_t msg) 1446 { 1447 struct socket *so = msg->connect.base.nm_so; 1448 struct sockaddr *nam = msg->connect.nm_nam; 1449 struct thread *td = msg->connect.nm_td; 1450 struct inpcb *inp; 1451 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1452 struct sockaddr_in *if_sin; 1453 struct lwkt_port *port; 1454 uint16_t hash; 1455 int error; 1456 1457 KKASSERT(msg->connect.nm_m == NULL); 1458 1459 inp = so->so_pcb; 1460 if (inp == NULL) { 1461 error = EINVAL; 1462 goto out; 1463 } 1464 1465 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1466 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1467 in_pcblink(inp, &udbinfo[mycpuid]); 1468 } 1469 1470 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1471 error = EISCONN; 1472 goto out; 1473 } 1474 error = 0; 1475 1476 /* 1477 * Bind if we have to 1478 */ 1479 if 
(inp->inp_lport == 0) { 1480 error = in_pcbbind(inp, NULL, td); 1481 if (error) 1482 goto out; 1483 } 1484 1485 /* 1486 * Calculate the correct protocol processing thread. The connect 1487 * operation must run there. 1488 */ 1489 error = in_pcbladdr(inp, nam, &if_sin, td); 1490 if (error) 1491 goto out; 1492 1493 hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, 1494 inp->inp_laddr.s_addr != INADDR_ANY ? 1495 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1496 port = netisr_hashport(hash); 1497 if (port != &curthread->td_msgport) { 1498 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1499 int nm_flags = PRUC_RECONNECT; 1500 1501 /* 1502 * in_pcbladdr() may have allocated a route entry for us 1503 * on the current CPU, but we need a route entry on the 1504 * inpcb's owner CPU, so free it here. 1505 */ 1506 in_pcbresetroute(inp); 1507 1508 if (inp->inp_flags & INP_WILDCARD) { 1509 /* 1510 * Remove this inpcb from the wildcard hash before 1511 * the socket's msgport changes. 1512 */ 1513 udp_remwildcardhash(inp); 1514 } 1515 1516 if (so->so_orig_port == NULL) { 1517 /* 1518 * First time change protocol processing port. 1519 * Save the current port for synchronization upon 1520 * udp_detach. 1521 */ 1522 so->so_orig_port = &curthread->td_msgport; 1523 } else { 1524 /* 1525 * We have changed protocol processing port more 1526 * than once. We could not do direct detach 1527 * anymore, because we lose the track of the 1528 * original protocol processing ports to perform 1529 * synchronization upon udp_detach. This should 1530 * be rare though. 1531 */ 1532 inp->inp_flags &= ~INP_DIRECT_DETACH; 1533 } 1534 1535 /* 1536 * We are moving the protocol processing port the socket 1537 * is on, we have to unlink here and re-link on the 1538 * target cpu. 
1539 */ 1540 in_pcbunlink(inp, &udbinfo[mycpuid]); 1541 msg->connect.nm_flags |= nm_flags; 1542 1543 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1544 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1545 lwkt_forwardmsg(port, lmsg); 1546 /* msg invalid now */ 1547 return; 1548 } 1549 error = udp_connect_oncpu(inp, sin, if_sin, hash); 1550 out: 1551 if (msg->connect.nm_flags & PRUC_HELDTD) 1552 lwkt_rele(td); 1553 if (error && (msg->connect.nm_flags & PRUC_ASYNC)) { 1554 if (inp->inp_lport == 0) { 1555 /* 1556 * As long as we have the local port, it is fine 1557 * for connect to fail, e.g. disconnect. 1558 */ 1559 so->so_error = error; 1560 } 1561 soclrstate(so, SS_ISCONNECTED); 1562 /* 1563 * Wake up callers blocked on this socket to make sure 1564 * that they can see this error. 1565 * 1566 * NOTE: 1567 * sodisconnected() can't be used here, which bricks 1568 * sending and receiving. 1569 */ 1570 wakeup(&so->so_timeo); 1571 sowwakeup(so); 1572 sorwakeup(so); 1573 } 1574 if (error && inp != NULL && inp->inp_lport != 0 && 1575 (inp->inp_flags & INP_WILDCARD) == 0) { 1576 boolean_t forwarded; 1577 1578 /* Connect failed; put it to wildcard hash. */ 1579 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1580 error); 1581 if (forwarded) { 1582 /* 1583 * The message is further forwarded, so we are done 1584 * here. 1585 */ 1586 return; 1587 } 1588 } 1589 lwkt_replymsg(&msg->connect.base.lmsg, error); 1590 } 1591 1592 static void 1593 udp_remwildcardhash(struct inpcb *inp) 1594 { 1595 int cpu; 1596 1597 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1598 ("not on owner cpu")); 1599 1600 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1601 if (cpu == mycpuid) { 1602 /* 1603 * This inpcb will be removed by the later 1604 * in_pcbremwildcardhash(). 
1605 */ 1606 continue; 1607 } 1608 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1609 } 1610 in_pcbremwildcardhash(inp); 1611 } 1612 1613 static int 1614 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1615 struct sockaddr_in *if_sin, uint16_t hash) 1616 { 1617 struct socket *so = inp->inp_socket; 1618 struct inpcb *oinp; 1619 1620 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1621 sin->sin_addr, sin->sin_port, 1622 inp->inp_laddr.s_addr != INADDR_ANY ? 1623 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1624 if (oinp != NULL) 1625 return EADDRINUSE; 1626 1627 /* 1628 * No more errors can occur, finish adjusting the socket 1629 * and change the processing port to reflect the connected 1630 * socket. Once set we can no longer safely mess with the 1631 * socket. 1632 */ 1633 1634 if (inp->inp_flags & INP_WILDCARD) 1635 udp_remwildcardhash(inp); 1636 1637 if (inp->inp_laddr.s_addr == INADDR_ANY) 1638 inp->inp_laddr = if_sin->sin_addr; 1639 inp->inp_faddr = sin->sin_addr; 1640 inp->inp_fport = sin->sin_port; 1641 in_pcbinsconnhash(inp); 1642 1643 inp->inp_flags |= INP_HASH; 1644 inp->inp_hashval = hash; 1645 1646 soisconnected(so); 1647 1648 return 0; 1649 } 1650 1651 static void 1652 udp_detach2(struct socket *so) 1653 { 1654 in_pcbdetach(so->so_pcb); 1655 sodiscard(so); 1656 sofree(so); 1657 } 1658 1659 static void 1660 udp_detach_final_dispatch(netmsg_t msg) 1661 { 1662 udp_detach2(msg->base.nm_so); 1663 } 1664 1665 static void 1666 udp_detach_oncpu_dispatch(netmsg_t msg) 1667 { 1668 struct netmsg_base *clomsg = &msg->base; 1669 struct socket *so = clomsg->nm_so; 1670 struct inpcb *inp = so->so_pcb; 1671 struct thread *td = curthread; 1672 int nextcpu, cpuid = mycpuid; 1673 1674 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1675 1676 if (inp->inp_flags & INP_WILDCARD) { 1677 /* 1678 * This inp will be removed on the inp's 1679 * owner CPU later, so don't do it now. 
1680 */ 1681 if (&td->td_msgport != so->so_port) 1682 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1683 } 1684 1685 if (cpuid == 0) { 1686 /* 1687 * Free and clear multicast socket option, 1688 * which is only accessed in netisr0. 1689 */ 1690 ip_freemoptions(inp->inp_moptions); 1691 inp->inp_moptions = NULL; 1692 } 1693 1694 nextcpu = cpuid + 1; 1695 if (nextcpu < netisr_ncpus) { 1696 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1697 } else { 1698 /* 1699 * No one could see this inpcb now; destroy this 1700 * inpcb in its owner netisr. 1701 */ 1702 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1703 udp_detach_final_dispatch); 1704 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1705 } 1706 } 1707 1708 static void 1709 udp_detach_syncorig_dispatch(netmsg_t msg) 1710 { 1711 struct netmsg_base *clomsg = &msg->base; 1712 struct socket *so = clomsg->nm_so; 1713 1714 /* 1715 * Original protocol processing port is synchronized; 1716 * destroy this inpcb in its owner netisr. 1717 */ 1718 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1719 udp_detach_final_dispatch); 1720 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1721 } 1722 1723 static void 1724 udp_detach(netmsg_t msg) 1725 { 1726 struct socket *so = msg->detach.base.nm_so; 1727 struct netmsg_base *clomsg; 1728 struct inpcb *inp; 1729 1730 inp = so->so_pcb; 1731 if (inp == NULL) { 1732 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1733 return; 1734 } 1735 1736 /* 1737 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1738 * sofree() later. 1739 */ 1740 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1741 1742 if (netisr_ncpus == 1) { 1743 /* Only one CPU, detach the inpcb directly. */ 1744 udp_detach2(so); 1745 return; 1746 } 1747 1748 /* 1749 * Remove this inpcb from the inpcb list first, so that 1750 * no one could find this inpcb from the inpcb list. 
1751 */ 1752 in_pcbofflist(inp); 1753 1754 /* 1755 * Remove this inpcb from the local port hash directly 1756 * here, so that its bound local port could be recycled 1757 * timely. 1758 */ 1759 in_pcbremporthash(inp); 1760 1761 if (inp->inp_flags & INP_DIRECT_DETACH) { 1762 /* 1763 * Direct detaching is allowed 1764 */ 1765 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1766 ("in the wildcardhash")); 1767 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1768 if (so->so_orig_port == NULL) { 1769 udp_detach2(so); 1770 } else { 1771 /* 1772 * Protocol processing port changed once, so 1773 * we need to make sure that there are nothing 1774 * left on the original protocol processing 1775 * port before we destroy this socket and inpcb. 1776 * This is more lightweight than going through 1777 * all UDP processing netisrs. 1778 */ 1779 clomsg = &so->so_clomsg; 1780 netmsg_init(clomsg, so, &netisr_apanic_rport, 1781 MSGF_IGNSOPORT, udp_detach_syncorig_dispatch); 1782 lwkt_sendmsg(so->so_orig_port, &clomsg->lmsg); 1783 } 1784 return; 1785 } 1786 1787 /* 1788 * Go through netisrs which process UDP to make sure 1789 * no one could find this inpcb anymore. 1790 */ 1791 clomsg = &so->so_clomsg; 1792 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1793 udp_detach_oncpu_dispatch); 1794 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1795 } 1796 1797 static void 1798 udp_disconnect(netmsg_t msg) 1799 { 1800 struct socket *so = msg->disconnect.base.nm_so; 1801 struct inpcb *inp; 1802 boolean_t forwarded; 1803 int error = 0; 1804 1805 inp = so->so_pcb; 1806 if (inp == NULL) { 1807 error = EINVAL; 1808 goto out; 1809 } 1810 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1811 error = ENOTCONN; 1812 goto out; 1813 } 1814 1815 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1816 1817 in_pcbdisconnect(inp); 1818 inp->inp_flags &= ~INP_HASH; 1819 1820 /* 1821 * Follow traditional BSD behavior and retain the local port 1822 * binding. 
But, fix the old misbehavior of overwriting any 1823 * previously bound local address. 1824 */ 1825 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1826 inp->inp_laddr.s_addr = INADDR_ANY; 1827 1828 if (so->so_state & SS_ISCLOSING) { 1829 /* 1830 * If this socket is being closed, there is no need 1831 * to put this socket back into wildcard hash table. 1832 */ 1833 error = 0; 1834 goto out; 1835 } 1836 1837 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1838 if (forwarded) { 1839 /* 1840 * The message is further forwarded, so we are done 1841 * here. 1842 */ 1843 return; 1844 } 1845 out: 1846 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1847 } 1848 1849 void 1850 udp_shutdown(netmsg_t msg) 1851 { 1852 struct socket *so = msg->shutdown.base.nm_so; 1853 struct inpcb *inp; 1854 int error; 1855 1856 inp = so->so_pcb; 1857 if (inp) { 1858 socantsendmore(so); 1859 error = 0; 1860 } else { 1861 error = EINVAL; 1862 } 1863 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1864 } 1865 1866 struct pr_usrreqs udp_usrreqs = { 1867 .pru_abort = udp_abort, 1868 .pru_accept = pr_generic_notsupp, 1869 .pru_attach = udp_attach, 1870 .pru_bind = udp_bind, 1871 .pru_connect = udp_connect, 1872 .pru_connect2 = pr_generic_notsupp, 1873 .pru_control = in_control_dispatch, 1874 .pru_detach = udp_detach, 1875 .pru_disconnect = udp_disconnect, 1876 .pru_listen = pr_generic_notsupp, 1877 .pru_peeraddr = in_setpeeraddr_dispatch, 1878 .pru_rcvd = pr_generic_notsupp, 1879 .pru_rcvoob = pr_generic_notsupp, 1880 .pru_send = udp_send, 1881 .pru_sense = pru_sense_null, 1882 .pru_shutdown = udp_shutdown, 1883 .pru_sockaddr = in_setsockaddr_dispatch, 1884 .pru_sosend = sosendudp, 1885 .pru_soreceive = soreceive, 1886 .pru_preconnect = udp_preconnect, 1887 .pru_preattach = udp_preattach 1888 }; 1889