1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
*
 *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/in_cksum.h>
#include <sys/ktr.h>
#include <sys/jail.h>

#include <sys/socketvar2.h>
#include <sys/serialize.h>

#include <machine/stdarg.h>

#include <net/if.h>
#include <net/route.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/*
 * Message flag set by udp_send() on a pru_send message before the inpcb
 * is inserted into the wildcard hash; udp_inswildcard_replymsg() calls
 * udp_send() again when it finds the flag set.
 */
#define MSGF_UDP_SEND		MSGF_PROTO1

/*
 * inpcb flag set by udp_attach() and cleared by udp_ctloutput() once a
 * multicast related IP option has been set on the socket.
 */
#define INP_DIRECT_DETACH	INP_FLAG_PROTO2

/* Format string and argument list shared by all UDP KTR probes below. */
#define UDP_KTR_STRING		"inp=%p"
#define UDP_KTR_ARGS		struct inpcb *inp

#ifndef KTR_UDP
#define KTR_UDP			KTR_ALL
#endif

KTR_INFO_MASTER(udp);
KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS);

/* Log the KTR probe udp_<name> with the inpcb as its single argument. */
#define logudp(name, inp)	KTR_LOG(udp_##name, inp)

/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */

/*
 * When non-zero, udp_send() fills in the pseudo-header checksum and
 * requests CSUM_UDP for the datagram; when zero the checksum field is
 * sent as 0.
 */
#ifndef	COMPAT_42
static int	udpcksum = 1;
#else
static int	udpcksum = 0;		/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "Enable checksumming of UDP packets");

/*
 * When non-zero, udp_input() logs unicast datagrams for which no
 * matching pcb was found.
 */
int	log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/*
 * When non-zero, udp_input() drops datagrams without a matching pcb
 * instead of answering them with an ICMP port unreachable.
 */
static int	blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/*
 * When zero, check_multicast_membership() accepts every datagram
 * without looking at the socket's multicast memberships.
 */
static int	strict_mcast_mship = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

int	udp_sosend_async = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
    &udp_sosend_async, 0, "UDP asynchronized pru_send");

int	udp_sosend_prepend = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
    &udp_sosend_prepend, 0,
    "Prepend enough space for proto and link header in pru_send");

/*
 * When non-zero, udp_input() hands the received mbuf to
 * in_pcblookup_pkthash(); otherwise it passes NULL.
 */
static int udp_reuseport_ext = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
    &udp_reuseport_ext, 0, "SO_REUSEPORT extension");

/* Per-CPU pcb info; udp_init() sets up the first netisr_ncpus entries. */
struct	inpcbinfo udbinfo[MAXCPU];

/* Size handed to hashinit()/in_pcbportinfo_init(); must be a power of 2. */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif
CTASSERT(powerof2(UDBHASHSIZE));

/* Per-CPU UDP statistics, exported by sysctl_udpstat(). */
struct	udpstat udpstat_percpu[MAXCPU] __cachealign;

static void udp_append(struct inpcb *last, struct ip *ip,
    struct mbuf *n, int off, struct sockaddr_in *udp_in);

static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin,
    struct sockaddr_in *if_sin, uint16_t hash);

static boolean_t udp_inswildcardhash(struct inpcb *inp,
    struct netmsg_base *msg, int error);
188 static void udp_remwildcardhash(struct inpcb *inp); 189 190 static __inline int 191 udp_lportcpu(short lport) 192 { 193 return (ntohs(lport) % netisr_ncpus); 194 } 195 196 void 197 udp_init(void) 198 { 199 struct inpcbportinfo *portinfo; 200 int cpu; 201 202 portinfo = kmalloc(sizeof(*portinfo) * netisr_ncpus, M_PCB, 203 M_WAITOK | M_CACHEALIGN); 204 205 for (cpu = 0; cpu < netisr_ncpus; cpu++) { 206 struct inpcbinfo *uicb = &udbinfo[cpu]; 207 208 /* 209 * NOTE: 210 * UDP pcb list, wildcard hash table and localgroup hash 211 * table are shared. 212 */ 213 in_pcbinfo_init(uicb, cpu, TRUE); 214 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 215 216 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, cpu); 217 in_pcbportinfo_set(uicb, portinfo, netisr_ncpus); 218 219 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 220 &uicb->wildcardhashmask); 221 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 222 &uicb->localgrphashmask); 223 224 uicb->ipi_size = sizeof(struct inpcb); 225 } 226 227 /* 228 * Initialize UDP statistics counters for each CPU. 
229 */ 230 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 231 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 232 } 233 234 static int 235 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 236 { 237 int cpu, error = 0; 238 239 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 240 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 241 sizeof(struct udpstat)))) 242 break; 243 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 244 sizeof(struct udpstat)))) 245 break; 246 } 247 248 return (error); 249 } 250 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 251 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 252 253 void 254 udp_ctloutput(netmsg_t msg) 255 { 256 struct socket *so = msg->base.nm_so; 257 struct sockopt *sopt = msg->ctloutput.nm_sopt; 258 struct inpcb *inp = so->so_pcb; 259 260 if (inp == NULL) { 261 lwkt_replymsg(&msg->lmsg, EINVAL); 262 return; 263 } 264 265 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 266 switch (sopt->sopt_name) { 267 case IP_MULTICAST_IF: 268 case IP_MULTICAST_VIF: 269 case IP_MULTICAST_TTL: 270 case IP_MULTICAST_LOOP: 271 case IP_ADD_MEMBERSHIP: 272 case IP_DROP_MEMBERSHIP: 273 /* 274 * This pr_ctloutput msg will be forwarded 275 * to netisr0 to run; we can't do direct 276 * detaching anymore. 277 * 278 * NOTE: 279 * Don't optimize for the sockets whose 280 * current so_port is netisr0's msgport. 281 * These sockets could be connect(2)'ed 282 * later and the so_port will be changed. 283 */ 284 inp->inp_flags &= ~INP_DIRECT_DETACH; 285 break; 286 } 287 } 288 return ip_ctloutput(msg); 289 } 290 291 /* 292 * Check multicast packets to make sure they are only sent to sockets with 293 * multicast memberships for the packet's destination address and arrival 294 * interface. Multicast packets to multicast-unaware sockets are also 295 * disallowed. 296 * 297 * Returns 0 if the packet is acceptable, -1 if it is not. 
298 */ 299 static __inline int 300 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 301 const struct mbuf *m) 302 { 303 const struct ip_moptions *mopt; 304 int mshipno; 305 306 if (strict_mcast_mship == 0 || 307 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 308 return (0); 309 } 310 311 ASSERT_NETISR0; 312 313 mopt = inp->inp_moptions; 314 if (mopt == NULL) 315 return (-1); 316 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 317 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 318 319 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 320 m->m_pkthdr.rcvif == maddr->inm_ifp) { 321 return (0); 322 } 323 } 324 return (-1); 325 } 326 327 struct udp_mcast_arg { 328 struct inpcb *inp; 329 struct inpcb *last; 330 struct ip *ip; 331 struct mbuf *m; 332 int iphlen; 333 struct sockaddr_in *udp_in; 334 }; 335 336 static int 337 udp_mcast_input(struct udp_mcast_arg *arg) 338 { 339 struct inpcb *inp = arg->inp; 340 struct inpcb *last = arg->last; 341 struct ip *ip = arg->ip; 342 struct mbuf *m = arg->m; 343 344 if (check_multicast_membership(ip, inp, m) < 0) 345 return ERESTART; /* caller continue */ 346 347 if (last != NULL) { 348 struct mbuf *n; 349 350 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 351 udp_append(last, ip, n, 352 arg->iphlen + sizeof(struct udphdr), 353 arg->udp_in); 354 } 355 arg->last = last = inp; 356 357 /* 358 * Don't look for additional matches if this one does 359 * not have either the SO_REUSEPORT or SO_REUSEADDR 360 * socket options set. This heuristic avoids searching 361 * through all pcbs in the common case of a non-shared 362 * port. It * assumes that an application will never 363 * clear these options after setting them. 
364 */ 365 if (!(last->inp_socket->so_options & 366 (SO_REUSEPORT | SO_REUSEADDR))) 367 return EJUSTRETURN; /* caller stop */ 368 return 0; 369 } 370 371 int 372 udp_input(struct mbuf **mp, int *offp, int proto) 373 { 374 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 375 int iphlen; 376 struct ip *ip; 377 struct udphdr *uh; 378 struct inpcb *inp; 379 struct mbuf *m; 380 struct mbuf *opts = NULL; 381 int len, off; 382 struct ip save_ip; 383 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 384 385 off = *offp; 386 m = *mp; 387 *mp = NULL; 388 389 iphlen = off; 390 udp_stat.udps_ipackets++; 391 392 /* 393 * Strip IP options, if any; should skip this, 394 * make available to user, and use on returned packets, 395 * but we don't yet have a way to check the checksum 396 * with options still present. 397 */ 398 if (iphlen > sizeof(struct ip)) { 399 ip_stripoptions(m); 400 iphlen = sizeof(struct ip); 401 } 402 403 /* 404 * IP and UDP headers are together in first mbuf. 405 * Already checked and pulled up in ip_demux(). 406 */ 407 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 408 ("UDP header not in one mbuf")); 409 410 ip = mtod(m, struct ip *); 411 uh = (struct udphdr *)((caddr_t)ip + iphlen); 412 413 /* destination port of 0 is illegal, based on RFC768. */ 414 if (uh->uh_dport == 0) 415 goto bad; 416 417 /* 418 * Make mbuf data length reflect UDP length. 419 * If not enough data to reflect UDP length, drop. 420 */ 421 len = ntohs((u_short)uh->uh_ulen); 422 if (ip->ip_len != len) { 423 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 424 udp_stat.udps_badlen++; 425 goto bad; 426 } 427 m_adj(m, len - ip->ip_len); 428 /* ip->ip_len = len; */ 429 } 430 /* 431 * Save a copy of the IP header in case we want restore it 432 * for sending an ICMP error message in response. 433 */ 434 save_ip = *ip; 435 436 /* 437 * Checksum extended UDP header and data. 
438 */ 439 if (uh->uh_sum) { 440 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 441 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 442 uh->uh_sum = m->m_pkthdr.csum_data; 443 else 444 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 445 ip->ip_dst.s_addr, htonl((u_short)len + 446 m->m_pkthdr.csum_data + IPPROTO_UDP)); 447 uh->uh_sum ^= 0xffff; 448 } else { 449 char b[9]; 450 451 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 452 bzero(((struct ipovly *)ip)->ih_x1, 9); 453 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 454 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 455 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 456 } 457 if (uh->uh_sum) { 458 udp_stat.udps_badsum++; 459 m_freem(m); 460 return(IPPROTO_DONE); 461 } 462 } else 463 udp_stat.udps_nosum++; 464 465 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 466 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 467 struct inpcbhead *connhead; 468 struct inpcontainer *ic, *ic_marker; 469 struct inpcontainerhead *ichead; 470 struct udp_mcast_arg arg; 471 struct inpcb *last; 472 int error; 473 474 /* 475 * Deliver a multicast or broadcast datagram to *all* sockets 476 * for which the local and remote addresses and ports match 477 * those of the incoming datagram. This allows more than 478 * one process to receive multi/broadcasts on the same port. 479 * (This really ought to be done for unicast datagrams as 480 * well, but that would cause problems with existing 481 * applications that open both address-specific sockets and 482 * a wildcard socket listening to the same port -- they would 483 * end up receiving duplicates of every unicast datagram. 484 * Those applications open the multiple sockets to overcome an 485 * inadequacy of the UDP socket interface, but for backwards 486 * compatibility we avoid the problem here rather than 487 * fixing the interface. Maybe 4.5BSD will remedy this?) 488 */ 489 490 /* 491 * Construct sockaddr format source address. 
492 */ 493 udp_in.sin_port = uh->uh_sport; 494 udp_in.sin_addr = ip->ip_src; 495 arg.udp_in = &udp_in; 496 /* 497 * Locate pcb(s) for datagram. 498 * (Algorithm copied from raw_intr().) 499 */ 500 last = NULL; 501 arg.iphlen = iphlen; 502 503 connhead = &pcbinfo->hashbase[ 504 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 505 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 506 LIST_FOREACH(inp, connhead, inp_hash) { 507 #ifdef INET6 508 if (!INP_ISIPV4(inp)) 509 continue; 510 #endif 511 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 512 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 513 inp->inp_fport != uh->uh_sport || 514 inp->inp_lport != uh->uh_dport) 515 continue; 516 517 arg.inp = inp; 518 arg.last = last; 519 arg.ip = ip; 520 arg.m = m; 521 522 error = udp_mcast_input(&arg); 523 if (error == ERESTART) 524 continue; 525 last = arg.last; 526 527 if (error == EJUSTRETURN) 528 goto done; 529 } 530 531 ichead = &pcbinfo->wildcardhashbase[ 532 INP_PCBWILDCARDHASH(uh->uh_dport, 533 pcbinfo->wildcardhashmask)]; 534 ic_marker = in_pcbcontainer_marker(); 535 536 GET_PCBINFO_TOKEN(pcbinfo); 537 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 538 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 539 LIST_REMOVE(ic_marker, ic_list); 540 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 541 542 inp = ic->ic_inp; 543 if (inp->inp_flags & INP_PLACEMARKER) 544 continue; 545 #ifdef INET6 546 if (!INP_ISIPV4(inp)) 547 continue; 548 #endif 549 if (inp->inp_lport != uh->uh_dport) 550 continue; 551 if (inp->inp_laddr.s_addr != INADDR_ANY && 552 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 553 continue; 554 555 arg.inp = inp; 556 arg.last = last; 557 arg.ip = ip; 558 arg.m = m; 559 560 error = udp_mcast_input(&arg); 561 if (error == ERESTART) 562 continue; 563 last = arg.last; 564 565 if (error == EJUSTRETURN) 566 break; 567 } 568 LIST_REMOVE(ic_marker, ic_list); 569 REL_PCBINFO_TOKEN(pcbinfo); 570 done: 571 if (last == NULL) { 572 /* 573 * No matching pcb found; discard datagram. 
574 * (No need to send an ICMP Port Unreachable 575 * for a broadcast or multicast datgram.) 576 */ 577 udp_stat.udps_noportbcast++; 578 goto bad; 579 } 580 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 581 &udp_in); 582 return(IPPROTO_DONE); 583 } 584 /* 585 * Locate pcb for datagram. 586 */ 587 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 588 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 589 udp_reuseport_ext ? m : NULL); 590 if (inp == NULL) { 591 if (log_in_vain) { 592 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 593 594 log(LOG_INFO, 595 "Connection attempt to UDP %s:%d from %s:%d\n", 596 kinet_ntoa(ip->ip_dst, dst), ntohs(uh->uh_dport), 597 kinet_ntoa(ip->ip_src, src), ntohs(uh->uh_sport)); 598 } 599 udp_stat.udps_noport++; 600 if (m->m_flags & (M_BCAST | M_MCAST)) { 601 udp_stat.udps_noportbcast++; 602 goto bad; 603 } 604 if (blackhole) 605 goto bad; 606 #ifdef ICMP_BANDLIM 607 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 608 goto bad; 609 #endif 610 *ip = save_ip; 611 ip->ip_len += iphlen; 612 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 613 return(IPPROTO_DONE); 614 } 615 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 616 /* 617 * Check the minimum TTL for socket. 618 */ 619 if (ip->ip_ttl < inp->inp_ip_minttl) 620 goto bad; 621 622 /* 623 * Construct sockaddr format source address. 624 * Stuff source address and datagram in user buffer. 
625 */ 626 udp_in.sin_port = uh->uh_sport; 627 udp_in.sin_addr = ip->ip_src; 628 if ((inp->inp_flags & INP_CONTROLOPTS) || 629 (inp->inp_socket->so_options & SO_TIMESTAMP)) 630 ip_savecontrol(inp, &opts, ip, m); 631 m_adj(m, iphlen + sizeof(struct udphdr)); 632 633 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 634 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 635 (struct sockaddr *)&udp_in, m, opts) == 0) { 636 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 637 udp_stat.udps_fullsock++; 638 soroverflow(inp->inp_socket); 639 goto bad; 640 } 641 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 642 sorwakeup(inp->inp_socket); 643 return(IPPROTO_DONE); 644 bad: 645 m_freem(m); 646 if (opts) 647 m_freem(opts); 648 return(IPPROTO_DONE); 649 } 650 651 /* 652 * subroutine of udp_input(), mainly for source code readability. 653 * caller must properly init udp_ip6 and udp_in6 beforehand. 654 */ 655 static void 656 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 657 struct sockaddr_in *udp_in) 658 { 659 struct mbuf *opts = NULL; 660 int ret; 661 662 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 663 664 if (last->inp_flags & INP_CONTROLOPTS || 665 last->inp_socket->so_options & SO_TIMESTAMP) 666 ip_savecontrol(last, &opts, ip, n); 667 m_adj(n, off); 668 669 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 670 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 671 (struct sockaddr *)udp_in, n, opts); 672 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 673 if (ret == 0) { 674 m_freem(n); 675 if (opts) 676 m_freem(opts); 677 udp_stat.udps_fullsock++; 678 } else { 679 sorwakeup(last->inp_socket); 680 } 681 } 682 683 /* 684 * Notify a udp user of an asynchronous error; 685 * just wake up so that he can collect error status. 
686 */ 687 void 688 udp_notify(struct inpcb *inp, int error) 689 { 690 inp->inp_socket->so_error = error; 691 sorwakeup(inp->inp_socket); 692 sowwakeup(inp->inp_socket); 693 } 694 695 struct netmsg_udp_notify { 696 struct netmsg_base base; 697 inp_notify_t nm_notify; 698 struct in_addr nm_faddr; 699 int nm_arg; 700 }; 701 702 static void 703 udp_notifyall_oncpu(netmsg_t msg) 704 { 705 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 706 int nextcpu, cpu = mycpuid; 707 708 ASSERT_NETISR_NCPUS(cpu); 709 710 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 711 712 nextcpu = cpu + 1; 713 if (nextcpu < netisr_ncpus) 714 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 715 else 716 lwkt_replymsg(&nm->base.lmsg, 0); 717 } 718 719 inp_notify_t 720 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 721 struct ip **ip0, int *cpuid) 722 { 723 struct in_addr faddr; 724 struct ip *ip = *ip0; 725 inp_notify_t notify = udp_notify; 726 727 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 728 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 729 return NULL; 730 731 if (PRC_IS_REDIRECT(cmd)) { 732 ip = NULL; 733 notify = in_rtchange; 734 } else if (cmd == PRC_HOSTDEAD) { 735 ip = NULL; 736 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 737 return NULL; 738 } 739 740 if (cpuid != NULL) { 741 if (ip == NULL) { 742 /* Go through all effective netisr CPUs. 
*/ 743 *cpuid = netisr_ncpus; 744 } else { 745 const struct udphdr *uh; 746 747 uh = (const struct udphdr *) 748 ((caddr_t)ip + (ip->ip_hl << 2)); 749 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 750 ip->ip_src.s_addr, uh->uh_sport); 751 } 752 } 753 754 *ip0 = ip; 755 return notify; 756 } 757 758 void 759 udp_ctlinput(netmsg_t msg) 760 { 761 struct sockaddr *sa = msg->ctlinput.nm_arg; 762 struct ip *ip = msg->ctlinput.nm_extra; 763 int cmd = msg->ctlinput.nm_cmd, cpuid; 764 inp_notify_t notify; 765 struct in_addr faddr; 766 767 ASSERT_NETISR_NCPUS(mycpuid); 768 769 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 770 if (notify == NULL) 771 goto done; 772 773 faddr = ((struct sockaddr_in *)sa)->sin_addr; 774 if (ip) { 775 const struct udphdr *uh; 776 struct inpcb *inp; 777 778 if (cpuid != mycpuid) 779 goto done; 780 781 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 782 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 783 ip->ip_src, uh->uh_sport, 0, NULL); 784 if (inp != NULL && inp->inp_socket != NULL) 785 notify(inp, inetctlerrmap[cmd]); 786 } else if (msg->ctlinput.nm_direct) { 787 if (cpuid != netisr_ncpus && cpuid != mycpuid) 788 goto done; 789 790 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 791 notify); 792 } else { 793 struct netmsg_udp_notify *nm; 794 795 ASSERT_NETISR0; 796 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 797 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 798 0, udp_notifyall_oncpu); 799 nm->nm_faddr = faddr; 800 nm->nm_arg = inetctlerrmap[cmd]; 801 nm->nm_notify = notify; 802 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 803 } 804 done: 805 lwkt_replymsg(&msg->lmsg, 0); 806 } 807 808 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 809 in_pcblist_ncpus, "S,xinpcb", "List of active UDP sockets"); 810 811 static int 812 udp_getcred(SYSCTL_HANDLER_ARGS) 813 { 814 struct sockaddr_in addrs[2]; 815 struct ucred cred0, *cred = NULL; 816 struct inpcb *inp; 817 
int error, cpu, origcpu; 818 819 error = priv_check(req->td, PRIV_ROOT); 820 if (error) 821 return (error); 822 error = SYSCTL_IN(req, addrs, sizeof addrs); 823 if (error) 824 return (error); 825 826 origcpu = mycpuid; 827 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 828 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 829 830 lwkt_migratecpu(cpu); 831 832 inp = in_pcblookup_hash(&udbinfo[cpu], 833 addrs[1].sin_addr, addrs[1].sin_port, 834 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 835 if (inp == NULL || inp->inp_socket == NULL) { 836 error = ENOENT; 837 } else if (inp->inp_socket->so_cred != NULL) { 838 cred0 = *(inp->inp_socket->so_cred); 839 cred = &cred0; 840 } 841 842 lwkt_migratecpu(origcpu); 843 844 if (error) 845 return error; 846 847 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 848 } 849 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 850 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 851 852 static void 853 udp_send_redispatch(netmsg_t msg) 854 { 855 struct mbuf *m = msg->send.nm_m; 856 int pru_flags = msg->send.nm_flags; 857 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 858 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 859 int flags = msg->send.nm_priv; /* ip_output flags */ 860 int error; 861 862 logudp(redisp_ipout_beg, inp); 863 864 /* 865 * - Don't use inp route cache. It should only be used in the 866 * inp owner netisr. 867 * - Access to inp_moptions should be safe, since multicast UDP 868 * datagrams are redispatched to netisr0 and inp_moptions is 869 * changed only in netisr0. 
870 */ 871 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 872 if ((pru_flags & PRUS_NOREPLY) == 0) 873 lwkt_replymsg(&msg->send.base.lmsg, error); 874 875 if (m_opt != NULL) { 876 /* Free saved ip options, if any */ 877 m_freem(m_opt); 878 } 879 880 logudp(redisp_ipout_end, inp); 881 } 882 883 static void 884 udp_send(netmsg_t msg) 885 { 886 struct socket *so = msg->send.base.nm_so; 887 struct mbuf *m = msg->send.nm_m; 888 struct sockaddr *dstaddr = msg->send.nm_addr; 889 int pru_flags = msg->send.nm_flags; 890 struct inpcb *inp = so->so_pcb; 891 struct thread *td = msg->send.nm_td; 892 uint16_t hash; 893 int flags; 894 895 struct udpiphdr *ui; 896 int len = m->m_pkthdr.len; 897 struct sockaddr_in *sin; /* really is initialized before use */ 898 int error = 0, cpu; 899 900 KKASSERT(msg->send.nm_control == NULL); 901 902 logudp(send_beg, inp); 903 904 if (inp == NULL) { 905 error = EINVAL; 906 goto release; 907 } 908 909 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 910 error = EMSGSIZE; 911 goto release; 912 } 913 914 if (inp->inp_lport == 0) { /* unbound socket */ 915 boolean_t forwarded; 916 917 error = in_pcbbind(inp, NULL, td); 918 if (error) 919 goto release; 920 921 /* 922 * Need to call udp_send again, after this inpcb is 923 * inserted into wildcard hash table. 924 */ 925 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 926 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 927 if (forwarded) { 928 /* 929 * The message is further forwarded, so we are 930 * done here. 
931 */ 932 logudp(send_inswildcard, inp); 933 return; 934 } 935 } 936 937 if (dstaddr != NULL) { /* destination address specified */ 938 if (inp->inp_faddr.s_addr != INADDR_ANY) { 939 /* already connected */ 940 error = EISCONN; 941 goto release; 942 } 943 sin = (struct sockaddr_in *)dstaddr; 944 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 945 error = EAFNOSUPPORT; /* IPv6 only jail */ 946 goto release; 947 } 948 } else { 949 if (inp->inp_faddr.s_addr == INADDR_ANY) { 950 /* no destination specified and not already connected */ 951 error = ENOTCONN; 952 goto release; 953 } 954 sin = NULL; 955 } 956 957 /* 958 * Calculate data length and get a mbuf 959 * for UDP and IP headers. 960 */ 961 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 962 if (m == NULL) { 963 error = ENOBUFS; 964 goto release; 965 } 966 967 /* 968 * Fill in mbuf with extended UDP header 969 * and addresses and length put into network format. 970 */ 971 ui = mtod(m, struct udpiphdr *); 972 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 973 ui->ui_pr = IPPROTO_UDP; 974 975 /* 976 * Set destination address. 977 */ 978 if (dstaddr != NULL) { /* use specified destination */ 979 ui->ui_dst = sin->sin_addr; 980 ui->ui_dport = sin->sin_port; 981 } else { /* use connected destination */ 982 ui->ui_dst = inp->inp_faddr; 983 ui->ui_dport = inp->inp_fport; 984 } 985 986 /* 987 * Set source address. 988 */ 989 if (inp->inp_laddr.s_addr == INADDR_ANY || 990 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 991 struct sockaddr_in *if_sin; 992 993 if (dstaddr == NULL) { 994 /* 995 * connect() had (or should have) failed because 996 * the interface had no IP address, but the 997 * application proceeded to call send() anyways. 998 */ 999 error = ENOTCONN; 1000 goto release; 1001 } 1002 1003 /* Look up outgoing interface. 
*/ 1004 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1005 if (error) 1006 goto release; 1007 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1008 } else { 1009 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1010 } 1011 ui->ui_sport = inp->inp_lport; 1012 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1013 1014 /* 1015 * Release the original thread, since it is no longer used 1016 */ 1017 if (pru_flags & PRUS_HELDTD) { 1018 lwkt_rele(td); 1019 pru_flags &= ~PRUS_HELDTD; 1020 } 1021 /* 1022 * Free the dest address, since it is no longer needed 1023 */ 1024 if (pru_flags & PRUS_FREEADDR) { 1025 kfree(dstaddr, M_SONAME); 1026 pru_flags &= ~PRUS_FREEADDR; 1027 } 1028 1029 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1030 1031 /* 1032 * Set up checksum and output datagram. 1033 */ 1034 if (udpcksum) { 1035 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1036 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1037 m->m_pkthdr.csum_flags = CSUM_UDP; 1038 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1039 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1040 } else { 1041 ui->ui_sum = 0; 1042 } 1043 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1044 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1045 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1046 udp_stat.udps_opackets++; 1047 1048 flags = IP_DEBUGROUTE | 1049 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1050 if (pru_flags & PRUS_DONTROUTE) 1051 flags |= SO_DONTROUTE; 1052 1053 if (inp->inp_flags & INP_CONNECTED) { 1054 /* 1055 * For connected socket, this datagram has already 1056 * been in the correct netisr; no need to rehash. 
1057 */ 1058 KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); 1059 m_sethash(m, inp->inp_hashval); 1060 goto sendit; 1061 } 1062 1063 hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, 1064 ui->ui_src.s_addr, ui->ui_sport); 1065 m_sethash(m, hash); 1066 1067 cpu = netisr_hashcpu(hash); 1068 if (cpu != mycpuid) { 1069 struct mbuf *m_opt = NULL; 1070 struct netmsg_pru_send *smsg; 1071 struct lwkt_port *port = netisr_cpuport(cpu); 1072 1073 /* 1074 * Not on the CPU that matches this UDP datagram hash; 1075 * redispatch to the correct CPU to do the ip_output(). 1076 */ 1077 if (inp->inp_options != NULL) { 1078 /* 1079 * If there are ip options, then save a copy, 1080 * since accessing inp_options on other CPUs' 1081 * is not safe. 1082 * 1083 * XXX optimize this? 1084 */ 1085 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1086 M_WAITOK); 1087 } 1088 if ((pru_flags & PRUS_NOREPLY) == 0) { 1089 /* 1090 * Change some parts of the original netmsg and 1091 * forward it to the target netisr. 1092 * 1093 * NOTE: so_port MUST NOT be checked in the target 1094 * netisr. 1095 */ 1096 smsg = &msg->send; 1097 smsg->nm_priv = flags; /* ip_output flags */ 1098 smsg->nm_m = m; 1099 smsg->nm_control = m_opt; /* XXX save ipopt */ 1100 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1101 smsg->base.nm_dispatch = udp_send_redispatch; 1102 lwkt_forwardmsg(port, &smsg->base.lmsg); 1103 } else { 1104 /* 1105 * Recreate the netmsg, since the original mbuf 1106 * could have been changed. And send it to the 1107 * target netisr. 1108 * 1109 * NOTE: so_port MUST NOT be checked in the target 1110 * netisr. 
1111 */ 1112 smsg = &m->m_hdr.mh_sndmsg; 1113 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1114 MSGF_IGNSOPORT, udp_send_redispatch); 1115 smsg->nm_priv = flags; /* ip_output flags */ 1116 smsg->nm_flags = pru_flags; 1117 smsg->nm_m = m; 1118 smsg->nm_control = m_opt; /* XXX save ipopt */ 1119 lwkt_sendmsg(port, &smsg->base.lmsg); 1120 } 1121 1122 /* This UDP datagram is redispatched; done */ 1123 logudp(send_redisp, inp); 1124 return; 1125 } 1126 1127 sendit: 1128 logudp(send_ipout, inp); 1129 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1130 inp->inp_moptions, inp); 1131 m = NULL; 1132 1133 release: 1134 if (m != NULL) 1135 m_freem(m); 1136 1137 if (pru_flags & PRUS_HELDTD) 1138 lwkt_rele(td); 1139 if (pru_flags & PRUS_FREEADDR) 1140 kfree(dstaddr, M_SONAME); 1141 if ((pru_flags & PRUS_NOREPLY) == 0) 1142 lwkt_replymsg(&msg->send.base.lmsg, error); 1143 1144 logudp(send_end, inp); 1145 } 1146 1147 u_long udp_sendspace = 9216; /* really max datagram size */ 1148 /* 40 1K datagrams */ 1149 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1150 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1151 1152 u_long udp_recvspace = 40 * (1024 + 1153 #ifdef INET6 1154 sizeof(struct sockaddr_in6) 1155 #else 1156 sizeof(struct sockaddr_in) 1157 #endif 1158 ); 1159 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1160 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1161 1162 /* 1163 * This should never happen, since UDP socket does not support 1164 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 
1165 */ 1166 static void 1167 udp_abort(netmsg_t msg __unused) 1168 { 1169 panic("udp_abort is called"); 1170 } 1171 1172 static int 1173 udp_preattach(struct socket *so, int proto __unused, struct pru_attach_info *ai) 1174 { 1175 return soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1176 } 1177 1178 static void 1179 udp_attach(netmsg_t msg) 1180 { 1181 struct socket *so = msg->attach.base.nm_so; 1182 struct pru_attach_info *ai = msg->attach.nm_ai; 1183 struct inpcb *inp; 1184 int error; 1185 1186 KASSERT(so->so_pcb == NULL, ("udp socket attached")); 1187 1188 if (ai != NULL) { 1189 error = udp_preattach(so, 0 /* don't care */, ai); 1190 if (error) 1191 goto out; 1192 } else { 1193 /* Post attach; do nothing */ 1194 } 1195 1196 error = in_pcballoc(so, &udbinfo[mycpuid]); 1197 if (error) 1198 goto out; 1199 1200 inp = so->so_pcb; 1201 inp->inp_flags |= INP_DIRECT_DETACH; 1202 inp->inp_ip_ttl = ip_defttl; 1203 error = 0; 1204 out: 1205 lwkt_replymsg(&msg->attach.base.lmsg, error); 1206 } 1207 1208 static void 1209 udp_inswildcard_replymsg(netmsg_t msg) 1210 { 1211 lwkt_msg_t lmsg = &msg->lmsg; 1212 1213 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1214 udp_send(msg); 1215 /* msg is replied by udp_send() */ 1216 } else { 1217 lwkt_replymsg(lmsg, lmsg->ms_error); 1218 } 1219 } 1220 1221 static void 1222 udp_soreuseport_dispatch(netmsg_t msg) 1223 { 1224 /* This inpcb has already been in the wildcard hash. 
*/ 1225 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1226 udp_inswildcard_replymsg(msg); 1227 } 1228 1229 static void 1230 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1231 { 1232 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1233 } 1234 1235 static boolean_t 1236 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1237 { 1238 int cpu; 1239 1240 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1241 ("not on owner cpu")); 1242 1243 in_pcbinswildcardhash(inp); 1244 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1245 if (cpu == mycpuid) { 1246 /* 1247 * This inpcb has been inserted by the above 1248 * in_pcbinswildcardhash(). 1249 */ 1250 continue; 1251 } 1252 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1253 } 1254 1255 /* NOTE: inp_lgrpindex is _not_ assigned in jail. */ 1256 if ((inp->inp_socket->so_options & SO_REUSEPORT) && 1257 inp->inp_lgrpindex >= 0) { 1258 /* 1259 * For SO_REUSEPORT socket, redistribute it based on its 1260 * local group index. 1261 */ 1262 cpu = inp->inp_lgrpindex % netisr_ncpus; 1263 if (cpu != mycpuid) { 1264 struct lwkt_port *port = netisr_cpuport(cpu); 1265 lwkt_msg_t lmsg = &msg->lmsg; 1266 1267 /* 1268 * We are moving the protocol processing port the 1269 * socket is on, we have to unlink here and re-link 1270 * on the target cpu (this inpcb is still left in 1271 * the wildcard hash). 
1272 */ 1273 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1274 msg->nm_dispatch = udp_soreuseport_dispatch; 1275 1276 /* 1277 * See the related comment in tcp_usrreq.c 1278 * tcp_connect() 1279 */ 1280 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1281 lwkt_forwardmsg(port, lmsg); 1282 return TRUE; /* forwarded */ 1283 } 1284 } 1285 return FALSE; 1286 } 1287 1288 static void 1289 udp_inswildcardhash_dispatch(netmsg_t msg) 1290 { 1291 struct inpcb *inp = msg->base.nm_so->so_pcb; 1292 boolean_t forwarded; 1293 1294 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1295 KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu")); 1296 1297 in_pcblink(inp, &udbinfo[mycpuid]); 1298 1299 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1300 if (forwarded) { 1301 /* The message is further forwarded, so we are done here. */ 1302 return; 1303 } 1304 udp_inswildcard_replymsg(msg); 1305 } 1306 1307 static boolean_t 1308 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1309 { 1310 lwkt_msg_t lmsg = &msg->lmsg; 1311 int cpu; 1312 1313 ASSERT_INP_NOTINHASH(inp); 1314 1315 /* This inpcb could no longer be directly detached */ 1316 inp->inp_flags &= ~INP_DIRECT_DETACH; 1317 1318 /* 1319 * Always clear the route cache, so we don't need to 1320 * worry about any owner CPU changes later. 1321 */ 1322 in_pcbresetroute(inp); 1323 1324 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1325 cpu = udp_lportcpu(inp->inp_lport); 1326 1327 lmsg->ms_error = error; 1328 if (cpu != mycpuid) { 1329 struct lwkt_port *port = netisr_cpuport(cpu); 1330 1331 /* 1332 * We are moving the protocol processing port the socket 1333 * is on, we have to unlink here and re-link on the 1334 * target cpu. 
1335 */ 1336 in_pcbunlink(inp, &udbinfo[mycpuid]); 1337 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1338 1339 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1340 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1341 lwkt_forwardmsg(port, lmsg); 1342 return TRUE; /* forwarded */ 1343 } 1344 1345 return udp_inswildcardhash_oncpu(inp, msg); 1346 } 1347 1348 static void 1349 udp_bind(netmsg_t msg) 1350 { 1351 struct socket *so = msg->bind.base.nm_so; 1352 struct inpcb *inp; 1353 int error; 1354 1355 inp = so->so_pcb; 1356 if (inp) { 1357 struct sockaddr *nam = msg->bind.nm_nam; 1358 struct thread *td = msg->bind.nm_td; 1359 struct sockaddr_in *sin; 1360 lwkt_port_t port; 1361 int cpu; 1362 1363 /* 1364 * Check "already bound" here (in_pcbbind() does the same 1365 * check though), so we don't forward a connected/bound 1366 * socket randomly which would panic in the following 1367 * in_pcbunlink(). 1368 */ 1369 if (inp->inp_lport != 0 || 1370 inp->inp_laddr.s_addr != INADDR_ANY) { 1371 error = EINVAL; /* already bound */ 1372 goto done; 1373 } 1374 1375 if (nam->sa_len != sizeof(*sin)) { 1376 error = EINVAL; 1377 goto done; 1378 } 1379 sin = (struct sockaddr_in *)nam; 1380 1381 cpu = udp_lportcpu(sin->sin_port); 1382 port = netisr_cpuport(cpu); 1383 1384 /* 1385 * See the related comment in tcp_usrreq.c tcp_usr_bind(). 1386 * The exception is that we use local port based netisr 1387 * to serialize in_pcbbind(). 1388 */ 1389 if (&curthread->td_msgport != port) { 1390 lwkt_msg_t lmsg = &msg->bind.base.lmsg; 1391 1392 KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0, 1393 ("already asked to relink")); 1394 1395 in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]); 1396 msg->bind.nm_flags |= PRUB_RELINK; 1397 1398 /* 1399 * See the related comment in tcp_usrreq.c 1400 * tcp_connect(). 
1401 */ 1402 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1403 lwkt_forwardmsg(port, lmsg); 1404 /* msg invalid now */ 1405 return; 1406 } 1407 KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu)); 1408 1409 if (msg->bind.nm_flags & PRUB_RELINK) { 1410 msg->bind.nm_flags &= ~PRUB_RELINK; 1411 in_pcblink(so->so_pcb, &udbinfo[mycpuid]); 1412 } 1413 KASSERT(inp->inp_pcbinfo == &udbinfo[cpu], 1414 ("pcbinfo is not udbinfo%d", cpu)); 1415 1416 error = in_pcbbind(inp, nam, td); 1417 if (error == 0) { 1418 boolean_t forwarded; 1419 1420 if (sin->sin_addr.s_addr != INADDR_ANY) 1421 inp->inp_flags |= INP_WASBOUND_NOTANY; 1422 1423 forwarded = udp_inswildcardhash(inp, 1424 &msg->bind.base, 0); 1425 if (forwarded) { 1426 /* 1427 * The message is further forwarded, so 1428 * we are done here. 1429 */ 1430 return; 1431 } 1432 } 1433 } else { 1434 error = EINVAL; 1435 } 1436 done: 1437 lwkt_replymsg(&msg->bind.base.lmsg, error); 1438 } 1439 1440 static int 1441 udp_preconnect(struct socket *so, const struct sockaddr *nam __unused, 1442 struct thread *td __unused) 1443 { 1444 sosetstate(so, SS_ISCONNECTED); /* XXX */ 1445 return 0; 1446 } 1447 1448 static void 1449 udp_connect(netmsg_t msg) 1450 { 1451 struct socket *so = msg->connect.base.nm_so; 1452 struct sockaddr *nam = msg->connect.nm_nam; 1453 struct thread *td = msg->connect.nm_td; 1454 struct inpcb *inp; 1455 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1456 struct sockaddr_in *if_sin; 1457 struct lwkt_port *port; 1458 uint16_t hash; 1459 int error; 1460 1461 KKASSERT(msg->connect.nm_m == NULL); 1462 1463 inp = so->so_pcb; 1464 if (inp == NULL) { 1465 error = EINVAL; 1466 goto out; 1467 } 1468 1469 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1470 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1471 in_pcblink(inp, &udbinfo[mycpuid]); 1472 } 1473 1474 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1475 error = EISCONN; 1476 goto out; 1477 } 1478 error = 0; 1479 1480 /* 1481 * Bind if we have to 1482 */ 1483 if 
(inp->inp_lport == 0) { 1484 error = in_pcbbind(inp, NULL, td); 1485 if (error) 1486 goto out; 1487 } 1488 1489 /* 1490 * Calculate the correct protocol processing thread. The connect 1491 * operation must run there. 1492 */ 1493 error = in_pcbladdr(inp, nam, &if_sin, td); 1494 if (error) 1495 goto out; 1496 if (!prison_remote_ip(td, nam)) { 1497 error = EAFNOSUPPORT; /* IPv6 only jail */ 1498 goto out; 1499 } 1500 1501 hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, 1502 inp->inp_laddr.s_addr != INADDR_ANY ? 1503 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1504 port = netisr_hashport(hash); 1505 if (port != &curthread->td_msgport) { 1506 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1507 int nm_flags = PRUC_RECONNECT; 1508 1509 /* 1510 * in_pcbladdr() may have allocated a route entry for us 1511 * on the current CPU, but we need a route entry on the 1512 * inpcb's owner CPU, so free it here. 1513 */ 1514 in_pcbresetroute(inp); 1515 1516 if (inp->inp_flags & INP_WILDCARD) { 1517 /* 1518 * Remove this inpcb from the wildcard hash before 1519 * the socket's msgport changes. 1520 */ 1521 udp_remwildcardhash(inp); 1522 } 1523 1524 if (so->so_orig_port == NULL) { 1525 /* 1526 * First time change protocol processing port. 1527 * Save the current port for synchronization upon 1528 * udp_detach. 1529 */ 1530 so->so_orig_port = &curthread->td_msgport; 1531 } else { 1532 /* 1533 * We have changed protocol processing port more 1534 * than once. We could not do direct detach 1535 * anymore, because we lose the track of the 1536 * original protocol processing ports to perform 1537 * synchronization upon udp_detach. This should 1538 * be rare though. 1539 */ 1540 inp->inp_flags &= ~INP_DIRECT_DETACH; 1541 } 1542 1543 /* 1544 * We are moving the protocol processing port the socket 1545 * is on, we have to unlink here and re-link on the 1546 * target cpu. 
1547 */ 1548 in_pcbunlink(inp, &udbinfo[mycpuid]); 1549 msg->connect.nm_flags |= nm_flags; 1550 1551 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1552 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1553 lwkt_forwardmsg(port, lmsg); 1554 /* msg invalid now */ 1555 return; 1556 } 1557 error = udp_connect_oncpu(inp, sin, if_sin, hash); 1558 out: 1559 if (msg->connect.nm_flags & PRUC_HELDTD) 1560 lwkt_rele(td); 1561 if (error && (msg->connect.nm_flags & PRUC_ASYNC)) { 1562 if (inp->inp_lport == 0) { 1563 /* 1564 * As long as we have the local port, it is fine 1565 * for connect to fail, e.g. disconnect. 1566 */ 1567 so->so_error = error; 1568 } 1569 soclrstate(so, SS_ISCONNECTED); 1570 /* 1571 * Wake up callers blocked on this socket to make sure 1572 * that they can see this error. 1573 * 1574 * NOTE: 1575 * sodisconnected() can't be used here, which bricks 1576 * sending and receiving. 1577 */ 1578 wakeup(&so->so_timeo); 1579 sowwakeup(so); 1580 sorwakeup(so); 1581 } 1582 if (error && inp != NULL && inp->inp_lport != 0 && 1583 (inp->inp_flags & INP_WILDCARD) == 0) { 1584 boolean_t forwarded; 1585 1586 /* Connect failed; put it to wildcard hash. */ 1587 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1588 error); 1589 if (forwarded) { 1590 /* 1591 * The message is further forwarded, so we are done 1592 * here. 1593 */ 1594 return; 1595 } 1596 } 1597 lwkt_replymsg(&msg->connect.base.lmsg, error); 1598 } 1599 1600 static void 1601 udp_remwildcardhash(struct inpcb *inp) 1602 { 1603 int cpu; 1604 1605 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1606 ("not on owner cpu")); 1607 1608 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1609 if (cpu == mycpuid) { 1610 /* 1611 * This inpcb will be removed by the later 1612 * in_pcbremwildcardhash(). 
1613 */ 1614 continue; 1615 } 1616 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1617 } 1618 in_pcbremwildcardhash(inp); 1619 } 1620 1621 static int 1622 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1623 struct sockaddr_in *if_sin, uint16_t hash) 1624 { 1625 struct socket *so = inp->inp_socket; 1626 struct inpcb *oinp; 1627 1628 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1629 sin->sin_addr, sin->sin_port, 1630 inp->inp_laddr.s_addr != INADDR_ANY ? 1631 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1632 if (oinp != NULL) 1633 return EADDRINUSE; 1634 1635 /* 1636 * No more errors can occur, finish adjusting the socket 1637 * and change the processing port to reflect the connected 1638 * socket. Once set we can no longer safely mess with the 1639 * socket. 1640 */ 1641 1642 if (inp->inp_flags & INP_WILDCARD) 1643 udp_remwildcardhash(inp); 1644 1645 if (inp->inp_laddr.s_addr == INADDR_ANY) 1646 inp->inp_laddr = if_sin->sin_addr; 1647 inp->inp_faddr = sin->sin_addr; 1648 inp->inp_fport = sin->sin_port; 1649 in_pcbinsconnhash(inp); 1650 1651 inp->inp_flags |= INP_HASH; 1652 inp->inp_hashval = hash; 1653 1654 soisconnected(so); 1655 1656 return 0; 1657 } 1658 1659 static void 1660 udp_detach2(struct socket *so) 1661 { 1662 in_pcbdetach(so->so_pcb); 1663 sodiscard(so); 1664 sofree(so); 1665 } 1666 1667 static void 1668 udp_detach_final_dispatch(netmsg_t msg) 1669 { 1670 udp_detach2(msg->base.nm_so); 1671 } 1672 1673 static void 1674 udp_detach_oncpu_dispatch(netmsg_t msg) 1675 { 1676 struct netmsg_base *clomsg = &msg->base; 1677 struct socket *so = clomsg->nm_so; 1678 struct inpcb *inp = so->so_pcb; 1679 struct thread *td = curthread; 1680 int nextcpu, cpuid = mycpuid; 1681 1682 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1683 1684 if (inp->inp_flags & INP_WILDCARD) { 1685 /* 1686 * This inp will be removed on the inp's 1687 * owner CPU later, so don't do it now. 
1688 */ 1689 if (&td->td_msgport != so->so_port) 1690 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1691 } 1692 1693 if (cpuid == 0) { 1694 /* 1695 * Free and clear multicast socket option, 1696 * which is only accessed in netisr0. 1697 */ 1698 ip_freemoptions(inp->inp_moptions); 1699 inp->inp_moptions = NULL; 1700 } 1701 1702 nextcpu = cpuid + 1; 1703 if (nextcpu < netisr_ncpus) { 1704 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1705 } else { 1706 /* 1707 * No one could see this inpcb now; destroy this 1708 * inpcb in its owner netisr. 1709 */ 1710 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1711 udp_detach_final_dispatch); 1712 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1713 } 1714 } 1715 1716 static void 1717 udp_detach_syncorig_dispatch(netmsg_t msg) 1718 { 1719 struct netmsg_base *clomsg = &msg->base; 1720 struct socket *so = clomsg->nm_so; 1721 1722 /* 1723 * Original protocol processing port is synchronized; 1724 * destroy this inpcb in its owner netisr. 1725 */ 1726 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1727 udp_detach_final_dispatch); 1728 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1729 } 1730 1731 static void 1732 udp_detach(netmsg_t msg) 1733 { 1734 struct socket *so = msg->detach.base.nm_so; 1735 struct netmsg_base *clomsg; 1736 struct inpcb *inp; 1737 1738 inp = so->so_pcb; 1739 if (inp == NULL) { 1740 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1741 return; 1742 } 1743 1744 /* 1745 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1746 * sofree() later. 1747 */ 1748 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1749 1750 if (netisr_ncpus == 1) { 1751 /* Only one CPU, detach the inpcb directly. */ 1752 udp_detach2(so); 1753 return; 1754 } 1755 1756 /* 1757 * Remove this inpcb from the inpcb list first, so that 1758 * no one could find this inpcb from the inpcb list. 
1759 */ 1760 in_pcbofflist(inp); 1761 1762 /* 1763 * Remove this inpcb from the local port hash directly 1764 * here, so that its bound local port could be recycled 1765 * timely. 1766 */ 1767 in_pcbremporthash(inp); 1768 1769 if (inp->inp_flags & INP_DIRECT_DETACH) { 1770 /* 1771 * Direct detaching is allowed 1772 */ 1773 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1774 ("in the wildcardhash")); 1775 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1776 if (so->so_orig_port == NULL) { 1777 udp_detach2(so); 1778 } else { 1779 /* 1780 * Protocol processing port changed once, so 1781 * we need to make sure that there are nothing 1782 * left on the original protocol processing 1783 * port before we destroy this socket and inpcb. 1784 * This is more lightweight than going through 1785 * all UDP processing netisrs. 1786 */ 1787 clomsg = &so->so_clomsg; 1788 netmsg_init(clomsg, so, &netisr_apanic_rport, 1789 MSGF_IGNSOPORT, udp_detach_syncorig_dispatch); 1790 lwkt_sendmsg(so->so_orig_port, &clomsg->lmsg); 1791 } 1792 return; 1793 } 1794 1795 /* 1796 * Go through netisrs which process UDP to make sure 1797 * no one could find this inpcb anymore. 1798 */ 1799 clomsg = &so->so_clomsg; 1800 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1801 udp_detach_oncpu_dispatch); 1802 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1803 } 1804 1805 static void 1806 udp_disconnect(netmsg_t msg) 1807 { 1808 struct socket *so = msg->disconnect.base.nm_so; 1809 struct inpcb *inp; 1810 boolean_t forwarded; 1811 int error = 0; 1812 1813 inp = so->so_pcb; 1814 if (inp == NULL) { 1815 error = EINVAL; 1816 goto out; 1817 } 1818 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1819 error = ENOTCONN; 1820 goto out; 1821 } 1822 1823 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1824 1825 in_pcbdisconnect(inp); 1826 inp->inp_flags &= ~INP_HASH; 1827 1828 /* 1829 * Follow traditional BSD behavior and retain the local port 1830 * binding. 
But, fix the old misbehavior of overwriting any 1831 * previously bound local address. 1832 */ 1833 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1834 inp->inp_laddr.s_addr = INADDR_ANY; 1835 1836 if (so->so_state & SS_ISCLOSING) { 1837 /* 1838 * If this socket is being closed, there is no need 1839 * to put this socket back into wildcard hash table. 1840 */ 1841 error = 0; 1842 goto out; 1843 } 1844 1845 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1846 if (forwarded) { 1847 /* 1848 * The message is further forwarded, so we are done 1849 * here. 1850 */ 1851 return; 1852 } 1853 out: 1854 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1855 } 1856 1857 void 1858 udp_shutdown(netmsg_t msg) 1859 { 1860 struct socket *so = msg->shutdown.base.nm_so; 1861 struct inpcb *inp; 1862 int error; 1863 1864 inp = so->so_pcb; 1865 if (inp) { 1866 socantsendmore(so); 1867 error = 0; 1868 } else { 1869 error = EINVAL; 1870 } 1871 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1872 } 1873 1874 struct pr_usrreqs udp_usrreqs = { 1875 .pru_abort = udp_abort, 1876 .pru_accept = pr_generic_notsupp, 1877 .pru_attach = udp_attach, 1878 .pru_bind = udp_bind, 1879 .pru_connect = udp_connect, 1880 .pru_connect2 = pr_generic_notsupp, 1881 .pru_control = in_control_dispatch, 1882 .pru_detach = udp_detach, 1883 .pru_disconnect = udp_disconnect, 1884 .pru_listen = pr_generic_notsupp, 1885 .pru_peeraddr = in_setpeeraddr_dispatch, 1886 .pru_rcvd = pr_generic_notsupp, 1887 .pru_rcvoob = pr_generic_notsupp, 1888 .pru_send = udp_send, 1889 .pru_sense = pru_sense_null, 1890 .pru_shutdown = udp_shutdown, 1891 .pru_sockaddr = in_setsockaddr_dispatch, 1892 .pru_sosend = sosendudp, 1893 .pru_soreceive = soreceive, 1894 .pru_preconnect = udp_preconnect, 1895 .pru_preattach = udp_preattach 1896 }; 1897