1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_inet6.h" 67 68 #include <sys/param.h> 69 #include <sys/systm.h> 70 #include <sys/kernel.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/domain.h> 74 #include <sys/proc.h> 75 #include <sys/priv.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #include <sys/sysctl.h> 80 #include <sys/syslog.h> 81 #include <sys/in_cksum.h> 82 #include <sys/ktr.h> 83 84 #include <sys/thread2.h> 85 #include <sys/socketvar2.h> 86 #include <sys/serialize.h> 87 88 #include <machine/stdarg.h> 89 90 #include <net/if.h> 91 #include <net/route.h> 92 #include <net/netmsg2.h> 93 #include <net/netisr2.h> 94 95 #include <netinet/in.h> 96 #include <netinet/in_systm.h> 97 #include <netinet/ip.h> 98 #ifdef INET6 99 #include <netinet/ip6.h> 100 #endif 101 #include <netinet/in_pcb.h> 102 #include <netinet/in_var.h> 103 #include <netinet/ip_var.h> 104 #ifdef INET6 105 #include <netinet6/ip6_var.h> 106 #endif 107 #include <netinet/ip_icmp.h> 108 #include <netinet/icmp_var.h> 109 #include <netinet/udp.h> 110 #include <netinet/udp_var.h> 111 112 #define MSGF_UDP_SEND MSGF_PROTO1 113 114 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 115 116 #define UDP_KTR_STRING "inp=%p" 117 #define UDP_KTR_ARGS struct inpcb *inp 118 119 #ifndef KTR_UDP 120 #define KTR_UDP KTR_ALL 121 #endif 122 123 KTR_INFO_MASTER(udp); 124 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 125 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 126 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 127 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 128 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 129 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 130 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 131 132 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 133 134 /* 135 * UDP protocol implementation. 136 * Per RFC 768, August, 1980. 137 */ 138 #ifndef COMPAT_42 139 static int udpcksum = 1; 140 #else 141 static int udpcksum = 0; /* XXX */ 142 #endif 143 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 144 &udpcksum, 0, "Enable checksumming of UDP packets"); 145 146 int log_in_vain = 0; 147 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 148 &log_in_vain, 0, "Log all incoming UDP packets"); 149 150 static int blackhole = 0; 151 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 152 &blackhole, 0, "Do not send port unreachables for refused connects"); 153 154 static int strict_mcast_mship = 1; 155 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 156 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 157 158 int udp_sosend_async = 1; 159 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 160 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 161 162 int udp_sosend_prepend = 1; 163 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 164 &udp_sosend_prepend, 0, 165 "Prepend enough space for proto and link header in pru_send"); 166 167 static int udp_reuseport_ext = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 169 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 170 171 struct inpcbinfo udbinfo[MAXCPU]; 172 173 #ifndef UDBHASHSIZE 174 #define UDBHASHSIZE 16 175 #endif 176 CTASSERT((UDBHASHSIZE & (UDBHASHSIZE - 1)) == 0); 177 178 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 179 180 static void udp_append(struct inpcb *last, struct ip *ip, 181 struct mbuf *n, int off, struct sockaddr_in *udp_in); 182 183 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 184 struct sockaddr_in *if_sin, uint16_t hash); 185 186 static boolean_t udp_inswildcardhash(struct inpcb *inp, 187 struct netmsg_base *msg, int error); 188 static void udp_remwildcardhash(struct inpcb *inp); 189 190 static __inline int 191 udp_lportcpu(short lport) 192 { 193 return (ntohs(lport) % netisr_ncpus); 194 } 195 196 void 197 udp_init(void) 198 { 199 struct inpcbportinfo *portinfo; 200 int cpu; 201 202 portinfo = kmalloc_cachealign(sizeof(*portinfo) * netisr_ncpus, M_PCB, 203 M_WAITOK); 204 205 for (cpu = 0; cpu < netisr_ncpus; cpu++) { 206 struct inpcbinfo *uicb = &udbinfo[cpu]; 207 208 /* 209 * NOTE: 210 * UDP pcb list, wildcard hash table and localgroup hash 211 * table are shared. 212 */ 213 in_pcbinfo_init(uicb, cpu, TRUE); 214 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 215 216 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, cpu); 217 in_pcbportinfo_set(uicb, portinfo, netisr_ncpus); 218 219 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 220 &uicb->wildcardhashmask); 221 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 222 &uicb->localgrphashmask); 223 224 uicb->ipi_size = sizeof(struct inpcb); 225 } 226 227 /* 228 * Initialize UDP statistics counters for each CPU. 229 */ 230 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 231 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 232 } 233 234 static int 235 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 236 { 237 int cpu, error = 0; 238 239 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 240 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 241 sizeof(struct udpstat)))) 242 break; 243 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 244 sizeof(struct udpstat)))) 245 break; 246 } 247 248 return (error); 249 } 250 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 251 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 252 253 void 254 udp_ctloutput(netmsg_t msg) 255 { 256 struct socket *so = msg->base.nm_so; 257 struct sockopt *sopt = msg->ctloutput.nm_sopt; 258 struct inpcb *inp = so->so_pcb; 259 260 if (inp == NULL) { 261 lwkt_replymsg(&msg->lmsg, EINVAL); 262 return; 263 } 264 265 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 266 switch (sopt->sopt_name) { 267 case IP_MULTICAST_IF: 268 case IP_MULTICAST_VIF: 269 case IP_MULTICAST_TTL: 270 case IP_MULTICAST_LOOP: 271 case IP_ADD_MEMBERSHIP: 272 case IP_DROP_MEMBERSHIP: 273 /* 274 * This pr_ctloutput msg will be forwarded 275 * to netisr0 to run; we can't do direct 276 * detaching anymore. 277 * 278 * NOTE: 279 * Don't optimize for the sockets whose 280 * current so_port is netisr0's msgport. 281 * These sockets could be connect(2)'ed 282 * later and the so_port will be changed. 283 */ 284 inp->inp_flags &= ~INP_DIRECT_DETACH; 285 break; 286 } 287 } 288 return ip_ctloutput(msg); 289 } 290 291 /* 292 * Check multicast packets to make sure they are only sent to sockets with 293 * multicast memberships for the packet's destination address and arrival 294 * interface. Multicast packets to multicast-unaware sockets are also 295 * disallowed. 296 * 297 * Returns 0 if the packet is acceptable, -1 if it is not. 298 */ 299 static __inline int 300 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 301 const struct mbuf *m) 302 { 303 const struct ip_moptions *mopt; 304 int mshipno; 305 306 if (strict_mcast_mship == 0 || 307 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 308 return (0); 309 } 310 311 ASSERT_NETISR0; 312 313 mopt = inp->inp_moptions; 314 if (mopt == NULL) 315 return (-1); 316 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 317 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 318 319 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 320 m->m_pkthdr.rcvif == maddr->inm_ifp) { 321 return (0); 322 } 323 } 324 return (-1); 325 } 326 327 struct udp_mcast_arg { 328 struct inpcb *inp; 329 struct inpcb *last; 330 struct ip *ip; 331 struct mbuf *m; 332 int iphlen; 333 struct sockaddr_in *udp_in; 334 }; 335 336 static int 337 udp_mcast_input(struct udp_mcast_arg *arg) 338 { 339 struct inpcb *inp = arg->inp; 340 struct inpcb *last = arg->last; 341 struct ip *ip = arg->ip; 342 struct mbuf *m = arg->m; 343 344 if (check_multicast_membership(ip, inp, m) < 0) 345 return ERESTART; /* caller continue */ 346 347 if (last != NULL) { 348 struct mbuf *n; 349 350 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 351 udp_append(last, ip, n, 352 arg->iphlen + sizeof(struct udphdr), 353 arg->udp_in); 354 } 355 arg->last = last = inp; 356 357 /* 358 * Don't look for additional matches if this one does 359 * not have either the SO_REUSEPORT or SO_REUSEADDR 360 * socket options set. This heuristic avoids searching 361 * through all pcbs in the common case of a non-shared 362 * port. It * assumes that an application will never 363 * clear these options after setting them. 364 */ 365 if (!(last->inp_socket->so_options & 366 (SO_REUSEPORT | SO_REUSEADDR))) 367 return EJUSTRETURN; /* caller stop */ 368 return 0; 369 } 370 371 int 372 udp_input(struct mbuf **mp, int *offp, int proto) 373 { 374 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 375 int iphlen; 376 struct ip *ip; 377 struct udphdr *uh; 378 struct inpcb *inp; 379 struct mbuf *m; 380 struct mbuf *opts = NULL; 381 int len, off; 382 struct ip save_ip; 383 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 384 385 off = *offp; 386 m = *mp; 387 *mp = NULL; 388 389 iphlen = off; 390 udp_stat.udps_ipackets++; 391 392 /* 393 * Strip IP options, if any; should skip this, 394 * make available to user, and use on returned packets, 395 * but we don't yet have a way to check the checksum 396 * with options still present. 397 */ 398 if (iphlen > sizeof(struct ip)) { 399 ip_stripoptions(m); 400 iphlen = sizeof(struct ip); 401 } 402 403 /* 404 * IP and UDP headers are together in first mbuf. 405 * Already checked and pulled up in ip_demux(). 406 */ 407 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 408 ("UDP header not in one mbuf")); 409 410 ip = mtod(m, struct ip *); 411 uh = (struct udphdr *)((caddr_t)ip + iphlen); 412 413 /* destination port of 0 is illegal, based on RFC768. */ 414 if (uh->uh_dport == 0) 415 goto bad; 416 417 /* 418 * Make mbuf data length reflect UDP length. 419 * If not enough data to reflect UDP length, drop. 420 */ 421 len = ntohs((u_short)uh->uh_ulen); 422 if (ip->ip_len != len) { 423 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 424 udp_stat.udps_badlen++; 425 goto bad; 426 } 427 m_adj(m, len - ip->ip_len); 428 /* ip->ip_len = len; */ 429 } 430 /* 431 * Save a copy of the IP header in case we want restore it 432 * for sending an ICMP error message in response. 433 */ 434 save_ip = *ip; 435 436 /* 437 * Checksum extended UDP header and data. 438 */ 439 if (uh->uh_sum) { 440 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 441 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 442 uh->uh_sum = m->m_pkthdr.csum_data; 443 else 444 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 445 ip->ip_dst.s_addr, htonl((u_short)len + 446 m->m_pkthdr.csum_data + IPPROTO_UDP)); 447 uh->uh_sum ^= 0xffff; 448 } else { 449 char b[9]; 450 451 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 452 bzero(((struct ipovly *)ip)->ih_x1, 9); 453 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 454 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 455 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 456 } 457 if (uh->uh_sum) { 458 udp_stat.udps_badsum++; 459 m_freem(m); 460 return(IPPROTO_DONE); 461 } 462 } else 463 udp_stat.udps_nosum++; 464 465 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 466 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 467 struct inpcbhead *connhead; 468 struct inpcontainer *ic, *ic_marker; 469 struct inpcontainerhead *ichead; 470 struct udp_mcast_arg arg; 471 struct inpcb *last; 472 int error; 473 474 /* 475 * Deliver a multicast or broadcast datagram to *all* sockets 476 * for which the local and remote addresses and ports match 477 * those of the incoming datagram. This allows more than 478 * one process to receive multi/broadcasts on the same port. 479 * (This really ought to be done for unicast datagrams as 480 * well, but that would cause problems with existing 481 * applications that open both address-specific sockets and 482 * a wildcard socket listening to the same port -- they would 483 * end up receiving duplicates of every unicast datagram. 484 * Those applications open the multiple sockets to overcome an 485 * inadequacy of the UDP socket interface, but for backwards 486 * compatibility we avoid the problem here rather than 487 * fixing the interface. Maybe 4.5BSD will remedy this?) 488 */ 489 490 /* 491 * Construct sockaddr format source address. 492 */ 493 udp_in.sin_port = uh->uh_sport; 494 udp_in.sin_addr = ip->ip_src; 495 arg.udp_in = &udp_in; 496 /* 497 * Locate pcb(s) for datagram. 498 * (Algorithm copied from raw_intr().) 499 */ 500 last = NULL; 501 arg.iphlen = iphlen; 502 503 connhead = &pcbinfo->hashbase[ 504 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 505 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 506 LIST_FOREACH(inp, connhead, inp_hash) { 507 #ifdef INET6 508 if (!INP_ISIPV4(inp)) 509 continue; 510 #endif 511 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 512 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 513 inp->inp_fport != uh->uh_sport || 514 inp->inp_lport != uh->uh_dport) 515 continue; 516 517 arg.inp = inp; 518 arg.last = last; 519 arg.ip = ip; 520 arg.m = m; 521 522 error = udp_mcast_input(&arg); 523 if (error == ERESTART) 524 continue; 525 last = arg.last; 526 527 if (error == EJUSTRETURN) 528 goto done; 529 } 530 531 ichead = &pcbinfo->wildcardhashbase[ 532 INP_PCBWILDCARDHASH(uh->uh_dport, 533 pcbinfo->wildcardhashmask)]; 534 ic_marker = in_pcbcontainer_marker(); 535 536 GET_PCBINFO_TOKEN(pcbinfo); 537 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 538 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 539 LIST_REMOVE(ic_marker, ic_list); 540 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 541 542 inp = ic->ic_inp; 543 if (inp->inp_flags & INP_PLACEMARKER) 544 continue; 545 #ifdef INET6 546 if (!INP_ISIPV4(inp)) 547 continue; 548 #endif 549 if (inp->inp_lport != uh->uh_dport) 550 continue; 551 if (inp->inp_laddr.s_addr != INADDR_ANY && 552 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 553 continue; 554 555 arg.inp = inp; 556 arg.last = last; 557 arg.ip = ip; 558 arg.m = m; 559 560 error = udp_mcast_input(&arg); 561 if (error == ERESTART) 562 continue; 563 last = arg.last; 564 565 if (error == EJUSTRETURN) 566 break; 567 } 568 LIST_REMOVE(ic_marker, ic_list); 569 REL_PCBINFO_TOKEN(pcbinfo); 570 done: 571 if (last == NULL) { 572 /* 573 * No matching pcb found; discard datagram. 574 * (No need to send an ICMP Port Unreachable 575 * for a broadcast or multicast datgram.) 576 */ 577 udp_stat.udps_noportbcast++; 578 goto bad; 579 } 580 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 581 &udp_in); 582 return(IPPROTO_DONE); 583 } 584 /* 585 * Locate pcb for datagram. 586 */ 587 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 588 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 589 udp_reuseport_ext ? m : NULL); 590 if (inp == NULL) { 591 if (log_in_vain) { 592 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 593 594 log(LOG_INFO, 595 "Connection attempt to UDP %s:%d from %s:%d\n", 596 kinet_ntoa(ip->ip_dst, dst), ntohs(uh->uh_dport), 597 kinet_ntoa(ip->ip_src, src), ntohs(uh->uh_sport)); 598 } 599 udp_stat.udps_noport++; 600 if (m->m_flags & (M_BCAST | M_MCAST)) { 601 udp_stat.udps_noportbcast++; 602 goto bad; 603 } 604 if (blackhole) 605 goto bad; 606 #ifdef ICMP_BANDLIM 607 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 608 goto bad; 609 #endif 610 *ip = save_ip; 611 ip->ip_len += iphlen; 612 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 613 return(IPPROTO_DONE); 614 } 615 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 616 /* 617 * Check the minimum TTL for socket. 618 */ 619 if (ip->ip_ttl < inp->inp_ip_minttl) 620 goto bad; 621 622 /* 623 * Construct sockaddr format source address. 624 * Stuff source address and datagram in user buffer. 625 */ 626 udp_in.sin_port = uh->uh_sport; 627 udp_in.sin_addr = ip->ip_src; 628 if ((inp->inp_flags & INP_CONTROLOPTS) || 629 (inp->inp_socket->so_options & SO_TIMESTAMP)) 630 ip_savecontrol(inp, &opts, ip, m); 631 m_adj(m, iphlen + sizeof(struct udphdr)); 632 633 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 634 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 635 (struct sockaddr *)&udp_in, m, opts) == 0) { 636 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 637 udp_stat.udps_fullsock++; 638 goto bad; 639 } 640 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 641 sorwakeup(inp->inp_socket); 642 return(IPPROTO_DONE); 643 bad: 644 m_freem(m); 645 if (opts) 646 m_freem(opts); 647 return(IPPROTO_DONE); 648 } 649 650 /* 651 * subroutine of udp_input(), mainly for source code readability. 652 * caller must properly init udp_ip6 and udp_in6 beforehand. 653 */ 654 static void 655 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 656 struct sockaddr_in *udp_in) 657 { 658 struct mbuf *opts = NULL; 659 int ret; 660 661 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 662 663 if (last->inp_flags & INP_CONTROLOPTS || 664 last->inp_socket->so_options & SO_TIMESTAMP) 665 ip_savecontrol(last, &opts, ip, n); 666 m_adj(n, off); 667 668 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 669 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 670 (struct sockaddr *)udp_in, n, opts); 671 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 672 if (ret == 0) { 673 m_freem(n); 674 if (opts) 675 m_freem(opts); 676 udp_stat.udps_fullsock++; 677 } else { 678 sorwakeup(last->inp_socket); 679 } 680 } 681 682 /* 683 * Notify a udp user of an asynchronous error; 684 * just wake up so that he can collect error status. 685 */ 686 void 687 udp_notify(struct inpcb *inp, int error) 688 { 689 inp->inp_socket->so_error = error; 690 sorwakeup(inp->inp_socket); 691 sowwakeup(inp->inp_socket); 692 } 693 694 struct netmsg_udp_notify { 695 struct netmsg_base base; 696 inp_notify_t nm_notify; 697 struct in_addr nm_faddr; 698 int nm_arg; 699 }; 700 701 static void 702 udp_notifyall_oncpu(netmsg_t msg) 703 { 704 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 705 int nextcpu, cpu = mycpuid; 706 707 ASSERT_NETISR_NCPUS(cpu); 708 709 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 710 711 nextcpu = cpu + 1; 712 if (nextcpu < netisr_ncpus) 713 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 714 else 715 lwkt_replymsg(&nm->base.lmsg, 0); 716 } 717 718 inp_notify_t 719 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 720 struct ip **ip0, int *cpuid) 721 { 722 struct in_addr faddr; 723 struct ip *ip = *ip0; 724 inp_notify_t notify = udp_notify; 725 726 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 727 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 728 return NULL; 729 730 if (PRC_IS_REDIRECT(cmd)) { 731 ip = NULL; 732 notify = in_rtchange; 733 } else if (cmd == PRC_HOSTDEAD) { 734 ip = NULL; 735 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 736 return NULL; 737 } 738 739 if (cpuid != NULL) { 740 if (ip == NULL) { 741 /* Go through all effective netisr CPUs. */ 742 *cpuid = netisr_ncpus; 743 } else { 744 const struct udphdr *uh; 745 746 uh = (const struct udphdr *) 747 ((caddr_t)ip + (ip->ip_hl << 2)); 748 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 749 ip->ip_src.s_addr, uh->uh_sport); 750 } 751 } 752 753 *ip0 = ip; 754 return notify; 755 } 756 757 void 758 udp_ctlinput(netmsg_t msg) 759 { 760 struct sockaddr *sa = msg->ctlinput.nm_arg; 761 struct ip *ip = msg->ctlinput.nm_extra; 762 int cmd = msg->ctlinput.nm_cmd, cpuid; 763 inp_notify_t notify; 764 struct in_addr faddr; 765 766 ASSERT_NETISR_NCPUS(mycpuid); 767 768 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 769 if (notify == NULL) 770 goto done; 771 772 faddr = ((struct sockaddr_in *)sa)->sin_addr; 773 if (ip) { 774 const struct udphdr *uh; 775 struct inpcb *inp; 776 777 if (cpuid != mycpuid) 778 goto done; 779 780 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 781 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 782 ip->ip_src, uh->uh_sport, 0, NULL); 783 if (inp != NULL && inp->inp_socket != NULL) 784 notify(inp, inetctlerrmap[cmd]); 785 } else if (msg->ctlinput.nm_direct) { 786 if (cpuid != netisr_ncpus && cpuid != mycpuid) 787 goto done; 788 789 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 790 notify); 791 } else { 792 struct netmsg_udp_notify *nm; 793 794 ASSERT_NETISR0; 795 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 796 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 797 0, udp_notifyall_oncpu); 798 nm->nm_faddr = faddr; 799 nm->nm_arg = inetctlerrmap[cmd]; 800 nm->nm_notify = notify; 801 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 802 } 803 done: 804 lwkt_replymsg(&msg->lmsg, 0); 805 } 806 807 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 808 in_pcblist_ncpus, "S,xinpcb", "List of active UDP sockets"); 809 810 static int 811 udp_getcred(SYSCTL_HANDLER_ARGS) 812 { 813 struct sockaddr_in addrs[2]; 814 struct ucred cred0, *cred = NULL; 815 struct inpcb *inp; 816 int error, cpu, origcpu; 817 818 error = priv_check(req->td, PRIV_ROOT); 819 if (error) 820 return (error); 821 error = SYSCTL_IN(req, addrs, sizeof addrs); 822 if (error) 823 return (error); 824 825 origcpu = mycpuid; 826 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 827 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 828 829 lwkt_migratecpu(cpu); 830 831 inp = in_pcblookup_hash(&udbinfo[cpu], 832 addrs[1].sin_addr, addrs[1].sin_port, 833 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 834 if (inp == NULL || inp->inp_socket == NULL) { 835 error = ENOENT; 836 } else if (inp->inp_socket->so_cred != NULL) { 837 cred0 = *(inp->inp_socket->so_cred); 838 cred = &cred0; 839 } 840 841 lwkt_migratecpu(origcpu); 842 843 if (error) 844 return error; 845 846 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 847 } 848 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 849 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 850 851 static void 852 udp_send_redispatch(netmsg_t msg) 853 { 854 struct mbuf *m = msg->send.nm_m; 855 int pru_flags = msg->send.nm_flags; 856 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 857 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 858 int flags = msg->send.nm_priv; /* ip_output flags */ 859 int error; 860 861 logudp(redisp_ipout_beg, inp); 862 863 /* 864 * - Don't use inp route cache. It should only be used in the 865 * inp owner netisr. 866 * - Access to inp_moptions should be safe, since multicast UDP 867 * datagrams are redispatched to netisr0 and inp_moptions is 868 * changed only in netisr0. 869 */ 870 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 871 if ((pru_flags & PRUS_NOREPLY) == 0) 872 lwkt_replymsg(&msg->send.base.lmsg, error); 873 874 if (m_opt != NULL) { 875 /* Free saved ip options, if any */ 876 m_freem(m_opt); 877 } 878 879 logudp(redisp_ipout_end, inp); 880 } 881 882 static void 883 udp_send(netmsg_t msg) 884 { 885 struct socket *so = msg->send.base.nm_so; 886 struct mbuf *m = msg->send.nm_m; 887 struct sockaddr *dstaddr = msg->send.nm_addr; 888 int pru_flags = msg->send.nm_flags; 889 struct inpcb *inp = so->so_pcb; 890 struct thread *td = msg->send.nm_td; 891 uint16_t hash; 892 int flags; 893 894 struct udpiphdr *ui; 895 int len = m->m_pkthdr.len; 896 struct sockaddr_in *sin; /* really is initialized before use */ 897 int error = 0, cpu; 898 899 KKASSERT(msg->send.nm_control == NULL); 900 901 logudp(send_beg, inp); 902 903 if (inp == NULL) { 904 error = EINVAL; 905 goto release; 906 } 907 908 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 909 error = EMSGSIZE; 910 goto release; 911 } 912 913 if (inp->inp_lport == 0) { /* unbound socket */ 914 boolean_t forwarded; 915 916 error = in_pcbbind(inp, NULL, td); 917 if (error) 918 goto release; 919 920 /* 921 * Need to call udp_send again, after this inpcb is 922 * inserted into wildcard hash table. 923 */ 924 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 925 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 926 if (forwarded) { 927 /* 928 * The message is further forwarded, so we are 929 * done here. 930 */ 931 logudp(send_inswildcard, inp); 932 return; 933 } 934 } 935 936 if (dstaddr != NULL) { /* destination address specified */ 937 if (inp->inp_faddr.s_addr != INADDR_ANY) { 938 /* already connected */ 939 error = EISCONN; 940 goto release; 941 } 942 sin = (struct sockaddr_in *)dstaddr; 943 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 944 error = EAFNOSUPPORT; /* IPv6 only jail */ 945 goto release; 946 } 947 } else { 948 if (inp->inp_faddr.s_addr == INADDR_ANY) { 949 /* no destination specified and not already connected */ 950 error = ENOTCONN; 951 goto release; 952 } 953 sin = NULL; 954 } 955 956 /* 957 * Calculate data length and get a mbuf 958 * for UDP and IP headers. 959 */ 960 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 961 if (m == NULL) { 962 error = ENOBUFS; 963 goto release; 964 } 965 966 /* 967 * Fill in mbuf with extended UDP header 968 * and addresses and length put into network format. 969 */ 970 ui = mtod(m, struct udpiphdr *); 971 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 972 ui->ui_pr = IPPROTO_UDP; 973 974 /* 975 * Set destination address. 976 */ 977 if (dstaddr != NULL) { /* use specified destination */ 978 ui->ui_dst = sin->sin_addr; 979 ui->ui_dport = sin->sin_port; 980 } else { /* use connected destination */ 981 ui->ui_dst = inp->inp_faddr; 982 ui->ui_dport = inp->inp_fport; 983 } 984 985 /* 986 * Set source address. 987 */ 988 if (inp->inp_laddr.s_addr == INADDR_ANY || 989 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 990 struct sockaddr_in *if_sin; 991 992 if (dstaddr == NULL) { 993 /* 994 * connect() had (or should have) failed because 995 * the interface had no IP address, but the 996 * application proceeded to call send() anyways. 997 */ 998 error = ENOTCONN; 999 goto release; 1000 } 1001 1002 /* Look up outgoing interface. */ 1003 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1004 if (error) 1005 goto release; 1006 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1007 } else { 1008 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1009 } 1010 ui->ui_sport = inp->inp_lport; 1011 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1012 1013 /* 1014 * Release the original thread, since it is no longer used 1015 */ 1016 if (pru_flags & PRUS_HELDTD) { 1017 lwkt_rele(td); 1018 pru_flags &= ~PRUS_HELDTD; 1019 } 1020 /* 1021 * Free the dest address, since it is no longer needed 1022 */ 1023 if (pru_flags & PRUS_FREEADDR) { 1024 kfree(dstaddr, M_SONAME); 1025 pru_flags &= ~PRUS_FREEADDR; 1026 } 1027 1028 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1029 1030 /* 1031 * Set up checksum and output datagram. 1032 */ 1033 if (udpcksum) { 1034 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1035 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1036 m->m_pkthdr.csum_flags = CSUM_UDP; 1037 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1038 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1039 } else { 1040 ui->ui_sum = 0; 1041 } 1042 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1043 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1044 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1045 udp_stat.udps_opackets++; 1046 1047 flags = IP_DEBUGROUTE | 1048 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1049 if (pru_flags & PRUS_DONTROUTE) 1050 flags |= SO_DONTROUTE; 1051 1052 if (inp->inp_flags & INP_CONNECTED) { 1053 /* 1054 * For connected socket, this datagram has already 1055 * been in the correct netisr; no need to rehash. 1056 */ 1057 KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); 1058 m_sethash(m, inp->inp_hashval); 1059 goto sendit; 1060 } 1061 1062 hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, 1063 ui->ui_src.s_addr, ui->ui_sport); 1064 m_sethash(m, hash); 1065 1066 cpu = netisr_hashcpu(hash); 1067 if (cpu != mycpuid) { 1068 struct mbuf *m_opt = NULL; 1069 struct netmsg_pru_send *smsg; 1070 struct lwkt_port *port = netisr_cpuport(cpu); 1071 1072 /* 1073 * Not on the CPU that matches this UDP datagram hash; 1074 * redispatch to the correct CPU to do the ip_output(). 1075 */ 1076 if (inp->inp_options != NULL) { 1077 /* 1078 * If there are ip options, then save a copy, 1079 * since accessing inp_options on other CPUs' 1080 * is not safe. 1081 * 1082 * XXX optimize this? 1083 */ 1084 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1085 M_WAITOK); 1086 } 1087 if ((pru_flags & PRUS_NOREPLY) == 0) { 1088 /* 1089 * Change some parts of the original netmsg and 1090 * forward it to the target netisr. 1091 * 1092 * NOTE: so_port MUST NOT be checked in the target 1093 * netisr. 1094 */ 1095 smsg = &msg->send; 1096 smsg->nm_priv = flags; /* ip_output flags */ 1097 smsg->nm_m = m; 1098 smsg->nm_control = m_opt; /* XXX save ipopt */ 1099 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1100 smsg->base.nm_dispatch = udp_send_redispatch; 1101 lwkt_forwardmsg(port, &smsg->base.lmsg); 1102 } else { 1103 /* 1104 * Recreate the netmsg, since the original mbuf 1105 * could have been changed. And send it to the 1106 * target netisr. 1107 * 1108 * NOTE: so_port MUST NOT be checked in the target 1109 * netisr. 1110 */ 1111 smsg = &m->m_hdr.mh_sndmsg; 1112 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1113 MSGF_IGNSOPORT, udp_send_redispatch); 1114 smsg->nm_priv = flags; /* ip_output flags */ 1115 smsg->nm_flags = pru_flags; 1116 smsg->nm_m = m; 1117 smsg->nm_control = m_opt; /* XXX save ipopt */ 1118 lwkt_sendmsg(port, &smsg->base.lmsg); 1119 } 1120 1121 /* This UDP datagram is redispatched; done */ 1122 logudp(send_redisp, inp); 1123 return; 1124 } 1125 1126 sendit: 1127 logudp(send_ipout, inp); 1128 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1129 inp->inp_moptions, inp); 1130 m = NULL; 1131 1132 release: 1133 if (m != NULL) 1134 m_freem(m); 1135 1136 if (pru_flags & PRUS_HELDTD) 1137 lwkt_rele(td); 1138 if (pru_flags & PRUS_FREEADDR) 1139 kfree(dstaddr, M_SONAME); 1140 if ((pru_flags & PRUS_NOREPLY) == 0) 1141 lwkt_replymsg(&msg->send.base.lmsg, error); 1142 1143 logudp(send_end, inp); 1144 } 1145 1146 u_long udp_sendspace = 9216; /* really max datagram size */ 1147 /* 40 1K datagrams */ 1148 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1149 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1150 1151 u_long udp_recvspace = 40 * (1024 + 1152 #ifdef INET6 1153 sizeof(struct sockaddr_in6) 1154 #else 1155 sizeof(struct sockaddr_in) 1156 #endif 1157 ); 1158 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1159 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1160 1161 /* 1162 * This should never happen, since UDP socket does not support 1163 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 1164 */ 1165 static void 1166 udp_abort(netmsg_t msg __unused) 1167 { 1168 panic("udp_abort is called"); 1169 } 1170 1171 static int 1172 udp_preattach(struct socket *so, int proto __unused, struct pru_attach_info *ai) 1173 { 1174 return soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1175 } 1176 1177 static void 1178 udp_attach(netmsg_t msg) 1179 { 1180 struct socket *so = msg->attach.base.nm_so; 1181 struct pru_attach_info *ai = msg->attach.nm_ai; 1182 struct inpcb *inp; 1183 int error; 1184 1185 KASSERT(so->so_pcb == NULL, ("udp socket attached")); 1186 1187 if (ai != NULL) { 1188 error = udp_preattach(so, 0 /* don't care */, ai); 1189 if (error) 1190 goto out; 1191 } else { 1192 /* Post attach; do nothing */ 1193 } 1194 1195 error = in_pcballoc(so, &udbinfo[mycpuid]); 1196 if (error) 1197 goto out; 1198 1199 inp = so->so_pcb; 1200 inp->inp_flags |= INP_DIRECT_DETACH; 1201 inp->inp_ip_ttl = ip_defttl; 1202 error = 0; 1203 out: 1204 lwkt_replymsg(&msg->attach.base.lmsg, error); 1205 } 1206 1207 static void 1208 udp_inswildcard_replymsg(netmsg_t msg) 1209 { 1210 lwkt_msg_t lmsg = &msg->lmsg; 1211 1212 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1213 udp_send(msg); 1214 /* msg is replied by udp_send() */ 1215 } else { 1216 lwkt_replymsg(lmsg, lmsg->ms_error); 1217 } 1218 } 1219 1220 static void 1221 udp_soreuseport_dispatch(netmsg_t msg) 1222 { 1223 /* This inpcb has already been in the wildcard hash. */ 1224 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1225 udp_inswildcard_replymsg(msg); 1226 } 1227 1228 static void 1229 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1230 { 1231 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1232 } 1233 1234 static boolean_t 1235 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1236 { 1237 int cpu; 1238 1239 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1240 ("not on owner cpu")); 1241 1242 in_pcbinswildcardhash(inp); 1243 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1244 if (cpu == mycpuid) { 1245 /* 1246 * This inpcb has been inserted by the above 1247 * in_pcbinswildcardhash(). 1248 */ 1249 continue; 1250 } 1251 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1252 } 1253 1254 /* NOTE: inp_lgrpindex is _not_ assigned in jail. */ 1255 if ((inp->inp_socket->so_options & SO_REUSEPORT) && 1256 inp->inp_lgrpindex >= 0) { 1257 /* 1258 * For SO_REUSEPORT socket, redistribute it based on its 1259 * local group index. 1260 */ 1261 cpu = inp->inp_lgrpindex % netisr_ncpus; 1262 if (cpu != mycpuid) { 1263 struct lwkt_port *port = netisr_cpuport(cpu); 1264 lwkt_msg_t lmsg = &msg->lmsg; 1265 1266 /* 1267 * We are moving the protocol processing port the 1268 * socket is on, we have to unlink here and re-link 1269 * on the target cpu (this inpcb is still left in 1270 * the wildcard hash). 1271 */ 1272 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1273 msg->nm_dispatch = udp_soreuseport_dispatch; 1274 1275 /* 1276 * See the related comment in tcp_usrreq.c 1277 * tcp_connect() 1278 */ 1279 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1280 lwkt_forwardmsg(port, lmsg); 1281 return TRUE; /* forwarded */ 1282 } 1283 } 1284 return FALSE; 1285 } 1286 1287 static void 1288 udp_inswildcardhash_dispatch(netmsg_t msg) 1289 { 1290 struct inpcb *inp = msg->base.nm_so->so_pcb; 1291 boolean_t forwarded; 1292 1293 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1294 KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu")); 1295 1296 in_pcblink(inp, &udbinfo[mycpuid]); 1297 1298 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1299 if (forwarded) { 1300 /* The message is further forwarded, so we are done here. */ 1301 return; 1302 } 1303 udp_inswildcard_replymsg(msg); 1304 } 1305 1306 static boolean_t 1307 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1308 { 1309 lwkt_msg_t lmsg = &msg->lmsg; 1310 int cpu; 1311 1312 ASSERT_INP_NOTINHASH(inp); 1313 1314 /* This inpcb could no longer be directly detached */ 1315 inp->inp_flags &= ~INP_DIRECT_DETACH; 1316 1317 /* 1318 * Always clear the route cache, so we don't need to 1319 * worry about any owner CPU changes later. 1320 */ 1321 in_pcbresetroute(inp); 1322 1323 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1324 cpu = udp_lportcpu(inp->inp_lport); 1325 1326 lmsg->ms_error = error; 1327 if (cpu != mycpuid) { 1328 struct lwkt_port *port = netisr_cpuport(cpu); 1329 1330 /* 1331 * We are moving the protocol processing port the socket 1332 * is on, we have to unlink here and re-link on the 1333 * target cpu. 1334 */ 1335 in_pcbunlink(inp, &udbinfo[mycpuid]); 1336 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1337 1338 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1339 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1340 lwkt_forwardmsg(port, lmsg); 1341 return TRUE; /* forwarded */ 1342 } 1343 1344 return udp_inswildcardhash_oncpu(inp, msg); 1345 } 1346 1347 static void 1348 udp_bind(netmsg_t msg) 1349 { 1350 struct socket *so = msg->bind.base.nm_so; 1351 struct inpcb *inp; 1352 int error; 1353 1354 inp = so->so_pcb; 1355 if (inp) { 1356 struct sockaddr *nam = msg->bind.nm_nam; 1357 struct thread *td = msg->bind.nm_td; 1358 struct sockaddr_in *sin; 1359 lwkt_port_t port; 1360 int cpu; 1361 1362 /* 1363 * Check "already bound" here (in_pcbbind() does the same 1364 * check though), so we don't forward a connected/bound 1365 * socket randomly which would panic in the following 1366 * in_pcbunlink(). 1367 */ 1368 if (inp->inp_lport != 0 || 1369 inp->inp_laddr.s_addr != INADDR_ANY) { 1370 error = EINVAL; /* already bound */ 1371 goto done; 1372 } 1373 1374 if (nam->sa_len != sizeof(*sin)) { 1375 error = EINVAL; 1376 goto done; 1377 } 1378 sin = (struct sockaddr_in *)nam; 1379 1380 cpu = udp_lportcpu(sin->sin_port); 1381 port = netisr_cpuport(cpu); 1382 1383 /* 1384 * See the related comment in tcp_usrreq.c tcp_usr_bind(). 1385 * The exception is that we use local port based netisr 1386 * to serialize in_pcbbind(). 1387 */ 1388 if (&curthread->td_msgport != port) { 1389 lwkt_msg_t lmsg = &msg->bind.base.lmsg; 1390 1391 KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0, 1392 ("already asked to relink")); 1393 1394 in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]); 1395 msg->bind.nm_flags |= PRUB_RELINK; 1396 1397 /* 1398 * See the related comment in tcp_usrreq.c 1399 * tcp_connect(). 1400 */ 1401 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1402 lwkt_forwardmsg(port, lmsg); 1403 /* msg invalid now */ 1404 return; 1405 } 1406 KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu)); 1407 1408 if (msg->bind.nm_flags & PRUB_RELINK) { 1409 msg->bind.nm_flags &= ~PRUB_RELINK; 1410 in_pcblink(so->so_pcb, &udbinfo[mycpuid]); 1411 } 1412 KASSERT(inp->inp_pcbinfo == &udbinfo[cpu], 1413 ("pcbinfo is not udbinfo%d", cpu)); 1414 1415 error = in_pcbbind(inp, nam, td); 1416 if (error == 0) { 1417 boolean_t forwarded; 1418 1419 if (sin->sin_addr.s_addr != INADDR_ANY) 1420 inp->inp_flags |= INP_WASBOUND_NOTANY; 1421 1422 forwarded = udp_inswildcardhash(inp, 1423 &msg->bind.base, 0); 1424 if (forwarded) { 1425 /* 1426 * The message is further forwarded, so 1427 * we are done here. 1428 */ 1429 return; 1430 } 1431 } 1432 } else { 1433 error = EINVAL; 1434 } 1435 done: 1436 lwkt_replymsg(&msg->bind.base.lmsg, error); 1437 } 1438 1439 static int 1440 udp_preconnect(struct socket *so, const struct sockaddr *nam __unused, 1441 struct thread *td __unused) 1442 { 1443 sosetstate(so, SS_ISCONNECTED); /* XXX */ 1444 return 0; 1445 } 1446 1447 static void 1448 udp_connect(netmsg_t msg) 1449 { 1450 struct socket *so = msg->connect.base.nm_so; 1451 struct sockaddr *nam = msg->connect.nm_nam; 1452 struct thread *td = msg->connect.nm_td; 1453 struct inpcb *inp; 1454 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1455 struct sockaddr_in *if_sin; 1456 struct lwkt_port *port; 1457 uint16_t hash; 1458 int error; 1459 1460 KKASSERT(msg->connect.nm_m == NULL); 1461 1462 inp = so->so_pcb; 1463 if (inp == NULL) { 1464 error = EINVAL; 1465 goto out; 1466 } 1467 1468 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1469 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1470 in_pcblink(inp, &udbinfo[mycpuid]); 1471 } 1472 1473 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1474 error = EISCONN; 1475 goto out; 1476 } 1477 error = 0; 1478 1479 /* 1480 * Bind if we have to 1481 */ 1482 if (inp->inp_lport == 0) { 1483 error = in_pcbbind(inp, NULL, td); 1484 if (error) 1485 goto out; 1486 } 1487 1488 /* 1489 * Calculate the correct protocol processing thread. The connect 1490 * operation must run there. 1491 */ 1492 error = in_pcbladdr(inp, nam, &if_sin, td); 1493 if (error) 1494 goto out; 1495 if (!prison_remote_ip(td, nam)) { 1496 error = EAFNOSUPPORT; /* IPv6 only jail */ 1497 goto out; 1498 } 1499 1500 hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, 1501 inp->inp_laddr.s_addr != INADDR_ANY ? 1502 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1503 port = netisr_hashport(hash); 1504 if (port != &curthread->td_msgport) { 1505 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1506 int nm_flags = PRUC_RECONNECT; 1507 1508 /* 1509 * in_pcbladdr() may have allocated a route entry for us 1510 * on the current CPU, but we need a route entry on the 1511 * inpcb's owner CPU, so free it here. 1512 */ 1513 in_pcbresetroute(inp); 1514 1515 if (inp->inp_flags & INP_WILDCARD) { 1516 /* 1517 * Remove this inpcb from the wildcard hash before 1518 * the socket's msgport changes. 1519 */ 1520 udp_remwildcardhash(inp); 1521 } 1522 1523 if (so->so_orig_port == NULL) { 1524 /* 1525 * First time change protocol processing port. 1526 * Save the current port for synchronization upon 1527 * udp_detach. 1528 */ 1529 so->so_orig_port = &curthread->td_msgport; 1530 } else { 1531 /* 1532 * We have changed protocol processing port more 1533 * than once. We could not do direct detach 1534 * anymore, because we lose the track of the 1535 * original protocol processing ports to perform 1536 * synchronization upon udp_detach. This should 1537 * be rare though. 1538 */ 1539 inp->inp_flags &= ~INP_DIRECT_DETACH; 1540 } 1541 1542 /* 1543 * We are moving the protocol processing port the socket 1544 * is on, we have to unlink here and re-link on the 1545 * target cpu. 1546 */ 1547 in_pcbunlink(inp, &udbinfo[mycpuid]); 1548 msg->connect.nm_flags |= nm_flags; 1549 1550 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1551 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1552 lwkt_forwardmsg(port, lmsg); 1553 /* msg invalid now */ 1554 return; 1555 } 1556 error = udp_connect_oncpu(inp, sin, if_sin, hash); 1557 out: 1558 if (msg->connect.nm_flags & PRUC_HELDTD) 1559 lwkt_rele(td); 1560 if (error && (msg->connect.nm_flags & PRUC_ASYNC)) { 1561 if (inp->inp_lport == 0) { 1562 /* 1563 * As long as we have the local port, it is fine 1564 * for connect to fail, e.g. disconnect. 1565 */ 1566 so->so_error = error; 1567 } 1568 soclrstate(so, SS_ISCONNECTED); 1569 /* 1570 * Wake up callers blocked on this socket to make sure 1571 * that they can see this error. 1572 * 1573 * NOTE: 1574 * sodisconnected() can't be used here, which bricks 1575 * sending and receiving. 1576 */ 1577 wakeup(&so->so_timeo); 1578 sowwakeup(so); 1579 sorwakeup(so); 1580 } 1581 if (error && inp != NULL && inp->inp_lport != 0 && 1582 (inp->inp_flags & INP_WILDCARD) == 0) { 1583 boolean_t forwarded; 1584 1585 /* Connect failed; put it to wildcard hash. */ 1586 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1587 error); 1588 if (forwarded) { 1589 /* 1590 * The message is further forwarded, so we are done 1591 * here. 1592 */ 1593 return; 1594 } 1595 } 1596 lwkt_replymsg(&msg->connect.base.lmsg, error); 1597 } 1598 1599 static void 1600 udp_remwildcardhash(struct inpcb *inp) 1601 { 1602 int cpu; 1603 1604 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1605 ("not on owner cpu")); 1606 1607 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 1608 if (cpu == mycpuid) { 1609 /* 1610 * This inpcb will be removed by the later 1611 * in_pcbremwildcardhash(). 1612 */ 1613 continue; 1614 } 1615 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1616 } 1617 in_pcbremwildcardhash(inp); 1618 } 1619 1620 static int 1621 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1622 struct sockaddr_in *if_sin, uint16_t hash) 1623 { 1624 struct socket *so = inp->inp_socket; 1625 struct inpcb *oinp; 1626 1627 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1628 sin->sin_addr, sin->sin_port, 1629 inp->inp_laddr.s_addr != INADDR_ANY ? 1630 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1631 if (oinp != NULL) 1632 return EADDRINUSE; 1633 1634 /* 1635 * No more errors can occur, finish adjusting the socket 1636 * and change the processing port to reflect the connected 1637 * socket. Once set we can no longer safely mess with the 1638 * socket. 1639 */ 1640 1641 if (inp->inp_flags & INP_WILDCARD) 1642 udp_remwildcardhash(inp); 1643 1644 if (inp->inp_laddr.s_addr == INADDR_ANY) 1645 inp->inp_laddr = if_sin->sin_addr; 1646 inp->inp_faddr = sin->sin_addr; 1647 inp->inp_fport = sin->sin_port; 1648 in_pcbinsconnhash(inp); 1649 1650 inp->inp_flags |= INP_HASH; 1651 inp->inp_hashval = hash; 1652 1653 soisconnected(so); 1654 1655 return 0; 1656 } 1657 1658 static void 1659 udp_detach2(struct socket *so) 1660 { 1661 in_pcbdetach(so->so_pcb); 1662 sodiscard(so); 1663 sofree(so); 1664 } 1665 1666 static void 1667 udp_detach_final_dispatch(netmsg_t msg) 1668 { 1669 udp_detach2(msg->base.nm_so); 1670 } 1671 1672 static void 1673 udp_detach_oncpu_dispatch(netmsg_t msg) 1674 { 1675 struct netmsg_base *clomsg = &msg->base; 1676 struct socket *so = clomsg->nm_so; 1677 struct inpcb *inp = so->so_pcb; 1678 struct thread *td = curthread; 1679 int nextcpu, cpuid = mycpuid; 1680 1681 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1682 1683 if (inp->inp_flags & INP_WILDCARD) { 1684 /* 1685 * This inp will be removed on the inp's 1686 * owner CPU later, so don't do it now. 1687 */ 1688 if (&td->td_msgport != so->so_port) 1689 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1690 } 1691 1692 if (cpuid == 0) { 1693 /* 1694 * Free and clear multicast socket option, 1695 * which is only accessed in netisr0. 1696 */ 1697 ip_freemoptions(inp->inp_moptions); 1698 inp->inp_moptions = NULL; 1699 } 1700 1701 nextcpu = cpuid + 1; 1702 if (nextcpu < netisr_ncpus) { 1703 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1704 } else { 1705 /* 1706 * No one could see this inpcb now; destroy this 1707 * inpcb in its owner netisr. 1708 */ 1709 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1710 udp_detach_final_dispatch); 1711 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1712 } 1713 } 1714 1715 static void 1716 udp_detach_syncorig_dispatch(netmsg_t msg) 1717 { 1718 struct netmsg_base *clomsg = &msg->base; 1719 struct socket *so = clomsg->nm_so; 1720 1721 /* 1722 * Original protocol processing port is synchronized; 1723 * destroy this inpcb in its owner netisr. 1724 */ 1725 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1726 udp_detach_final_dispatch); 1727 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1728 } 1729 1730 static void 1731 udp_detach(netmsg_t msg) 1732 { 1733 struct socket *so = msg->detach.base.nm_so; 1734 struct netmsg_base *clomsg; 1735 struct inpcb *inp; 1736 1737 inp = so->so_pcb; 1738 if (inp == NULL) { 1739 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1740 return; 1741 } 1742 1743 /* 1744 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1745 * sofree() later. 1746 */ 1747 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1748 1749 if (netisr_ncpus == 1) { 1750 /* Only one CPU, detach the inpcb directly. */ 1751 udp_detach2(so); 1752 return; 1753 } 1754 1755 /* 1756 * Remove this inpcb from the inpcb list first, so that 1757 * no one could find this inpcb from the inpcb list. 1758 */ 1759 in_pcbofflist(inp); 1760 1761 /* 1762 * Remove this inpcb from the local port hash directly 1763 * here, so that its bound local port could be recycled 1764 * timely. 1765 */ 1766 in_pcbremporthash(inp); 1767 1768 if (inp->inp_flags & INP_DIRECT_DETACH) { 1769 /* 1770 * Direct detaching is allowed 1771 */ 1772 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1773 ("in the wildcardhash")); 1774 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1775 if (so->so_orig_port == NULL) { 1776 udp_detach2(so); 1777 } else { 1778 /* 1779 * Protocol processing port changed once, so 1780 * we need to make sure that there are nothing 1781 * left on the original protocol processing 1782 * port before we destroy this socket and inpcb. 1783 * This is more lightweight than going through 1784 * all UDP processing netisrs. 1785 */ 1786 clomsg = &so->so_clomsg; 1787 netmsg_init(clomsg, so, &netisr_apanic_rport, 1788 MSGF_IGNSOPORT, udp_detach_syncorig_dispatch); 1789 lwkt_sendmsg(so->so_orig_port, &clomsg->lmsg); 1790 } 1791 return; 1792 } 1793 1794 /* 1795 * Go through netisrs which process UDP to make sure 1796 * no one could find this inpcb anymore. 1797 */ 1798 clomsg = &so->so_clomsg; 1799 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1800 udp_detach_oncpu_dispatch); 1801 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1802 } 1803 1804 static void 1805 udp_disconnect(netmsg_t msg) 1806 { 1807 struct socket *so = msg->disconnect.base.nm_so; 1808 struct inpcb *inp; 1809 boolean_t forwarded; 1810 int error = 0; 1811 1812 inp = so->so_pcb; 1813 if (inp == NULL) { 1814 error = EINVAL; 1815 goto out; 1816 } 1817 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1818 error = ENOTCONN; 1819 goto out; 1820 } 1821 1822 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1823 1824 in_pcbdisconnect(inp); 1825 inp->inp_flags &= ~INP_HASH; 1826 1827 /* 1828 * Follow traditional BSD behavior and retain the local port 1829 * binding. But, fix the old misbehavior of overwriting any 1830 * previously bound local address. 1831 */ 1832 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1833 inp->inp_laddr.s_addr = INADDR_ANY; 1834 1835 if (so->so_state & SS_ISCLOSING) { 1836 /* 1837 * If this socket is being closed, there is no need 1838 * to put this socket back into wildcard hash table. 1839 */ 1840 error = 0; 1841 goto out; 1842 } 1843 1844 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1845 if (forwarded) { 1846 /* 1847 * The message is further forwarded, so we are done 1848 * here. 1849 */ 1850 return; 1851 } 1852 out: 1853 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1854 } 1855 1856 void 1857 udp_shutdown(netmsg_t msg) 1858 { 1859 struct socket *so = msg->shutdown.base.nm_so; 1860 struct inpcb *inp; 1861 int error; 1862 1863 inp = so->so_pcb; 1864 if (inp) { 1865 socantsendmore(so); 1866 error = 0; 1867 } else { 1868 error = EINVAL; 1869 } 1870 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1871 } 1872 1873 struct pr_usrreqs udp_usrreqs = { 1874 .pru_abort = udp_abort, 1875 .pru_accept = pr_generic_notsupp, 1876 .pru_attach = udp_attach, 1877 .pru_bind = udp_bind, 1878 .pru_connect = udp_connect, 1879 .pru_connect2 = pr_generic_notsupp, 1880 .pru_control = in_control_dispatch, 1881 .pru_detach = udp_detach, 1882 .pru_disconnect = udp_disconnect, 1883 .pru_listen = pr_generic_notsupp, 1884 .pru_peeraddr = in_setpeeraddr_dispatch, 1885 .pru_rcvd = pr_generic_notsupp, 1886 .pru_rcvoob = pr_generic_notsupp, 1887 .pru_send = udp_send, 1888 .pru_sense = pru_sense_null, 1889 .pru_shutdown = udp_shutdown, 1890 .pru_sockaddr = in_setsockaddr_dispatch, 1891 .pru_sosend = sosendudp, 1892 .pru_soreceive = soreceive, 1893 .pru_preconnect = udp_preconnect, 1894 .pru_preattach = udp_preattach 1895 }; 1896