1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 124 125 #define UDP_KTR_STRING "inp=%p" 126 #define UDP_KTR_ARGS struct inpcb *inp 127 128 #ifndef KTR_UDP 129 #define KTR_UDP KTR_ALL 130 #endif 131 132 KTR_INFO_MASTER(udp); 133 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 136 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 137 KTR_INFO(KTR_UDP, udp, 
redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 138 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 139 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 140 141 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 142 143 /* 144 * UDP protocol implementation. 145 * Per RFC 768, August, 1980. 146 */ 147 #ifndef COMPAT_42 148 static int udpcksum = 1; 149 #else 150 static int udpcksum = 0; /* XXX */ 151 #endif 152 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 153 &udpcksum, 0, "Enable checksumming of UDP packets"); 154 155 int log_in_vain = 0; 156 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 157 &log_in_vain, 0, "Log all incoming UDP packets"); 158 159 static int blackhole = 0; 160 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 161 &blackhole, 0, "Do not send port unreachables for refused connects"); 162 163 static int strict_mcast_mship = 1; 164 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 165 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 166 167 int udp_sosend_async = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 169 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 170 171 int udp_sosend_prepend = 1; 172 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 173 &udp_sosend_prepend, 0, 174 "Prepend enough space for proto and link header in pru_send"); 175 176 static int udp_reuseport_ext = 1; 177 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 178 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 179 180 struct inpcbinfo udbinfo[MAXCPU]; 181 182 #ifndef UDBHASHSIZE 183 #define UDBHASHSIZE 16 184 #endif 185 186 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 187 188 static void udp_append(struct inpcb *last, struct ip *ip, 189 struct mbuf *n, int off, struct sockaddr_in *udp_in); 190 191 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 192 struct 
sockaddr_in *if_sin); 193 194 static boolean_t udp_inswildcardhash(struct inpcb *inp, 195 struct netmsg_base *msg, int error); 196 static void udp_remwildcardhash(struct inpcb *inp); 197 198 void 199 udp_init(void) 200 { 201 struct inpcbportinfo *portinfo; 202 int cpu; 203 204 portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB, 205 M_WAITOK); 206 207 for (cpu = 0; cpu < ncpus2; cpu++) { 208 struct inpcbinfo *uicb = &udbinfo[cpu]; 209 210 /* 211 * NOTE: 212 * UDP pcb list, wildcard hash table and localgroup hash 213 * table are shared. 214 */ 215 in_pcbinfo_init(uicb, cpu, TRUE); 216 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 217 218 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, TRUE, cpu); 219 uicb->portinfo = portinfo; 220 uicb->portinfo_mask = ncpus2_mask; 221 222 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 223 &uicb->wildcardhashmask); 224 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 225 &uicb->localgrphashmask); 226 227 uicb->ipi_size = sizeof(struct inpcb); 228 } 229 230 /* 231 * Initialize UDP statistics counters for each CPU. 
232 */ 233 for (cpu = 0; cpu < ncpus; ++cpu) 234 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 235 } 236 237 static int 238 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 239 { 240 int cpu, error = 0; 241 242 for (cpu = 0; cpu < ncpus; ++cpu) { 243 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 244 sizeof(struct udpstat)))) 245 break; 246 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 247 sizeof(struct udpstat)))) 248 break; 249 } 250 251 return (error); 252 } 253 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 254 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 255 256 void 257 udp_ctloutput(netmsg_t msg) 258 { 259 struct socket *so = msg->base.nm_so; 260 struct sockopt *sopt = msg->ctloutput.nm_sopt; 261 struct inpcb *inp = so->so_pcb; 262 263 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 264 switch (sopt->sopt_name) { 265 case IP_MULTICAST_IF: 266 case IP_MULTICAST_VIF: 267 case IP_MULTICAST_TTL: 268 case IP_MULTICAST_LOOP: 269 case IP_ADD_MEMBERSHIP: 270 case IP_DROP_MEMBERSHIP: 271 /* 272 * This pr_ctloutput msg will be forwarded 273 * to netisr0 to run; we can't do direct 274 * detaching anymore. 275 * 276 * NOTE: 277 * Don't optimize for the sockets whose 278 * current so_port is netisr0's msgport. 279 * These sockets could be connect(2)'ed 280 * later and the so_port will be changed. 281 */ 282 inp->inp_flags &= ~INP_DIRECT_DETACH; 283 break; 284 } 285 } 286 return ip_ctloutput(msg); 287 } 288 289 /* 290 * Check multicast packets to make sure they are only sent to sockets with 291 * multicast memberships for the packet's destination address and arrival 292 * interface. Multicast packets to multicast-unaware sockets are also 293 * disallowed. 294 * 295 * Returns 0 if the packet is acceptable, -1 if it is not. 
*/
static __inline int
check_multicast_membership(const struct ip *ip, const struct inpcb *inp,
			   const struct mbuf *m)
{
	const struct ip_moptions *mopt;
	int mshipno;

	/* Nothing to check for unicast/broadcast or when checking is off. */
	if (strict_mcast_mship == 0 ||
	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		return (0);
	}

	ASSERT_IN_NETISR(0);

	/* A socket without multicast options has no memberships at all. */
	mopt = inp->inp_moptions;
	if (mopt == NULL)
		return (-1);
	/* Accept only if a membership matches both group and interface. */
	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
		const struct in_multi *maddr = mopt->imo_membership[mshipno];

		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
			return (0);
		}
	}
	return (-1);
}

/*
 * Argument bundle for udp_mcast_input().  "last" is both an input (the
 * previously matched pcb, or NULL) and an output (updated to "inp" when
 * "inp" accepts the packet).
 */
struct udp_mcast_arg {
	struct inpcb	*inp;		/* candidate pcb */
	struct inpcb	*last;		/* previous match, in/out */
	struct ip	*ip;		/* IP header of the datagram */
	struct mbuf	*m;		/* the datagram itself */
	int		iphlen;		/* IP header length */
	struct sockaddr_in *udp_in;	/* source address of the datagram */
};

/*
 * Offer a multicast/broadcast datagram to one candidate pcb.
 *
 * If the candidate passes the membership check, a copy of the datagram is
 * first delivered to the previously matched pcb (arg->last), if any, and
 * the candidate then becomes the new arg->last.  The original mbuf is never
 * consumed here; the caller delivers it to the final arg->last.
 *
 * Returns:
 *	ERESTART	candidate rejected; caller continues with the next pcb
 *	EJUSTRETURN	candidate accepted and has neither SO_REUSEPORT nor
 *			SO_REUSEADDR set; caller stops searching
 *	0		candidate accepted; caller keeps searching
 */
static int
udp_mcast_input(struct udp_mcast_arg *arg)
{
	struct inpcb *inp = arg->inp;
	struct inpcb *last = arg->last;
	struct ip *ip = arg->ip;
	struct mbuf *m = arg->m;

	if (check_multicast_membership(ip, inp, m) < 0)
		return ERESTART; /* caller continue */

	if (last != NULL) {
		struct mbuf *n;

		/*
		 * NOTE: the two #ifdef sections below each end in a dangling
		 * "else", so together with the final "if" they form a single
		 * if/else chain whose shape depends on the kernel options.
		 */
#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket))
			ipsecstat.in_polvio++;
			/* do not inject data to pcb */
		else
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			;
		else
#endif /*FAST_IPSEC*/
		if ((n = m_copypacket(m, M_NOWAIT)) != NULL)
			udp_append(last, ip, n,
			    arg->iphlen + sizeof(struct udphdr),
			    arg->udp_in);
	}
	arg->last = last = inp;

	/*
	 * Don't look for additional matches if this one does
	 * not have either the SO_REUSEPORT or SO_REUSEADDR
	 * socket options set.  This heuristic avoids searching
	 * through all pcbs in the common case of a non-shared
	 * port.  It assumes that an application will never
	 * clear these options after setting them.
	 */
	if (!(last->inp_socket->so_options &
	    (SO_REUSEPORT | SO_REUSEADDR)))
		return EJUSTRETURN; /* caller stop */
	return 0;
}

/*
 * UDP input routine.
 *
 * Takes ownership of the mbuf in *mp (and sets *mp to NULL); *offp is the
 * IP header length.  The datagram is validated, then either delivered to
 * every matching socket (multicast/broadcast destination) or to the single
 * pcb found by in_pcblookup_pkthash().  If no pcb matches a unicast
 * datagram, an ICMP port unreachable may be sent.  Always returns
 * IPPROTO_DONE.  The "proto" argument is not used.
 */
int
udp_input(struct mbuf **mp, int *offp, int proto)
{
	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
	int iphlen;
	struct ip *ip;
	struct udphdr *uh;
	struct inpcb *inp;
	struct mbuf *m;
	struct mbuf *opts = NULL;
	int len, off;
	struct ip save_ip;
	struct inpcbinfo *pcbinfo = &udbinfo[mycpuid];

	off = *offp;
	m = *mp;
	*mp = NULL;

	iphlen = off;
	udp_stat.udps_ipackets++;

	/*
	 * Strip IP options, if any; should skip this,
	 * make available to user, and use on returned packets,
	 * but we don't yet have a way to check the checksum
	 * with options still present.
	 */
	if (iphlen > sizeof(struct ip)) {
		ip_stripoptions(m);
		iphlen = sizeof(struct ip);
	}

	/*
	 * IP and UDP headers are together in first mbuf.
	 * Already checked and pulled up in ip_demux().
	 */
	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
	    ("UDP header not in one mbuf"));

	ip = mtod(m, struct ip *);
	uh = (struct udphdr *)((caddr_t)ip + iphlen);

	/* destination port of 0 is illegal, based on RFC768. */
	if (uh->uh_dport == 0)
		goto bad;

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 */
	len = ntohs((u_short)uh->uh_ulen);
	if (ip->ip_len != len) {
		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
			udp_stat.udps_badlen++;
			goto bad;
		}
		/* Negative count: trims the excess from the tail. */
		m_adj(m, len - ip->ip_len);
		/* ip->ip_len = len; */
	}
	/*
	 * Save a copy of the IP header in case we want to restore it
	 * for sending an ICMP error message in response.
	 */
	save_ip = *ip;

	/*
	 * Checksum extended UDP header and data.
	 */
	if (uh->uh_sum) {
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			/* Checksum data was already supplied in the mbuf. */
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				uh->uh_sum = m->m_pkthdr.csum_data;
			else
				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htonl((u_short)len +
				    m->m_pkthdr.csum_data + IPPROTO_UDP));
			uh->uh_sum ^= 0xffff;
		} else {
			char b[9];

			/*
			 * Temporarily overlay the IP header with the
			 * pseudo-header (ih_x1 zeroed, ih_len set), run the
			 * software checksum, then put the saved bytes back.
			 */
			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
			bzero(((struct ipovly *)ip)->ih_x1, 9);
			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
		}
		/* A non-zero result means the checksum did not verify. */
		if (uh->uh_sum) {
			udp_stat.udps_badsum++;
			m_freem(m);
			return(IPPROTO_DONE);
		}
	} else
		udp_stat.udps_nosum++;

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
		struct inpcbhead *connhead;
		struct inpcontainer *ic, *ic_marker;
		struct inpcontainerhead *ichead;
		struct udp_mcast_arg arg;
		struct inpcb *last;
		int error;

		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * Construct sockaddr format source address.
		 */
		udp_in.sin_port = uh->uh_sport;
		udp_in.sin_addr = ip->ip_src;
		arg.udp_in = &udp_in;
		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 */
		last = NULL;
		arg.iphlen = iphlen;

		/* Pass 1: pcbs in this CPU's connection hash bucket. */
		connhead = &pcbinfo->hashbase[
		    INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport,
		    ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)];
		LIST_FOREACH(inp, connhead, inp_hash) {
#ifdef INET6
			if (!INP_ISIPV4(inp))
				continue;
#endif
			if (!in_hosteq(inp->inp_faddr, ip->ip_src) ||
			    !in_hosteq(inp->inp_laddr, ip->ip_dst) ||
			    inp->inp_fport != uh->uh_sport ||
			    inp->inp_lport != uh->uh_dport)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				goto done;
		}

		/*
		 * Pass 2: pcbs in the wildcard hash bucket.  The walk is done
		 * with a marker element that is moved behind each visited
		 * entry, while holding the pcbinfo token.
		 */
		ichead = &pcbinfo->wildcardhashbase[
		    INP_PCBWILDCARDHASH(uh->uh_dport,
		    pcbinfo->wildcardhashmask)];
		ic_marker = in_pcbcontainer_marker(mycpuid);

		GET_PCBINFO_TOKEN(pcbinfo);
		LIST_INSERT_HEAD(ichead, ic_marker, ic_list);
		while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) {
			LIST_REMOVE(ic_marker, ic_list);
			LIST_INSERT_AFTER(ic, ic_marker, ic_list);

			inp = ic->ic_inp;
			if (inp->inp_flags & INP_PLACEMARKER)
				continue;
#ifdef INET6
			if (!INP_ISIPV4(inp))
				continue;
#endif
			if (inp->inp_lport != uh->uh_dport)
				continue;
			if (inp->inp_laddr.s_addr != INADDR_ANY &&
			    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				break;
		}
		LIST_REMOVE(ic_marker, ic_list);
		REL_PCBINFO_TOKEN(pcbinfo);
done:
		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datagram.)
			 */
			udp_stat.udps_noportbcast++;
			goto bad;
		}
#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			ipsecstat.in_polvio++;
			goto bad;
		}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			goto bad;
#endif /*FAST_IPSEC*/
		/* The last matching pcb receives the original mbuf. */
		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
		    &udp_in);
		return(IPPROTO_DONE);
	}
	/*
	 * Locate pcb for datagram.
	 */
	inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport,
	    ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif,
	    udp_reuseport_ext ? m : NULL);
	if (inp == NULL) {
		if (log_in_vain) {
			char buf[sizeof "aaa.bbb.ccc.ddd"];

			/*
			 * Copy the first inet_ntoa() result aside because
			 * inet_ntoa() is called again in the log() arguments.
			 */
			strcpy(buf, inet_ntoa(ip->ip_dst));
			log(LOG_INFO,
			    "Connection attempt to UDP %s:%d from %s:%d\n",
			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
			    ntohs(uh->uh_sport));
		}
		udp_stat.udps_noport++;
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			udp_stat.udps_noportbcast++;
			goto bad;
		}
		if (blackhole)
			goto bad;
#ifdef ICMP_BANDLIM
		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
			goto bad;
#endif
		/* Restore the saved IP header before building the ICMP error. */
		*ip = save_ip;
		ip->ip_len += iphlen;
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		return(IPPROTO_DONE);
	}
	KASSERT(INP_ISIPV4(inp), ("not inet inpcb"));
#ifdef IPSEC
	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
		ipsecstat.in_polvio++;
		goto bad;
	}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
	if (ipsec4_in_reject(m, inp))
		goto bad;
#endif /*FAST_IPSEC*/
	/*
	 * Check the minimum TTL for socket.
	 */
	if (ip->ip_ttl < inp->inp_ip_minttl)
		goto bad;

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	udp_in.sin_port = uh->uh_sport;
	udp_in.sin_addr = ip->ip_src;
	if ((inp->inp_flags & INP_CONTROLOPTS) ||
	    (inp->inp_socket->so_options & SO_TIMESTAMP))
		ip_savecontrol(inp, &opts, ip, m);
	/* Drop the IP and UDP headers; only the payload is queued. */
	m_adj(m, iphlen + sizeof(struct udphdr));

	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
	if (ssb_appendaddr(&inp->inp_socket->so_rcv,
	    (struct sockaddr *)&udp_in, m, opts) == 0) {
		/* Append failed; data and control mbufs are freed at "bad". */
		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
		udp_stat.udps_fullsock++;
		goto bad;
	}
	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
	sorwakeup(inp->inp_socket);
	return(IPPROTO_DONE);
bad:
	m_freem(m);
	if (opts)
		m_freem(opts);
	return(IPPROTO_DONE);
}

/*
 * Subroutine of udp_input(), mainly for source code readability.
 *
 * Queue datagram "n" on the receive buffer of pcb "last".  "off" bytes
 * (the IP and UDP headers) are trimmed from the front of "n" first, and
 * "udp_in" supplies the source address; the caller must have filled it in.
 * If the append fails, "n" and any control mbuf are freed and
 * udps_fullsock is counted; otherwise the socket's readers are woken up.
 */
static void
udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
    struct sockaddr_in *udp_in)
{
	struct mbuf *opts = NULL;
	int ret;

	KASSERT(INP_ISIPV4(last), ("not inet inpcb"));

	if (last->inp_flags & INP_CONTROLOPTS ||
	    last->inp_socket->so_options & SO_TIMESTAMP)
		ip_savecontrol(last, &opts, ip, n);
	m_adj(n, off);

	lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token);
	ret = ssb_appendaddr(&last->inp_socket->so_rcv,
	    (struct sockaddr *)udp_in, n, opts);
	lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token);
	if (ret == 0) {
		m_freem(n);
		if (opts)
			m_freem(opts);
		udp_stat.udps_fullsock++;
	} else {
		sorwakeup(last->inp_socket);
	}
}

/*
 * Notify a udp user of an asynchronous error;
 * just wake up so that he can collect error status.
719 */ 720 void 721 udp_notify(struct inpcb *inp, int error) 722 { 723 inp->inp_socket->so_error = error; 724 sorwakeup(inp->inp_socket); 725 sowwakeup(inp->inp_socket); 726 } 727 728 struct netmsg_udp_notify { 729 struct netmsg_base base; 730 inp_notify_t nm_notify; 731 struct in_addr nm_faddr; 732 int nm_arg; 733 }; 734 735 static void 736 udp_notifyall_oncpu(netmsg_t msg) 737 { 738 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 739 int nextcpu, cpu = mycpuid; 740 741 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 742 743 nextcpu = cpu + 1; 744 if (nextcpu < ncpus2) 745 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 746 else 747 lwkt_replymsg(&nm->base.lmsg, 0); 748 } 749 750 inp_notify_t 751 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 752 struct ip **ip0, int *cpuid) 753 { 754 struct in_addr faddr; 755 struct ip *ip = *ip0; 756 inp_notify_t notify = udp_notify; 757 758 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 759 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 760 return NULL; 761 762 if (PRC_IS_REDIRECT(cmd)) { 763 ip = NULL; 764 notify = in_rtchange; 765 } else if (cmd == PRC_HOSTDEAD) { 766 ip = NULL; 767 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 768 return NULL; 769 } 770 771 if (cpuid != NULL) { 772 if (ip == NULL) { 773 /* Go through all CPUs */ 774 *cpuid = ncpus; 775 } else { 776 const struct udphdr *uh; 777 778 uh = (const struct udphdr *) 779 ((caddr_t)ip + (ip->ip_hl << 2)); 780 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 781 ip->ip_src.s_addr, uh->uh_sport); 782 } 783 } 784 785 *ip0 = ip; 786 return notify; 787 } 788 789 void 790 udp_ctlinput(netmsg_t msg) 791 { 792 struct sockaddr *sa = msg->ctlinput.nm_arg; 793 struct ip *ip = msg->ctlinput.nm_extra; 794 int cmd = msg->ctlinput.nm_cmd, cpuid; 795 inp_notify_t notify; 796 struct in_addr faddr; 797 798 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 799 if (notify == NULL) 800 goto 
done; 801 802 faddr = ((struct sockaddr_in *)sa)->sin_addr; 803 if (ip) { 804 const struct udphdr *uh; 805 struct inpcb *inp; 806 807 if (cpuid != mycpuid) 808 goto done; 809 810 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 811 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 812 ip->ip_src, uh->uh_sport, 0, NULL); 813 if (inp != NULL && inp->inp_socket != NULL) 814 notify(inp, inetctlerrmap[cmd]); 815 } else if (msg->ctlinput.nm_direct) { 816 if (cpuid != ncpus && cpuid != mycpuid) 817 goto done; 818 if (mycpuid >= ncpus2) 819 goto done; 820 821 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 822 notify); 823 } else { 824 struct netmsg_udp_notify *nm; 825 826 ASSERT_IN_NETISR(0); 827 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 828 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 829 0, udp_notifyall_oncpu); 830 nm->nm_faddr = faddr; 831 nm->nm_arg = inetctlerrmap[cmd]; 832 nm->nm_notify = notify; 833 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 834 } 835 done: 836 lwkt_replymsg(&msg->lmsg, 0); 837 } 838 839 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 840 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 841 842 static int 843 udp_getcred(SYSCTL_HANDLER_ARGS) 844 { 845 struct sockaddr_in addrs[2]; 846 struct ucred cred0, *cred = NULL; 847 struct inpcb *inp; 848 int error, cpu, origcpu; 849 850 error = priv_check(req->td, PRIV_ROOT); 851 if (error) 852 return (error); 853 error = SYSCTL_IN(req, addrs, sizeof addrs); 854 if (error) 855 return (error); 856 857 origcpu = mycpuid; 858 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 859 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 860 861 lwkt_migratecpu(cpu); 862 863 inp = in_pcblookup_hash(&udbinfo[cpu], 864 addrs[1].sin_addr, addrs[1].sin_port, 865 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 866 if (inp == NULL || inp->inp_socket == NULL) { 867 error = ENOENT; 868 } else if 
(inp->inp_socket->so_cred != NULL) { 869 cred0 = *(inp->inp_socket->so_cred); 870 cred = &cred0; 871 } 872 873 lwkt_migratecpu(origcpu); 874 875 if (error) 876 return error; 877 878 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 879 } 880 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 881 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 882 883 static void 884 udp_send_redispatch(netmsg_t msg) 885 { 886 struct mbuf *m = msg->send.nm_m; 887 int pru_flags = msg->send.nm_flags; 888 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 889 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 890 int flags = msg->send.nm_priv; /* ip_output flags */ 891 int error; 892 893 logudp(redisp_ipout_beg, inp); 894 895 /* 896 * - Don't use inp route cache. It should only be used in the 897 * inp owner netisr. 898 * - Access to inp_moptions should be safe, since multicast UDP 899 * datagrams are redispatched to netisr0 and inp_moptions is 900 * changed only in netisr0. 
901 */ 902 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 903 if ((pru_flags & PRUS_NOREPLY) == 0) 904 lwkt_replymsg(&msg->send.base.lmsg, error); 905 906 if (m_opt != NULL) { 907 /* Free saved ip options, if any */ 908 m_freem(m_opt); 909 } 910 911 logudp(redisp_ipout_end, inp); 912 } 913 914 static void 915 udp_send(netmsg_t msg) 916 { 917 struct socket *so = msg->send.base.nm_so; 918 struct mbuf *m = msg->send.nm_m; 919 struct sockaddr *dstaddr = msg->send.nm_addr; 920 int pru_flags = msg->send.nm_flags; 921 struct inpcb *inp = so->so_pcb; 922 struct thread *td = msg->send.nm_td; 923 int flags; 924 925 struct udpiphdr *ui; 926 int len = m->m_pkthdr.len; 927 struct sockaddr_in *sin; /* really is initialized before use */ 928 int error = 0, cpu; 929 930 KKASSERT(msg->send.nm_control == NULL); 931 932 logudp(send_beg, inp); 933 934 if (inp == NULL) { 935 error = EINVAL; 936 goto release; 937 } 938 939 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 940 error = EMSGSIZE; 941 goto release; 942 } 943 944 if (inp->inp_lport == 0) { /* unbound socket */ 945 boolean_t forwarded; 946 947 error = in_pcbbind(inp, NULL, td); 948 if (error) 949 goto release; 950 951 /* 952 * Need to call udp_send again, after this inpcb is 953 * inserted into wildcard hash table. 954 */ 955 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 956 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 957 if (forwarded) { 958 /* 959 * The message is further forwarded, so we are 960 * done here. 
961 */ 962 logudp(send_inswildcard, inp); 963 return; 964 } 965 } 966 967 if (dstaddr != NULL) { /* destination address specified */ 968 if (inp->inp_faddr.s_addr != INADDR_ANY) { 969 /* already connected */ 970 error = EISCONN; 971 goto release; 972 } 973 sin = (struct sockaddr_in *)dstaddr; 974 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 975 error = EAFNOSUPPORT; /* IPv6 only jail */ 976 goto release; 977 } 978 } else { 979 if (inp->inp_faddr.s_addr == INADDR_ANY) { 980 /* no destination specified and not already connected */ 981 error = ENOTCONN; 982 goto release; 983 } 984 sin = NULL; 985 } 986 987 /* 988 * Calculate data length and get a mbuf 989 * for UDP and IP headers. 990 */ 991 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 992 if (m == NULL) { 993 error = ENOBUFS; 994 goto release; 995 } 996 997 /* 998 * Fill in mbuf with extended UDP header 999 * and addresses and length put into network format. 1000 */ 1001 ui = mtod(m, struct udpiphdr *); 1002 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1003 ui->ui_pr = IPPROTO_UDP; 1004 1005 /* 1006 * Set destination address. 1007 */ 1008 if (dstaddr != NULL) { /* use specified destination */ 1009 ui->ui_dst = sin->sin_addr; 1010 ui->ui_dport = sin->sin_port; 1011 } else { /* use connected destination */ 1012 ui->ui_dst = inp->inp_faddr; 1013 ui->ui_dport = inp->inp_fport; 1014 } 1015 1016 /* 1017 * Set source address. 1018 */ 1019 if (inp->inp_laddr.s_addr == INADDR_ANY || 1020 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1021 struct sockaddr_in *if_sin; 1022 1023 if (dstaddr == NULL) { 1024 /* 1025 * connect() had (or should have) failed because 1026 * the interface had no IP address, but the 1027 * application proceeded to call send() anyways. 1028 */ 1029 error = ENOTCONN; 1030 goto release; 1031 } 1032 1033 /* Look up outgoing interface. 
*/ 1034 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1035 if (error) 1036 goto release; 1037 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1038 } else { 1039 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1040 } 1041 ui->ui_sport = inp->inp_lport; 1042 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1043 1044 /* 1045 * Release the original thread, since it is no longer used 1046 */ 1047 if (pru_flags & PRUS_HELDTD) { 1048 lwkt_rele(td); 1049 pru_flags &= ~PRUS_HELDTD; 1050 } 1051 /* 1052 * Free the dest address, since it is no longer needed 1053 */ 1054 if (pru_flags & PRUS_FREEADDR) { 1055 kfree(dstaddr, M_SONAME); 1056 pru_flags &= ~PRUS_FREEADDR; 1057 } 1058 1059 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1060 1061 /* 1062 * Set up checksum and output datagram. 1063 */ 1064 if (udpcksum) { 1065 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1066 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1067 m->m_pkthdr.csum_flags = CSUM_UDP; 1068 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1069 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1070 } else { 1071 ui->ui_sum = 0; 1072 } 1073 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1074 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1075 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1076 udp_stat.udps_opackets++; 1077 1078 flags = IP_DEBUGROUTE | 1079 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1080 if (pru_flags & PRUS_DONTROUTE) 1081 flags |= SO_DONTROUTE; 1082 1083 if (inp->inp_flags & INP_CONNECTED) { 1084 /* 1085 * For connected socket, this datagram has already 1086 * been in the correct netisr; no need to rehash. 
1087 */ 1088 goto sendit; 1089 } 1090 1091 cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, 1092 ui->ui_src.s_addr, ui->ui_sport); 1093 if (cpu != mycpuid) { 1094 struct mbuf *m_opt = NULL; 1095 struct netmsg_pru_send *smsg; 1096 struct lwkt_port *port = netisr_cpuport(cpu); 1097 1098 /* 1099 * Not on the CPU that matches this UDP datagram hash; 1100 * redispatch to the correct CPU to do the ip_output(). 1101 */ 1102 if (inp->inp_options != NULL) { 1103 /* 1104 * If there are ip options, then save a copy, 1105 * since accessing inp_options on other CPUs' 1106 * is not safe. 1107 * 1108 * XXX optimize this? 1109 */ 1110 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1111 M_WAITOK); 1112 } 1113 if ((pru_flags & PRUS_NOREPLY) == 0) { 1114 /* 1115 * Change some parts of the original netmsg and 1116 * forward it to the target netisr. 1117 * 1118 * NOTE: so_port MUST NOT be checked in the target 1119 * netisr. 1120 */ 1121 smsg = &msg->send; 1122 smsg->nm_priv = flags; /* ip_output flags */ 1123 smsg->nm_m = m; 1124 smsg->nm_control = m_opt; /* XXX save ipopt */ 1125 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1126 smsg->base.nm_dispatch = udp_send_redispatch; 1127 lwkt_forwardmsg(port, &smsg->base.lmsg); 1128 } else { 1129 /* 1130 * Recreate the netmsg, since the original mbuf 1131 * could have been changed. And send it to the 1132 * target netisr. 1133 * 1134 * NOTE: so_port MUST NOT be checked in the target 1135 * netisr. 
1136 */ 1137 smsg = &m->m_hdr.mh_sndmsg; 1138 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1139 MSGF_IGNSOPORT, udp_send_redispatch); 1140 smsg->nm_priv = flags; /* ip_output flags */ 1141 smsg->nm_flags = pru_flags; 1142 smsg->nm_m = m; 1143 smsg->nm_control = m_opt; /* XXX save ipopt */ 1144 lwkt_sendmsg(port, &smsg->base.lmsg); 1145 } 1146 1147 /* This UDP datagram is redispatched; done */ 1148 logudp(send_redisp, inp); 1149 return; 1150 } 1151 1152 sendit: 1153 logudp(send_ipout, inp); 1154 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1155 inp->inp_moptions, inp); 1156 m = NULL; 1157 1158 release: 1159 if (m != NULL) 1160 m_freem(m); 1161 1162 if (pru_flags & PRUS_HELDTD) 1163 lwkt_rele(td); 1164 if (pru_flags & PRUS_FREEADDR) 1165 kfree(dstaddr, M_SONAME); 1166 if ((pru_flags & PRUS_NOREPLY) == 0) 1167 lwkt_replymsg(&msg->send.base.lmsg, error); 1168 1169 logudp(send_end, inp); 1170 } 1171 1172 u_long udp_sendspace = 9216; /* really max datagram size */ 1173 /* 40 1K datagrams */ 1174 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1175 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1176 1177 u_long udp_recvspace = 40 * (1024 + 1178 #ifdef INET6 1179 sizeof(struct sockaddr_in6) 1180 #else 1181 sizeof(struct sockaddr_in) 1182 #endif 1183 ); 1184 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1185 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1186 1187 /* 1188 * This should never happen, since UDP socket does not support 1189 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 
1190 */ 1191 static void 1192 udp_abort(netmsg_t msg __unused) 1193 { 1194 panic("udp_abort is called"); 1195 } 1196 1197 static void 1198 udp_attach(netmsg_t msg) 1199 { 1200 struct socket *so = msg->attach.base.nm_so; 1201 struct pru_attach_info *ai = msg->attach.nm_ai; 1202 struct inpcb *inp; 1203 int error; 1204 1205 inp = so->so_pcb; 1206 if (inp != NULL) { 1207 error = EINVAL; 1208 goto out; 1209 } 1210 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1211 if (error) 1212 goto out; 1213 1214 error = in_pcballoc(so, &udbinfo[mycpuid]); 1215 if (error) 1216 goto out; 1217 1218 inp = (struct inpcb *)so->so_pcb; 1219 inp->inp_flags |= INP_DIRECT_DETACH; 1220 inp->inp_ip_ttl = ip_defttl; 1221 error = 0; 1222 out: 1223 lwkt_replymsg(&msg->attach.base.lmsg, error); 1224 } 1225 1226 static void 1227 udp_inswildcard_replymsg(netmsg_t msg) 1228 { 1229 lwkt_msg_t lmsg = &msg->lmsg; 1230 1231 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1232 udp_send(msg); 1233 /* msg is replied by udp_send() */ 1234 } else { 1235 lwkt_replymsg(lmsg, lmsg->ms_error); 1236 } 1237 } 1238 1239 static void 1240 udp_soreuseport_dispatch(netmsg_t msg) 1241 { 1242 /* This inpcb has already been in the wildcard hash. */ 1243 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1244 udp_inswildcard_replymsg(msg); 1245 } 1246 1247 static void 1248 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1249 { 1250 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1251 } 1252 1253 static boolean_t 1254 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1255 { 1256 int cpu; 1257 1258 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1259 ("not on owner cpu")); 1260 1261 in_pcbinswildcardhash(inp); 1262 for (cpu = 0; cpu < ncpus2; ++cpu) { 1263 if (cpu == mycpuid) { 1264 /* 1265 * This inpcb has been inserted by the above 1266 * in_pcbinswildcardhash(). 
1267 */ 1268 continue; 1269 } 1270 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1271 } 1272 1273 if (inp->inp_socket->so_options & SO_REUSEPORT) { 1274 /* 1275 * For SO_REUSEPORT socket, redistribute it based on its 1276 * local group index. 1277 */ 1278 cpu = inp->inp_lgrpindex & ncpus2_mask; 1279 if (cpu != mycpuid) { 1280 struct lwkt_port *port = netisr_cpuport(cpu); 1281 lwkt_msg_t lmsg = &msg->lmsg; 1282 1283 /* 1284 * We are moving the protocol processing port the 1285 * socket is on, we have to unlink here and re-link 1286 * on the target cpu (this inpcb is still left in 1287 * the wildcard hash). 1288 */ 1289 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1290 msg->nm_dispatch = udp_soreuseport_dispatch; 1291 1292 /* 1293 * See the related comment in tcp_usrreq.c 1294 * tcp_connect() 1295 */ 1296 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1297 lwkt_forwardmsg(port, lmsg); 1298 return TRUE; /* forwarded */ 1299 } 1300 } 1301 return FALSE; 1302 } 1303 1304 static void 1305 udp_inswildcardhash_dispatch(netmsg_t msg) 1306 { 1307 struct inpcb *inp = msg->base.nm_so->so_pcb; 1308 boolean_t forwarded; 1309 1310 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1311 KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid, 1312 ("not target cpu")); 1313 1314 in_pcblink(inp, &udbinfo[mycpuid]); 1315 1316 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1317 if (forwarded) { 1318 /* The message is further forwarded, so we are done here. */ 1319 return; 1320 } 1321 udp_inswildcard_replymsg(msg); 1322 } 1323 1324 static boolean_t 1325 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1326 { 1327 lwkt_msg_t lmsg = &msg->lmsg; 1328 int cpu; 1329 1330 ASSERT_INP_NOTINHASH(inp); 1331 1332 /* This inpcb could no longer be directly detached */ 1333 inp->inp_flags &= ~INP_DIRECT_DETACH; 1334 1335 /* 1336 * Always clear the route cache, so we don't need to 1337 * worry about any owner CPU changes later. 
1338 */ 1339 in_pcbresetroute(inp); 1340 1341 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1342 cpu = ntohs(inp->inp_lport) & ncpus2_mask; 1343 1344 lmsg->ms_error = error; 1345 if (cpu != mycpuid) { 1346 struct lwkt_port *port = netisr_cpuport(cpu); 1347 1348 /* 1349 * We are moving the protocol processing port the socket 1350 * is on, we have to unlink here and re-link on the 1351 * target cpu. 1352 */ 1353 in_pcbunlink(inp, &udbinfo[mycpuid]); 1354 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1355 1356 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1357 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1358 lwkt_forwardmsg(port, lmsg); 1359 return TRUE; /* forwarded */ 1360 } 1361 1362 return udp_inswildcardhash_oncpu(inp, msg); 1363 } 1364 1365 static void 1366 udp_bind(netmsg_t msg) 1367 { 1368 struct socket *so = msg->bind.base.nm_so; 1369 struct inpcb *inp; 1370 int error; 1371 1372 inp = so->so_pcb; 1373 if (inp) { 1374 struct sockaddr *nam = msg->bind.nm_nam; 1375 struct thread *td = msg->bind.nm_td; 1376 1377 error = in_pcbbind(inp, nam, td); 1378 if (error == 0) { 1379 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1380 boolean_t forwarded; 1381 1382 if (sin->sin_addr.s_addr != INADDR_ANY) 1383 inp->inp_flags |= INP_WASBOUND_NOTANY; 1384 1385 forwarded = udp_inswildcardhash(inp, 1386 &msg->bind.base, 0); 1387 if (forwarded) { 1388 /* 1389 * The message is further forwarded, so 1390 * we are done here. 
1391 */ 1392 return; 1393 } 1394 } 1395 } else { 1396 error = EINVAL; 1397 } 1398 lwkt_replymsg(&msg->bind.base.lmsg, error); 1399 } 1400 1401 static void 1402 udp_connect(netmsg_t msg) 1403 { 1404 struct socket *so = msg->connect.base.nm_so; 1405 struct sockaddr *nam = msg->connect.nm_nam; 1406 struct thread *td = msg->connect.nm_td; 1407 struct inpcb *inp; 1408 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1409 struct sockaddr_in *if_sin; 1410 struct lwkt_port *port; 1411 int error; 1412 1413 KKASSERT(msg->connect.nm_m == NULL); 1414 1415 inp = so->so_pcb; 1416 if (inp == NULL) { 1417 error = EINVAL; 1418 goto out; 1419 } 1420 1421 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1422 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1423 in_pcblink(inp, &udbinfo[mycpuid]); 1424 } 1425 1426 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1427 error = EISCONN; 1428 goto out; 1429 } 1430 error = 0; 1431 1432 /* 1433 * Bind if we have to 1434 */ 1435 if (inp->inp_lport == 0) { 1436 error = in_pcbbind(inp, NULL, td); 1437 if (error) 1438 goto out; 1439 } 1440 1441 /* 1442 * Calculate the correct protocol processing thread. The connect 1443 * operation must run there. 1444 */ 1445 error = in_pcbladdr(inp, nam, &if_sin, td); 1446 if (error) 1447 goto out; 1448 if (!prison_remote_ip(td, nam)) { 1449 error = EAFNOSUPPORT; /* IPv6 only jail */ 1450 goto out; 1451 } 1452 1453 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1454 inp->inp_laddr.s_addr != INADDR_ANY ? 1455 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1456 if (port != &curthread->td_msgport) { 1457 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1458 int nm_flags = PRUC_RECONNECT; 1459 1460 /* 1461 * in_pcbladdr() may have allocated a route entry for us 1462 * on the current CPU, but we need a route entry on the 1463 * inpcb's owner CPU, so free it here. 
1464 */ 1465 in_pcbresetroute(inp); 1466 1467 if (inp->inp_flags & INP_WILDCARD) { 1468 /* 1469 * Remove this inpcb from the wildcard hash before 1470 * the socket's msgport changes. 1471 */ 1472 udp_remwildcardhash(inp); 1473 } 1474 1475 /* 1476 * We are moving the protocol processing port the socket 1477 * is on, we have to unlink here and re-link on the 1478 * target cpu. 1479 */ 1480 in_pcbunlink(inp, &udbinfo[mycpuid]); 1481 msg->connect.nm_flags |= nm_flags; 1482 1483 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1484 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1485 lwkt_forwardmsg(port, lmsg); 1486 /* msg invalid now */ 1487 return; 1488 } 1489 error = udp_connect_oncpu(inp, sin, if_sin); 1490 out: 1491 if (error && inp != NULL && inp->inp_lport != 0 && 1492 (inp->inp_flags & INP_WILDCARD) == 0) { 1493 boolean_t forwarded; 1494 1495 /* Connect failed; put it to wildcard hash. */ 1496 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1497 error); 1498 if (forwarded) { 1499 /* 1500 * The message is further forwarded, so we are done 1501 * here. 1502 */ 1503 return; 1504 } 1505 } 1506 lwkt_replymsg(&msg->connect.base.lmsg, error); 1507 } 1508 1509 static void 1510 udp_remwildcardhash(struct inpcb *inp) 1511 { 1512 int cpu; 1513 1514 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1515 ("not on owner cpu")); 1516 1517 for (cpu = 0; cpu < ncpus2; ++cpu) { 1518 if (cpu == mycpuid) { 1519 /* 1520 * This inpcb will be removed by the later 1521 * in_pcbremwildcardhash(). 1522 */ 1523 continue; 1524 } 1525 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1526 } 1527 in_pcbremwildcardhash(inp); 1528 } 1529 1530 static int 1531 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1532 struct sockaddr_in *if_sin) 1533 { 1534 struct socket *so = inp->inp_socket; 1535 struct inpcb *oinp; 1536 1537 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1538 sin->sin_addr, sin->sin_port, 1539 inp->inp_laddr.s_addr != INADDR_ANY ? 
1540 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1541 if (oinp != NULL) 1542 return EADDRINUSE; 1543 1544 /* 1545 * No more errors can occur, finish adjusting the socket 1546 * and change the processing port to reflect the connected 1547 * socket. Once set we can no longer safely mess with the 1548 * socket. 1549 */ 1550 1551 if (inp->inp_flags & INP_WILDCARD) 1552 udp_remwildcardhash(inp); 1553 1554 if (inp->inp_laddr.s_addr == INADDR_ANY) 1555 inp->inp_laddr = if_sin->sin_addr; 1556 inp->inp_faddr = sin->sin_addr; 1557 inp->inp_fport = sin->sin_port; 1558 in_pcbinsconnhash(inp); 1559 1560 soisconnected(so); 1561 1562 return 0; 1563 } 1564 1565 static void 1566 udp_detach2(struct socket *so) 1567 { 1568 in_pcbdetach(so->so_pcb); 1569 sodiscard(so); 1570 sofree(so); 1571 } 1572 1573 static void 1574 udp_detach_final_dispatch(netmsg_t msg) 1575 { 1576 udp_detach2(msg->base.nm_so); 1577 } 1578 1579 static void 1580 udp_detach_oncpu_dispatch(netmsg_t msg) 1581 { 1582 struct netmsg_base *clomsg = &msg->base; 1583 struct socket *so = clomsg->nm_so; 1584 struct inpcb *inp = so->so_pcb; 1585 struct thread *td = curthread; 1586 int nextcpu, cpuid = mycpuid; 1587 1588 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1589 1590 if (inp->inp_flags & INP_WILDCARD) { 1591 /* 1592 * This inp will be removed on the inp's 1593 * owner CPU later, so don't do it now. 1594 */ 1595 if (&td->td_msgport != so->so_port) 1596 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1597 } 1598 1599 if (cpuid == 0) { 1600 /* 1601 * Free and clear multicast socket option, 1602 * which is only accessed in netisr0. 1603 */ 1604 ip_freemoptions(inp->inp_moptions); 1605 inp->inp_moptions = NULL; 1606 } 1607 1608 nextcpu = cpuid + 1; 1609 if (nextcpu < ncpus2) { 1610 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1611 } else { 1612 /* 1613 * No one could see this inpcb now; destroy this 1614 * inpcb in its owner netisr. 
1615 */ 1616 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1617 udp_detach_final_dispatch); 1618 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1619 } 1620 } 1621 1622 static void 1623 udp_detach(netmsg_t msg) 1624 { 1625 struct socket *so = msg->detach.base.nm_so; 1626 struct netmsg_base *clomsg; 1627 struct inpcb *inp; 1628 1629 inp = so->so_pcb; 1630 if (inp == NULL) { 1631 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1632 return; 1633 } 1634 1635 /* 1636 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1637 * sofree() later. 1638 */ 1639 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1640 1641 if (ncpus2 == 1) { 1642 /* Only one CPU, detach the inpcb directly. */ 1643 udp_detach2(so); 1644 return; 1645 } 1646 1647 /* 1648 * Remove this inpcb from the inpcb list first, so that 1649 * no one could find this inpcb from the inpcb list. 1650 */ 1651 in_pcbofflist(inp); 1652 1653 if (inp->inp_flags & INP_DIRECT_DETACH) { 1654 /* 1655 * Direct detaching is allowed 1656 */ 1657 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1658 ("in the wildcardhash")); 1659 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1660 udp_detach2(so); 1661 return; 1662 } 1663 1664 /* 1665 * Go through netisrs which process UDP to make sure 1666 * no one could find this inpcb anymore. 
1667 */ 1668 clomsg = &so->so_clomsg; 1669 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1670 udp_detach_oncpu_dispatch); 1671 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1672 } 1673 1674 static void 1675 udp_disconnect(netmsg_t msg) 1676 { 1677 struct socket *so = msg->disconnect.base.nm_so; 1678 struct inpcb *inp; 1679 boolean_t forwarded; 1680 int error = 0; 1681 1682 inp = so->so_pcb; 1683 if (inp == NULL) { 1684 error = EINVAL; 1685 goto out; 1686 } 1687 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1688 error = ENOTCONN; 1689 goto out; 1690 } 1691 1692 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1693 1694 in_pcbdisconnect(inp); 1695 1696 /* 1697 * Follow traditional BSD behavior and retain the local port 1698 * binding. But, fix the old misbehavior of overwriting any 1699 * previously bound local address. 1700 */ 1701 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1702 inp->inp_laddr.s_addr = INADDR_ANY; 1703 1704 if (so->so_state & SS_ISCLOSING) { 1705 /* 1706 * If this socket is being closed, there is no need 1707 * to put this socket back into wildcard hash table. 1708 */ 1709 error = 0; 1710 goto out; 1711 } 1712 1713 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1714 if (forwarded) { 1715 /* 1716 * The message is further forwarded, so we are done 1717 * here. 
1718 */ 1719 return; 1720 } 1721 out: 1722 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1723 } 1724 1725 void 1726 udp_shutdown(netmsg_t msg) 1727 { 1728 struct socket *so = msg->shutdown.base.nm_so; 1729 struct inpcb *inp; 1730 int error; 1731 1732 inp = so->so_pcb; 1733 if (inp) { 1734 socantsendmore(so); 1735 error = 0; 1736 } else { 1737 error = EINVAL; 1738 } 1739 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1740 } 1741 1742 struct pr_usrreqs udp_usrreqs = { 1743 .pru_abort = udp_abort, 1744 .pru_accept = pr_generic_notsupp, 1745 .pru_attach = udp_attach, 1746 .pru_bind = udp_bind, 1747 .pru_connect = udp_connect, 1748 .pru_connect2 = pr_generic_notsupp, 1749 .pru_control = in_control_dispatch, 1750 .pru_detach = udp_detach, 1751 .pru_disconnect = udp_disconnect, 1752 .pru_listen = pr_generic_notsupp, 1753 .pru_peeraddr = in_setpeeraddr_dispatch, 1754 .pru_rcvd = pr_generic_notsupp, 1755 .pru_rcvoob = pr_generic_notsupp, 1756 .pru_send = udp_send, 1757 .pru_sense = pru_sense_null, 1758 .pru_shutdown = udp_shutdown, 1759 .pru_sockaddr = in_setsockaddr_dispatch, 1760 .pru_sosend = sosendudp, 1761 .pru_soreceive = soreceive 1762 }; 1763