1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 124 125 #define UDP_KTR_STRING "inp=%p" 126 #define UDP_KTR_ARGS struct inpcb *inp 127 128 #ifndef KTR_UDP 129 #define KTR_UDP KTR_ALL 130 #endif 131 132 KTR_INFO_MASTER(udp); 133 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 136 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 137 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 138 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 139 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 140 141 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 142 143 /* 144 * UDP protocol implementation. 145 * Per RFC 768, August, 1980. 146 */ 147 #ifndef COMPAT_42 148 static int udpcksum = 1; 149 #else 150 static int udpcksum = 0; /* XXX */ 151 #endif 152 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 153 &udpcksum, 0, "Enable checksumming of UDP packets"); 154 155 int log_in_vain = 0; 156 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 157 &log_in_vain, 0, "Log all incoming UDP packets"); 158 159 static int blackhole = 0; 160 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 161 &blackhole, 0, "Do not send port unreachables for refused connects"); 162 163 static int strict_mcast_mship = 1; 164 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 165 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 166 167 int udp_sosend_async = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 169 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 170 171 int udp_sosend_prepend = 1; 172 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 173 &udp_sosend_prepend, 0, 174 "Prepend enough space for proto and link header in pru_send"); 175 176 static int udp_reuseport_ext = 1; 177 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 178 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 179 180 struct inpcbinfo udbinfo[MAXCPU]; 181 182 #ifndef UDBHASHSIZE 183 #define UDBHASHSIZE 16 184 #endif 185 186 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 187 188 static void udp_append(struct inpcb *last, struct ip *ip, 189 struct mbuf *n, int off, struct sockaddr_in *udp_in); 190 191 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 192 struct sockaddr_in *if_sin); 193 194 static boolean_t udp_inswildcardhash(struct inpcb *inp, 195 struct netmsg_base *msg, int error); 196 static void udp_remwildcardhash(struct inpcb *inp); 197 198 void 199 udp_init(void) 200 { 201 struct inpcbportinfo *portinfo; 202 int cpu; 203 204 portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB, 205 M_WAITOK); 206 207 for (cpu = 0; cpu < ncpus2; cpu++) { 208 struct inpcbinfo *uicb = &udbinfo[cpu]; 209 210 /* 211 * NOTE: 212 * UDP pcb list, wildcard hash table and localgroup hash 213 * table are shared. 214 */ 215 in_pcbinfo_init(uicb, cpu, TRUE); 216 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 217 218 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, TRUE, cpu); 219 uicb->portinfo = portinfo; 220 uicb->portinfo_mask = ncpus2_mask; 221 222 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 223 &uicb->wildcardhashmask); 224 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 225 &uicb->localgrphashmask); 226 227 uicb->ipi_size = sizeof(struct inpcb); 228 } 229 230 /* 231 * Initialize UDP statistics counters for each CPU. 232 */ 233 for (cpu = 0; cpu < ncpus; ++cpu) 234 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 235 } 236 237 static int 238 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 239 { 240 int cpu, error = 0; 241 242 for (cpu = 0; cpu < ncpus; ++cpu) { 243 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 244 sizeof(struct udpstat)))) 245 break; 246 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 247 sizeof(struct udpstat)))) 248 break; 249 } 250 251 return (error); 252 } 253 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 254 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 255 256 void 257 udp_ctloutput(netmsg_t msg) 258 { 259 struct socket *so = msg->base.nm_so; 260 struct sockopt *sopt = msg->ctloutput.nm_sopt; 261 struct inpcb *inp = so->so_pcb; 262 263 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 264 switch (sopt->sopt_name) { 265 case IP_MULTICAST_IF: 266 case IP_MULTICAST_VIF: 267 case IP_MULTICAST_TTL: 268 case IP_MULTICAST_LOOP: 269 case IP_ADD_MEMBERSHIP: 270 case IP_DROP_MEMBERSHIP: 271 /* 272 * This pr_ctloutput msg will be forwarded 273 * to netisr0 to run; we can't do direct 274 * detaching anymore. 275 * 276 * NOTE: 277 * Don't optimize for the sockets whose 278 * current so_port is netisr0's msgport. 279 * These sockets could be connect(2)'ed 280 * later and the so_port will be changed. 281 */ 282 inp->inp_flags &= ~INP_DIRECT_DETACH; 283 break; 284 } 285 } 286 return ip_ctloutput(msg); 287 } 288 289 /* 290 * Check multicast packets to make sure they are only sent to sockets with 291 * multicast memberships for the packet's destination address and arrival 292 * interface. Multicast packets to multicast-unaware sockets are also 293 * disallowed. 294 * 295 * Returns 0 if the packet is acceptable, -1 if it is not. 296 */ 297 static __inline int 298 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 299 const struct mbuf *m) 300 { 301 const struct ip_moptions *mopt; 302 int mshipno; 303 304 if (strict_mcast_mship == 0 || 305 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 306 return (0); 307 } 308 309 KASSERT(&curthread->td_msgport == netisr_cpuport(0), 310 ("multicast input not in netisr0")); 311 312 mopt = inp->inp_moptions; 313 if (mopt == NULL) 314 return (-1); 315 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 316 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 317 318 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 319 m->m_pkthdr.rcvif == maddr->inm_ifp) { 320 return (0); 321 } 322 } 323 return (-1); 324 } 325 326 struct udp_mcast_arg { 327 struct inpcb *inp; 328 struct inpcb *last; 329 struct ip *ip; 330 struct mbuf *m; 331 int iphlen; 332 struct sockaddr_in *udp_in; 333 }; 334 335 static int 336 udp_mcast_input(struct udp_mcast_arg *arg) 337 { 338 struct inpcb *inp = arg->inp; 339 struct inpcb *last = arg->last; 340 struct ip *ip = arg->ip; 341 struct mbuf *m = arg->m; 342 343 if (check_multicast_membership(ip, inp, m) < 0) 344 return ERESTART; /* caller continue */ 345 346 if (last != NULL) { 347 struct mbuf *n; 348 349 #ifdef IPSEC 350 /* check AH/ESP integrity. */ 351 if (ipsec4_in_reject_so(m, last->inp_socket)) 352 ipsecstat.in_polvio++; 353 /* do not inject data to pcb */ 354 else 355 #endif /*IPSEC*/ 356 #ifdef FAST_IPSEC 357 /* check AH/ESP integrity. */ 358 if (ipsec4_in_reject(m, last)) 359 ; 360 else 361 #endif /*FAST_IPSEC*/ 362 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 363 udp_append(last, ip, n, 364 arg->iphlen + sizeof(struct udphdr), 365 arg->udp_in); 366 } 367 arg->last = last = inp; 368 369 /* 370 * Don't look for additional matches if this one does 371 * not have either the SO_REUSEPORT or SO_REUSEADDR 372 * socket options set. This heuristic avoids searching 373 * through all pcbs in the common case of a non-shared 374 * port. It * assumes that an application will never 375 * clear these options after setting them. 376 */ 377 if (!(last->inp_socket->so_options & 378 (SO_REUSEPORT | SO_REUSEADDR))) 379 return EJUSTRETURN; /* caller stop */ 380 return 0; 381 } 382 383 int 384 udp_input(struct mbuf **mp, int *offp, int proto) 385 { 386 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 387 int iphlen; 388 struct ip *ip; 389 struct udphdr *uh; 390 struct inpcb *inp; 391 struct mbuf *m; 392 struct mbuf *opts = NULL; 393 int len, off; 394 struct ip save_ip; 395 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 396 397 off = *offp; 398 m = *mp; 399 *mp = NULL; 400 401 iphlen = off; 402 udp_stat.udps_ipackets++; 403 404 /* 405 * Strip IP options, if any; should skip this, 406 * make available to user, and use on returned packets, 407 * but we don't yet have a way to check the checksum 408 * with options still present. 409 */ 410 if (iphlen > sizeof(struct ip)) { 411 ip_stripoptions(m); 412 iphlen = sizeof(struct ip); 413 } 414 415 /* 416 * IP and UDP headers are together in first mbuf. 417 * Already checked and pulled up in ip_demux(). 418 */ 419 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 420 ("UDP header not in one mbuf")); 421 422 ip = mtod(m, struct ip *); 423 uh = (struct udphdr *)((caddr_t)ip + iphlen); 424 425 /* destination port of 0 is illegal, based on RFC768. */ 426 if (uh->uh_dport == 0) 427 goto bad; 428 429 /* 430 * Make mbuf data length reflect UDP length. 431 * If not enough data to reflect UDP length, drop. 432 */ 433 len = ntohs((u_short)uh->uh_ulen); 434 if (ip->ip_len != len) { 435 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 436 udp_stat.udps_badlen++; 437 goto bad; 438 } 439 m_adj(m, len - ip->ip_len); 440 /* ip->ip_len = len; */ 441 } 442 /* 443 * Save a copy of the IP header in case we want restore it 444 * for sending an ICMP error message in response. 445 */ 446 save_ip = *ip; 447 448 /* 449 * Checksum extended UDP header and data. 450 */ 451 if (uh->uh_sum) { 452 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 453 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 454 uh->uh_sum = m->m_pkthdr.csum_data; 455 else 456 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 457 ip->ip_dst.s_addr, htonl((u_short)len + 458 m->m_pkthdr.csum_data + IPPROTO_UDP)); 459 uh->uh_sum ^= 0xffff; 460 } else { 461 char b[9]; 462 463 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 464 bzero(((struct ipovly *)ip)->ih_x1, 9); 465 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 466 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 467 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 468 } 469 if (uh->uh_sum) { 470 udp_stat.udps_badsum++; 471 m_freem(m); 472 return(IPPROTO_DONE); 473 } 474 } else 475 udp_stat.udps_nosum++; 476 477 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 478 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 479 struct inpcbhead *connhead; 480 struct inpcontainer *ic, *ic_marker; 481 struct inpcontainerhead *ichead; 482 struct udp_mcast_arg arg; 483 struct inpcb *last; 484 int error; 485 486 /* 487 * Deliver a multicast or broadcast datagram to *all* sockets 488 * for which the local and remote addresses and ports match 489 * those of the incoming datagram. This allows more than 490 * one process to receive multi/broadcasts on the same port. 491 * (This really ought to be done for unicast datagrams as 492 * well, but that would cause problems with existing 493 * applications that open both address-specific sockets and 494 * a wildcard socket listening to the same port -- they would 495 * end up receiving duplicates of every unicast datagram. 496 * Those applications open the multiple sockets to overcome an 497 * inadequacy of the UDP socket interface, but for backwards 498 * compatibility we avoid the problem here rather than 499 * fixing the interface. Maybe 4.5BSD will remedy this?) 500 */ 501 502 /* 503 * Construct sockaddr format source address. 504 */ 505 udp_in.sin_port = uh->uh_sport; 506 udp_in.sin_addr = ip->ip_src; 507 arg.udp_in = &udp_in; 508 /* 509 * Locate pcb(s) for datagram. 510 * (Algorithm copied from raw_intr().) 511 */ 512 last = NULL; 513 arg.iphlen = iphlen; 514 515 connhead = &pcbinfo->hashbase[ 516 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 517 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 518 LIST_FOREACH(inp, connhead, inp_hash) { 519 #ifdef INET6 520 if (!INP_ISIPV4(inp)) 521 continue; 522 #endif 523 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 524 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 525 inp->inp_fport != uh->uh_sport || 526 inp->inp_lport != uh->uh_dport) 527 continue; 528 529 arg.inp = inp; 530 arg.last = last; 531 arg.ip = ip; 532 arg.m = m; 533 534 error = udp_mcast_input(&arg); 535 if (error == ERESTART) 536 continue; 537 last = arg.last; 538 539 if (error == EJUSTRETURN) 540 goto done; 541 } 542 543 ichead = &pcbinfo->wildcardhashbase[ 544 INP_PCBWILDCARDHASH(uh->uh_dport, 545 pcbinfo->wildcardhashmask)]; 546 ic_marker = in_pcbcontainer_marker(mycpuid); 547 548 GET_PCBINFO_TOKEN(pcbinfo); 549 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 550 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 551 LIST_REMOVE(ic_marker, ic_list); 552 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 553 554 inp = ic->ic_inp; 555 if (inp->inp_flags & INP_PLACEMARKER) 556 continue; 557 #ifdef INET6 558 if (!INP_ISIPV4(inp)) 559 continue; 560 #endif 561 if (inp->inp_lport != uh->uh_dport) 562 continue; 563 if (inp->inp_laddr.s_addr != INADDR_ANY && 564 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 565 continue; 566 567 arg.inp = inp; 568 arg.last = last; 569 arg.ip = ip; 570 arg.m = m; 571 572 error = udp_mcast_input(&arg); 573 if (error == ERESTART) 574 continue; 575 last = arg.last; 576 577 if (error == EJUSTRETURN) 578 break; 579 } 580 LIST_REMOVE(ic_marker, ic_list); 581 REL_PCBINFO_TOKEN(pcbinfo); 582 done: 583 if (last == NULL) { 584 /* 585 * No matching pcb found; discard datagram. 586 * (No need to send an ICMP Port Unreachable 587 * for a broadcast or multicast datgram.) 588 */ 589 udp_stat.udps_noportbcast++; 590 goto bad; 591 } 592 #ifdef IPSEC 593 /* check AH/ESP integrity. */ 594 if (ipsec4_in_reject_so(m, last->inp_socket)) { 595 ipsecstat.in_polvio++; 596 goto bad; 597 } 598 #endif /*IPSEC*/ 599 #ifdef FAST_IPSEC 600 /* check AH/ESP integrity. */ 601 if (ipsec4_in_reject(m, last)) 602 goto bad; 603 #endif /*FAST_IPSEC*/ 604 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 605 &udp_in); 606 return(IPPROTO_DONE); 607 } 608 /* 609 * Locate pcb for datagram. 610 */ 611 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 612 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 613 udp_reuseport_ext ? m : NULL); 614 if (inp == NULL) { 615 if (log_in_vain) { 616 char buf[sizeof "aaa.bbb.ccc.ddd"]; 617 618 strcpy(buf, inet_ntoa(ip->ip_dst)); 619 log(LOG_INFO, 620 "Connection attempt to UDP %s:%d from %s:%d\n", 621 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 622 ntohs(uh->uh_sport)); 623 } 624 udp_stat.udps_noport++; 625 if (m->m_flags & (M_BCAST | M_MCAST)) { 626 udp_stat.udps_noportbcast++; 627 goto bad; 628 } 629 if (blackhole) 630 goto bad; 631 #ifdef ICMP_BANDLIM 632 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 633 goto bad; 634 #endif 635 *ip = save_ip; 636 ip->ip_len += iphlen; 637 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 638 return(IPPROTO_DONE); 639 } 640 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 641 #ifdef IPSEC 642 if (ipsec4_in_reject_so(m, inp->inp_socket)) { 643 ipsecstat.in_polvio++; 644 goto bad; 645 } 646 #endif /*IPSEC*/ 647 #ifdef FAST_IPSEC 648 if (ipsec4_in_reject(m, inp)) 649 goto bad; 650 #endif /*FAST_IPSEC*/ 651 /* 652 * Check the minimum TTL for socket. 653 */ 654 if (ip->ip_ttl < inp->inp_ip_minttl) 655 goto bad; 656 657 /* 658 * Construct sockaddr format source address. 659 * Stuff source address and datagram in user buffer. 660 */ 661 udp_in.sin_port = uh->uh_sport; 662 udp_in.sin_addr = ip->ip_src; 663 if ((inp->inp_flags & INP_CONTROLOPTS) || 664 (inp->inp_socket->so_options & SO_TIMESTAMP)) 665 ip_savecontrol(inp, &opts, ip, m); 666 m_adj(m, iphlen + sizeof(struct udphdr)); 667 668 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 669 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 670 (struct sockaddr *)&udp_in, m, opts) == 0) { 671 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 672 udp_stat.udps_fullsock++; 673 goto bad; 674 } 675 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 676 sorwakeup(inp->inp_socket); 677 return(IPPROTO_DONE); 678 bad: 679 m_freem(m); 680 if (opts) 681 m_freem(opts); 682 return(IPPROTO_DONE); 683 } 684 685 /* 686 * subroutine of udp_input(), mainly for source code readability. 687 * caller must properly init udp_ip6 and udp_in6 beforehand. 688 */ 689 static void 690 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 691 struct sockaddr_in *udp_in) 692 { 693 struct mbuf *opts = NULL; 694 int ret; 695 696 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 697 698 if (last->inp_flags & INP_CONTROLOPTS || 699 last->inp_socket->so_options & SO_TIMESTAMP) 700 ip_savecontrol(last, &opts, ip, n); 701 m_adj(n, off); 702 703 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 704 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 705 (struct sockaddr *)udp_in, n, opts); 706 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 707 if (ret == 0) { 708 m_freem(n); 709 if (opts) 710 m_freem(opts); 711 udp_stat.udps_fullsock++; 712 } else { 713 sorwakeup(last->inp_socket); 714 } 715 } 716 717 /* 718 * Notify a udp user of an asynchronous error; 719 * just wake up so that he can collect error status. 720 */ 721 void 722 udp_notify(struct inpcb *inp, int error) 723 { 724 inp->inp_socket->so_error = error; 725 sorwakeup(inp->inp_socket); 726 sowwakeup(inp->inp_socket); 727 } 728 729 struct netmsg_udp_notify { 730 struct netmsg_base base; 731 inp_notify_t nm_notify; 732 struct in_addr nm_faddr; 733 int nm_arg; 734 }; 735 736 static void 737 udp_notifyall_oncpu(netmsg_t msg) 738 { 739 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 740 int nextcpu, cpu = mycpuid; 741 742 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 743 744 nextcpu = cpu + 1; 745 if (nextcpu < ncpus2) 746 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 747 else 748 lwkt_replymsg(&nm->base.lmsg, 0); 749 } 750 751 inp_notify_t 752 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 753 struct ip **ip0, int *cpuid) 754 { 755 struct in_addr faddr; 756 struct ip *ip = *ip0; 757 inp_notify_t notify = udp_notify; 758 759 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 760 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 761 return NULL; 762 763 if (PRC_IS_REDIRECT(cmd)) { 764 ip = NULL; 765 notify = in_rtchange; 766 } else if (cmd == PRC_HOSTDEAD) { 767 ip = NULL; 768 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 769 return NULL; 770 } 771 772 if (cpuid != NULL) { 773 if (ip == NULL) { 774 /* Go through all CPUs */ 775 *cpuid = ncpus; 776 } else { 777 const struct udphdr *uh; 778 779 uh = (const struct udphdr *) 780 ((caddr_t)ip + (ip->ip_hl << 2)); 781 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 782 ip->ip_src.s_addr, uh->uh_sport); 783 } 784 } 785 786 *ip0 = ip; 787 return notify; 788 } 789 790 void 791 udp_ctlinput(netmsg_t msg) 792 { 793 struct sockaddr *sa = msg->ctlinput.nm_arg; 794 struct ip *ip = msg->ctlinput.nm_extra; 795 int cmd = msg->ctlinput.nm_cmd, cpuid; 796 inp_notify_t notify; 797 struct in_addr faddr; 798 799 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 800 if (notify == NULL) 801 goto done; 802 803 faddr = ((struct sockaddr_in *)sa)->sin_addr; 804 if (ip) { 805 const struct udphdr *uh; 806 struct inpcb *inp; 807 808 if (cpuid != mycpuid) 809 goto done; 810 811 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 812 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 813 ip->ip_src, uh->uh_sport, 0, NULL); 814 if (inp != NULL && inp->inp_socket != NULL) 815 notify(inp, inetctlerrmap[cmd]); 816 } else if (msg->ctlinput.nm_direct) { 817 if (cpuid != ncpus && cpuid != mycpuid) 818 goto done; 819 if (mycpuid >= ncpus2) 820 goto done; 821 822 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 823 notify); 824 } else { 825 struct netmsg_udp_notify *nm; 826 827 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 828 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 829 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 830 0, udp_notifyall_oncpu); 831 nm->nm_faddr = faddr; 832 nm->nm_arg = inetctlerrmap[cmd]; 833 nm->nm_notify = notify; 834 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 835 } 836 done: 837 lwkt_replymsg(&msg->lmsg, 0); 838 } 839 840 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 841 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 842 843 static int 844 udp_getcred(SYSCTL_HANDLER_ARGS) 845 { 846 struct sockaddr_in addrs[2]; 847 struct ucred cred0, *cred = NULL; 848 struct inpcb *inp; 849 int error, cpu, origcpu; 850 851 error = priv_check(req->td, PRIV_ROOT); 852 if (error) 853 return (error); 854 error = SYSCTL_IN(req, addrs, sizeof addrs); 855 if (error) 856 return (error); 857 858 origcpu = mycpuid; 859 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 860 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 861 862 lwkt_migratecpu(cpu); 863 864 inp = in_pcblookup_hash(&udbinfo[cpu], 865 addrs[1].sin_addr, addrs[1].sin_port, 866 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 867 if (inp == NULL || inp->inp_socket == NULL) { 868 error = ENOENT; 869 } else if (inp->inp_socket->so_cred != NULL) { 870 cred0 = *(inp->inp_socket->so_cred); 871 cred = &cred0; 872 } 873 874 lwkt_migratecpu(origcpu); 875 876 if (error) 877 return error; 878 879 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 880 } 881 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 882 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 883 884 static void 885 udp_send_redispatch(netmsg_t msg) 886 { 887 struct mbuf *m = msg->send.nm_m; 888 int pru_flags = msg->send.nm_flags; 889 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 890 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 891 int flags = msg->send.nm_priv; /* ip_output flags */ 892 int error; 893 894 logudp(redisp_ipout_beg, inp); 895 896 /* 897 * - Don't use inp route cache. It should only be used in the 898 * inp owner netisr. 899 * - Access to inp_moptions should be safe, since multicast UDP 900 * datagrams are redispatched to netisr0 and inp_moptions is 901 * changed only in netisr0. 902 */ 903 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 904 if ((pru_flags & PRUS_NOREPLY) == 0) 905 lwkt_replymsg(&msg->send.base.lmsg, error); 906 907 if (m_opt != NULL) { 908 /* Free saved ip options, if any */ 909 m_freem(m_opt); 910 } 911 912 logudp(redisp_ipout_end, inp); 913 } 914 915 static void 916 udp_send(netmsg_t msg) 917 { 918 struct socket *so = msg->send.base.nm_so; 919 struct mbuf *m = msg->send.nm_m; 920 struct sockaddr *dstaddr = msg->send.nm_addr; 921 int pru_flags = msg->send.nm_flags; 922 struct inpcb *inp = so->so_pcb; 923 struct thread *td = msg->send.nm_td; 924 int flags; 925 926 struct udpiphdr *ui; 927 int len = m->m_pkthdr.len; 928 struct sockaddr_in *sin; /* really is initialized before use */ 929 int error = 0, cpu; 930 931 KKASSERT(msg->send.nm_control == NULL); 932 933 logudp(send_beg, inp); 934 935 if (inp == NULL) { 936 error = EINVAL; 937 goto release; 938 } 939 940 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 941 error = EMSGSIZE; 942 goto release; 943 } 944 945 if (inp->inp_lport == 0) { /* unbound socket */ 946 boolean_t forwarded; 947 948 error = in_pcbbind(inp, NULL, td); 949 if (error) 950 goto release; 951 952 /* 953 * Need to call udp_send again, after this inpcb is 954 * inserted into wildcard hash table. 955 */ 956 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 957 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 958 if (forwarded) { 959 /* 960 * The message is further forwarded, so we are 961 * done here. 962 */ 963 logudp(send_inswildcard, inp); 964 return; 965 } 966 } 967 968 if (dstaddr != NULL) { /* destination address specified */ 969 if (inp->inp_faddr.s_addr != INADDR_ANY) { 970 /* already connected */ 971 error = EISCONN; 972 goto release; 973 } 974 sin = (struct sockaddr_in *)dstaddr; 975 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 976 error = EAFNOSUPPORT; /* IPv6 only jail */ 977 goto release; 978 } 979 } else { 980 if (inp->inp_faddr.s_addr == INADDR_ANY) { 981 /* no destination specified and not already connected */ 982 error = ENOTCONN; 983 goto release; 984 } 985 sin = NULL; 986 } 987 988 /* 989 * Calculate data length and get a mbuf 990 * for UDP and IP headers. 991 */ 992 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 993 if (m == NULL) { 994 error = ENOBUFS; 995 goto release; 996 } 997 998 /* 999 * Fill in mbuf with extended UDP header 1000 * and addresses and length put into network format. 1001 */ 1002 ui = mtod(m, struct udpiphdr *); 1003 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1004 ui->ui_pr = IPPROTO_UDP; 1005 1006 /* 1007 * Set destination address. 1008 */ 1009 if (dstaddr != NULL) { /* use specified destination */ 1010 ui->ui_dst = sin->sin_addr; 1011 ui->ui_dport = sin->sin_port; 1012 } else { /* use connected destination */ 1013 ui->ui_dst = inp->inp_faddr; 1014 ui->ui_dport = inp->inp_fport; 1015 } 1016 1017 /* 1018 * Set source address. 1019 */ 1020 if (inp->inp_laddr.s_addr == INADDR_ANY || 1021 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1022 struct sockaddr_in *if_sin; 1023 1024 if (dstaddr == NULL) { 1025 /* 1026 * connect() had (or should have) failed because 1027 * the interface had no IP address, but the 1028 * application proceeded to call send() anyways. 1029 */ 1030 error = ENOTCONN; 1031 goto release; 1032 } 1033 1034 /* Look up outgoing interface. */ 1035 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1036 if (error) 1037 goto release; 1038 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1039 } else { 1040 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1041 } 1042 ui->ui_sport = inp->inp_lport; 1043 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1044 1045 /* 1046 * Release the original thread, since it is no longer used 1047 */ 1048 if (pru_flags & PRUS_HELDTD) { 1049 lwkt_rele(td); 1050 pru_flags &= ~PRUS_HELDTD; 1051 } 1052 /* 1053 * Free the dest address, since it is no longer needed 1054 */ 1055 if (pru_flags & PRUS_FREEADDR) { 1056 kfree(dstaddr, M_SONAME); 1057 pru_flags &= ~PRUS_FREEADDR; 1058 } 1059 1060 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1061 1062 /* 1063 * Set up checksum and output datagram. 1064 */ 1065 if (udpcksum) { 1066 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1067 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1068 m->m_pkthdr.csum_flags = CSUM_UDP; 1069 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1070 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1071 } else { 1072 ui->ui_sum = 0; 1073 } 1074 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1075 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1076 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1077 udp_stat.udps_opackets++; 1078 1079 flags = IP_DEBUGROUTE | 1080 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1081 if (pru_flags & PRUS_DONTROUTE) 1082 flags |= SO_DONTROUTE; 1083 1084 if (inp->inp_flags & INP_CONNECTED) { 1085 /* 1086 * For connected socket, this datagram has already 1087 * been in the correct netisr; no need to rehash. 1088 */ 1089 goto sendit; 1090 } 1091 1092 cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, 1093 ui->ui_src.s_addr, ui->ui_sport); 1094 if (cpu != mycpuid) { 1095 struct mbuf *m_opt = NULL; 1096 struct netmsg_pru_send *smsg; 1097 struct lwkt_port *port = netisr_cpuport(cpu); 1098 1099 /* 1100 * Not on the CPU that matches this UDP datagram hash; 1101 * redispatch to the correct CPU to do the ip_output(). 1102 */ 1103 if (inp->inp_options != NULL) { 1104 /* 1105 * If there are ip options, then save a copy, 1106 * since accessing inp_options on other CPUs' 1107 * is not safe. 1108 * 1109 * XXX optimize this? 1110 */ 1111 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1112 M_WAITOK); 1113 } 1114 if ((pru_flags & PRUS_NOREPLY) == 0) { 1115 /* 1116 * Change some parts of the original netmsg and 1117 * forward it to the target netisr. 1118 * 1119 * NOTE: so_port MUST NOT be checked in the target 1120 * netisr. 1121 */ 1122 smsg = &msg->send; 1123 smsg->nm_priv = flags; /* ip_output flags */ 1124 smsg->nm_m = m; 1125 smsg->nm_control = m_opt; /* XXX save ipopt */ 1126 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1127 smsg->base.nm_dispatch = udp_send_redispatch; 1128 lwkt_forwardmsg(port, &smsg->base.lmsg); 1129 } else { 1130 /* 1131 * Recreate the netmsg, since the original mbuf 1132 * could have been changed. And send it to the 1133 * target netisr. 1134 * 1135 * NOTE: so_port MUST NOT be checked in the target 1136 * netisr. 1137 */ 1138 smsg = &m->m_hdr.mh_sndmsg; 1139 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1140 MSGF_IGNSOPORT, udp_send_redispatch); 1141 smsg->nm_priv = flags; /* ip_output flags */ 1142 smsg->nm_flags = pru_flags; 1143 smsg->nm_m = m; 1144 smsg->nm_control = m_opt; /* XXX save ipopt */ 1145 lwkt_sendmsg(port, &smsg->base.lmsg); 1146 } 1147 1148 /* This UDP datagram is redispatched; done */ 1149 logudp(send_redisp, inp); 1150 return; 1151 } 1152 1153 sendit: 1154 logudp(send_ipout, inp); 1155 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1156 inp->inp_moptions, inp); 1157 m = NULL; 1158 1159 release: 1160 if (m != NULL) 1161 m_freem(m); 1162 1163 if (pru_flags & PRUS_HELDTD) 1164 lwkt_rele(td); 1165 if (pru_flags & PRUS_FREEADDR) 1166 kfree(dstaddr, M_SONAME); 1167 if ((pru_flags & PRUS_NOREPLY) == 0) 1168 lwkt_replymsg(&msg->send.base.lmsg, error); 1169 1170 logudp(send_end, inp); 1171 } 1172 1173 u_long udp_sendspace = 9216; /* really max datagram size */ 1174 /* 40 1K datagrams */ 1175 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1176 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1177 1178 u_long udp_recvspace = 40 * (1024 + 1179 #ifdef INET6 1180 sizeof(struct sockaddr_in6) 1181 #else 1182 sizeof(struct sockaddr_in) 1183 #endif 1184 ); 1185 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1186 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1187 1188 /* 1189 * This should never happen, since UDP socket does not support 1190 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 1191 */ 1192 static void 1193 udp_abort(netmsg_t msg __unused) 1194 { 1195 panic("udp_abort is called"); 1196 } 1197 1198 static void 1199 udp_attach(netmsg_t msg) 1200 { 1201 struct socket *so = msg->attach.base.nm_so; 1202 struct pru_attach_info *ai = msg->attach.nm_ai; 1203 struct inpcb *inp; 1204 int error; 1205 1206 inp = so->so_pcb; 1207 if (inp != NULL) { 1208 error = EINVAL; 1209 goto out; 1210 } 1211 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1212 if (error) 1213 goto out; 1214 1215 error = in_pcballoc(so, &udbinfo[mycpuid]); 1216 if (error) 1217 goto out; 1218 1219 inp = (struct inpcb *)so->so_pcb; 1220 inp->inp_flags |= INP_DIRECT_DETACH; 1221 inp->inp_ip_ttl = ip_defttl; 1222 error = 0; 1223 out: 1224 lwkt_replymsg(&msg->attach.base.lmsg, error); 1225 } 1226 1227 static void 1228 udp_inswildcard_replymsg(netmsg_t msg) 1229 { 1230 lwkt_msg_t lmsg = &msg->lmsg; 1231 1232 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1233 udp_send(msg); 1234 /* msg is replied by udp_send() */ 1235 } else { 1236 lwkt_replymsg(lmsg, lmsg->ms_error); 1237 } 1238 } 1239 1240 static void 1241 udp_soreuseport_dispatch(netmsg_t msg) 1242 { 1243 /* This inpcb has already been in the wildcard hash. */ 1244 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1245 udp_inswildcard_replymsg(msg); 1246 } 1247 1248 static void 1249 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1250 { 1251 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1252 } 1253 1254 static boolean_t 1255 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1256 { 1257 int cpu; 1258 1259 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1260 ("not on owner cpu")); 1261 1262 in_pcbinswildcardhash(inp); 1263 for (cpu = 0; cpu < ncpus2; ++cpu) { 1264 if (cpu == mycpuid) { 1265 /* 1266 * This inpcb has been inserted by the above 1267 * in_pcbinswildcardhash(). 1268 */ 1269 continue; 1270 } 1271 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1272 } 1273 1274 if (inp->inp_socket->so_options & SO_REUSEPORT) { 1275 /* 1276 * For SO_REUSEPORT socket, redistribute it based on its 1277 * local group index. 1278 */ 1279 cpu = inp->inp_lgrpindex & ncpus2_mask; 1280 if (cpu != mycpuid) { 1281 struct lwkt_port *port = netisr_cpuport(cpu); 1282 lwkt_msg_t lmsg = &msg->lmsg; 1283 1284 /* 1285 * We are moving the protocol processing port the 1286 * socket is on, we have to unlink here and re-link 1287 * on the target cpu (this inpcb is still left in 1288 * the wildcard hash). 1289 */ 1290 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1291 msg->nm_dispatch = udp_soreuseport_dispatch; 1292 1293 /* 1294 * See the related comment in tcp_usrreq.c 1295 * tcp_connect() 1296 */ 1297 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1298 lwkt_forwardmsg(port, lmsg); 1299 return TRUE; /* forwarded */ 1300 } 1301 } 1302 return FALSE; 1303 } 1304 1305 static void 1306 udp_inswildcardhash_dispatch(netmsg_t msg) 1307 { 1308 struct inpcb *inp = msg->base.nm_so->so_pcb; 1309 boolean_t forwarded; 1310 1311 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1312 KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid, 1313 ("not target cpu")); 1314 1315 in_pcblink(inp, &udbinfo[mycpuid]); 1316 1317 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1318 if (forwarded) { 1319 /* The message is further forwarded, so we are done here. */ 1320 return; 1321 } 1322 udp_inswildcard_replymsg(msg); 1323 } 1324 1325 static boolean_t 1326 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1327 { 1328 lwkt_msg_t lmsg = &msg->lmsg; 1329 int cpu; 1330 1331 ASSERT_INP_NOTINHASH(inp); 1332 1333 /* This inpcb could no longer be directly detached */ 1334 inp->inp_flags &= ~INP_DIRECT_DETACH; 1335 1336 /* 1337 * Always clear the route cache, so we don't need to 1338 * worry about any owner CPU changes later. 1339 */ 1340 in_pcbresetroute(inp); 1341 1342 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1343 cpu = ntohs(inp->inp_lport) & ncpus2_mask; 1344 1345 lmsg->ms_error = error; 1346 if (cpu != mycpuid) { 1347 struct lwkt_port *port = netisr_cpuport(cpu); 1348 1349 /* 1350 * We are moving the protocol processing port the socket 1351 * is on, we have to unlink here and re-link on the 1352 * target cpu. 1353 */ 1354 in_pcbunlink(inp, &udbinfo[mycpuid]); 1355 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1356 1357 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1358 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1359 lwkt_forwardmsg(port, lmsg); 1360 return TRUE; /* forwarded */ 1361 } 1362 1363 return udp_inswildcardhash_oncpu(inp, msg); 1364 } 1365 1366 static void 1367 udp_bind(netmsg_t msg) 1368 { 1369 struct socket *so = msg->bind.base.nm_so; 1370 struct inpcb *inp; 1371 int error; 1372 1373 inp = so->so_pcb; 1374 if (inp) { 1375 struct sockaddr *nam = msg->bind.nm_nam; 1376 struct thread *td = msg->bind.nm_td; 1377 1378 error = in_pcbbind(inp, nam, td); 1379 if (error == 0) { 1380 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1381 boolean_t forwarded; 1382 1383 if (sin->sin_addr.s_addr != INADDR_ANY) 1384 inp->inp_flags |= INP_WASBOUND_NOTANY; 1385 1386 forwarded = udp_inswildcardhash(inp, 1387 &msg->bind.base, 0); 1388 if (forwarded) { 1389 /* 1390 * The message is further forwarded, so 1391 * we are done here. 1392 */ 1393 return; 1394 } 1395 } 1396 } else { 1397 error = EINVAL; 1398 } 1399 lwkt_replymsg(&msg->bind.base.lmsg, error); 1400 } 1401 1402 static void 1403 udp_connect(netmsg_t msg) 1404 { 1405 struct socket *so = msg->connect.base.nm_so; 1406 struct sockaddr *nam = msg->connect.nm_nam; 1407 struct thread *td = msg->connect.nm_td; 1408 struct inpcb *inp; 1409 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1410 struct sockaddr_in *if_sin; 1411 struct lwkt_port *port; 1412 int error; 1413 1414 KKASSERT(msg->connect.nm_m == NULL); 1415 1416 inp = so->so_pcb; 1417 if (inp == NULL) { 1418 error = EINVAL; 1419 goto out; 1420 } 1421 1422 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1423 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1424 in_pcblink(inp, &udbinfo[mycpuid]); 1425 } 1426 1427 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1428 error = EISCONN; 1429 goto out; 1430 } 1431 error = 0; 1432 1433 /* 1434 * Bind if we have to 1435 */ 1436 if (inp->inp_lport == 0) { 1437 error = in_pcbbind(inp, NULL, td); 1438 if (error) 1439 goto out; 1440 } 1441 1442 /* 1443 * Calculate the correct protocol processing thread. The connect 1444 * operation must run there. 1445 */ 1446 error = in_pcbladdr(inp, nam, &if_sin, td); 1447 if (error) 1448 goto out; 1449 if (!prison_remote_ip(td, nam)) { 1450 error = EAFNOSUPPORT; /* IPv6 only jail */ 1451 goto out; 1452 } 1453 1454 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1455 inp->inp_laddr.s_addr != INADDR_ANY ? 1456 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1457 if (port != &curthread->td_msgport) { 1458 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1459 int nm_flags = PRUC_RECONNECT; 1460 1461 /* 1462 * in_pcbladdr() may have allocated a route entry for us 1463 * on the current CPU, but we need a route entry on the 1464 * inpcb's owner CPU, so free it here. 1465 */ 1466 in_pcbresetroute(inp); 1467 1468 if (inp->inp_flags & INP_WILDCARD) { 1469 /* 1470 * Remove this inpcb from the wildcard hash before 1471 * the socket's msgport changes. 1472 */ 1473 udp_remwildcardhash(inp); 1474 } 1475 1476 /* 1477 * We are moving the protocol processing port the socket 1478 * is on, we have to unlink here and re-link on the 1479 * target cpu. 1480 */ 1481 in_pcbunlink(inp, &udbinfo[mycpuid]); 1482 msg->connect.nm_flags |= nm_flags; 1483 1484 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1485 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1486 lwkt_forwardmsg(port, lmsg); 1487 /* msg invalid now */ 1488 return; 1489 } 1490 error = udp_connect_oncpu(inp, sin, if_sin); 1491 out: 1492 if (error && inp != NULL && inp->inp_lport != 0 && 1493 (inp->inp_flags & INP_WILDCARD) == 0) { 1494 boolean_t forwarded; 1495 1496 /* Connect failed; put it to wildcard hash. */ 1497 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1498 error); 1499 if (forwarded) { 1500 /* 1501 * The message is further forwarded, so we are done 1502 * here. 1503 */ 1504 return; 1505 } 1506 } 1507 lwkt_replymsg(&msg->connect.base.lmsg, error); 1508 } 1509 1510 static void 1511 udp_remwildcardhash(struct inpcb *inp) 1512 { 1513 int cpu; 1514 1515 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1516 ("not on owner cpu")); 1517 1518 for (cpu = 0; cpu < ncpus2; ++cpu) { 1519 if (cpu == mycpuid) { 1520 /* 1521 * This inpcb will be removed by the later 1522 * in_pcbremwildcardhash(). 1523 */ 1524 continue; 1525 } 1526 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1527 } 1528 in_pcbremwildcardhash(inp); 1529 } 1530 1531 static int 1532 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1533 struct sockaddr_in *if_sin) 1534 { 1535 struct socket *so = inp->inp_socket; 1536 struct inpcb *oinp; 1537 1538 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1539 sin->sin_addr, sin->sin_port, 1540 inp->inp_laddr.s_addr != INADDR_ANY ? 1541 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1542 if (oinp != NULL) 1543 return EADDRINUSE; 1544 1545 /* 1546 * No more errors can occur, finish adjusting the socket 1547 * and change the processing port to reflect the connected 1548 * socket. Once set we can no longer safely mess with the 1549 * socket. 1550 */ 1551 1552 if (inp->inp_flags & INP_WILDCARD) 1553 udp_remwildcardhash(inp); 1554 1555 if (inp->inp_laddr.s_addr == INADDR_ANY) 1556 inp->inp_laddr = if_sin->sin_addr; 1557 inp->inp_faddr = sin->sin_addr; 1558 inp->inp_fport = sin->sin_port; 1559 in_pcbinsconnhash(inp); 1560 1561 soisconnected(so); 1562 1563 return 0; 1564 } 1565 1566 static void 1567 udp_detach2(struct socket *so) 1568 { 1569 in_pcbdetach(so->so_pcb); 1570 sodiscard(so); 1571 sofree(so); 1572 } 1573 1574 static void 1575 udp_detach_final_dispatch(netmsg_t msg) 1576 { 1577 udp_detach2(msg->base.nm_so); 1578 } 1579 1580 static void 1581 udp_detach_oncpu_dispatch(netmsg_t msg) 1582 { 1583 struct netmsg_base *clomsg = &msg->base; 1584 struct socket *so = clomsg->nm_so; 1585 struct inpcb *inp = so->so_pcb; 1586 struct thread *td = curthread; 1587 int nextcpu, cpuid = mycpuid; 1588 1589 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1590 1591 if (inp->inp_flags & INP_WILDCARD) { 1592 /* 1593 * This inp will be removed on the inp's 1594 * owner CPU later, so don't do it now. 1595 */ 1596 if (&td->td_msgport != so->so_port) 1597 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1598 } 1599 1600 if (cpuid == 0) { 1601 /* 1602 * Free and clear multicast socket option, 1603 * which is only accessed in netisr0. 1604 */ 1605 ip_freemoptions(inp->inp_moptions); 1606 inp->inp_moptions = NULL; 1607 } 1608 1609 nextcpu = cpuid + 1; 1610 if (nextcpu < ncpus2) { 1611 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1612 } else { 1613 /* 1614 * No one could see this inpcb now; destroy this 1615 * inpcb in its owner netisr. 1616 */ 1617 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1618 udp_detach_final_dispatch); 1619 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1620 } 1621 } 1622 1623 static void 1624 udp_detach(netmsg_t msg) 1625 { 1626 struct socket *so = msg->detach.base.nm_so; 1627 struct netmsg_base *clomsg; 1628 struct inpcb *inp; 1629 1630 inp = so->so_pcb; 1631 if (inp == NULL) { 1632 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1633 return; 1634 } 1635 1636 /* 1637 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1638 * sofree() later. 1639 */ 1640 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1641 1642 if (ncpus2 == 1) { 1643 /* Only one CPU, detach the inpcb directly. */ 1644 udp_detach2(so); 1645 return; 1646 } 1647 1648 /* 1649 * Remove this inpcb from the inpcb list first, so that 1650 * no one could find this inpcb from the inpcb list. 1651 */ 1652 in_pcbofflist(inp); 1653 1654 if (inp->inp_flags & INP_DIRECT_DETACH) { 1655 /* 1656 * Direct detaching is allowed 1657 */ 1658 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1659 ("in the wildcardhash")); 1660 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1661 udp_detach2(so); 1662 return; 1663 } 1664 1665 /* 1666 * Go through netisrs which process UDP to make sure 1667 * no one could find this inpcb anymore. 1668 */ 1669 clomsg = &so->so_clomsg; 1670 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1671 udp_detach_oncpu_dispatch); 1672 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1673 } 1674 1675 static void 1676 udp_disconnect(netmsg_t msg) 1677 { 1678 struct socket *so = msg->disconnect.base.nm_so; 1679 struct inpcb *inp; 1680 boolean_t forwarded; 1681 int error = 0; 1682 1683 inp = so->so_pcb; 1684 if (inp == NULL) { 1685 error = EINVAL; 1686 goto out; 1687 } 1688 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1689 error = ENOTCONN; 1690 goto out; 1691 } 1692 1693 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1694 1695 in_pcbdisconnect(inp); 1696 1697 /* 1698 * Follow traditional BSD behavior and retain the local port 1699 * binding. But, fix the old misbehavior of overwriting any 1700 * previously bound local address. 1701 */ 1702 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1703 inp->inp_laddr.s_addr = INADDR_ANY; 1704 1705 if (so->so_state & SS_ISCLOSING) { 1706 /* 1707 * If this socket is being closed, there is no need 1708 * to put this socket back into wildcard hash table. 1709 */ 1710 error = 0; 1711 goto out; 1712 } 1713 1714 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1715 if (forwarded) { 1716 /* 1717 * The message is further forwarded, so we are done 1718 * here. 1719 */ 1720 return; 1721 } 1722 out: 1723 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1724 } 1725 1726 void 1727 udp_shutdown(netmsg_t msg) 1728 { 1729 struct socket *so = msg->shutdown.base.nm_so; 1730 struct inpcb *inp; 1731 int error; 1732 1733 inp = so->so_pcb; 1734 if (inp) { 1735 socantsendmore(so); 1736 error = 0; 1737 } else { 1738 error = EINVAL; 1739 } 1740 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1741 } 1742 1743 struct pr_usrreqs udp_usrreqs = { 1744 .pru_abort = udp_abort, 1745 .pru_accept = pr_generic_notsupp, 1746 .pru_attach = udp_attach, 1747 .pru_bind = udp_bind, 1748 .pru_connect = udp_connect, 1749 .pru_connect2 = pr_generic_notsupp, 1750 .pru_control = in_control_dispatch, 1751 .pru_detach = udp_detach, 1752 .pru_disconnect = udp_disconnect, 1753 .pru_listen = pr_generic_notsupp, 1754 .pru_peeraddr = in_setpeeraddr_dispatch, 1755 .pru_rcvd = pr_generic_notsupp, 1756 .pru_rcvoob = pr_generic_notsupp, 1757 .pru_send = udp_send, 1758 .pru_sense = pru_sense_null, 1759 .pru_shutdown = udp_shutdown, 1760 .pru_sockaddr = in_setsockaddr_dispatch, 1761 .pru_sosend = sosendudp, 1762 .pru_soreceive = soreceive 1763 }; 1764