1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
 *
 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
 */

#include "opt_ipsec.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/in_cksum.h>
#include <sys/ktr.h>

#include <sys/thread2.h>
#include <sys/socketvar2.h>
#include <sys/serialize.h>

#include <machine/stdarg.h>

#include <net/if.h>
#include <net/route.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#ifdef FAST_IPSEC
#include <netproto/ipsec/ipsec.h>
#endif

#ifdef IPSEC
#include <netinet6/ipsec.h>
#endif

/*
 * Message flag set by udp_send() on a send message before it asks for the
 * inpcb to be inserted into the wildcard hash table, so that udp_send()
 * can be run again on that message afterwards.
 */
#define MSGF_UDP_SEND		MSGF_PROTO1

/*
 * Per-inpcb flag; udp_ctloutput() clears it once a multicast socket option
 * is set from outside netisr0, after which direct detaching is no longer
 * allowed for that inpcb.
 */
#define INP_DIRECT_DETACH	INP_FLAG_PROTO2

/* Every UDP KTR probe below logs a single inpcb pointer. */
#define UDP_KTR_STRING		"inp=%p"
#define UDP_KTR_ARGS		struct inpcb *inp

#ifndef KTR_UDP
#define KTR_UDP			KTR_ALL
#endif

KTR_INFO_MASTER(udp);
KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS);

/* Log the KTR probe udp_<name> for the given inpcb. */
#define logudp(name, inp)	KTR_LOG(udp_##name, inp)

/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */

/* Non-zero: udp_send() sets up a checksum on outgoing datagrams. */
#ifndef COMPAT_42
static int udpcksum = 1;
#else
static int udpcksum = 0;	/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "Enable checksumming of UDP packets");

/* Non-zero: udp_input() logs datagrams for which no pcb was found. */
int log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/*
 * Non-zero: udp_input() silently drops datagrams for which no pcb was
 * found instead of answering with an ICMP port unreachable.
 */
static int blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/*
 * Non-zero: check_multicast_membership() only accepts a multicast datagram
 * for sockets that joined the destination group on the receiving interface.
 */
static int strict_mcast_mship = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

int udp_sosend_async = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
    &udp_sosend_async, 0, "UDP asynchronized pru_send");

int udp_sosend_prepend = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
    &udp_sosend_prepend, 0,
    "Prepend enough space for proto and link header in pru_send");

/*
 * Non-zero: udp_input() hands the received mbuf to the unicast pcb lookup;
 * otherwise it passes NULL there.
 */
static int udp_reuseport_ext = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
    &udp_reuseport_ext, 0, "SO_REUSEPORT extension");

/* UDP pcb lookup state, one entry per cpu (indexed by cpu id). */
struct inpcbinfo udbinfo[MAXCPU];

/* Size passed to hashinit() for each of the UDP pcb hash tables. */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

/* UDP statistics, one entry per cpu (indexed by cpu id). */
struct udpstat udpstat_percpu[MAXCPU] __cachealign;

#ifdef INET6
/*
 * Scratch space for the IPv6 form of the source address / IP header of a
 * received IPv4 datagram.  udp_append() fills each one in on first use and
 * sets the matching *_init_done bit so the conversion is done only once
 * per datagram.
 */
struct udp_in6 {
	struct sockaddr_in6	uin6_sin;
	u_char			uin6_init_done : 1;
};
struct udp_ip6 {
	struct ip6_hdr		uip6_ip6;
	u_char			uip6_init_done : 1;
};
#else
struct udp_in6; 199 struct udp_ip6; 200 #endif /* INET6 */ 201 202 static void udp_append (struct inpcb *last, struct ip *ip, 203 struct mbuf *n, int off, struct sockaddr_in *udp_in, 204 struct udp_in6 *, struct udp_ip6 *); 205 #ifdef INET6 206 static void ip_2_ip6_hdr (struct ip6_hdr *ip6, struct ip *ip); 207 #endif 208 209 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 210 struct sockaddr_in *if_sin); 211 212 static boolean_t udp_inswildcardhash(struct inpcb *inp, 213 struct netmsg_base *msg, int error); 214 static void udp_remwildcardhash(struct inpcb *inp); 215 216 void 217 udp_init(void) 218 { 219 struct inpcbportinfo *portinfo; 220 int cpu; 221 222 portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB, 223 M_WAITOK); 224 225 for (cpu = 0; cpu < ncpus2; cpu++) { 226 struct inpcbinfo *uicb = &udbinfo[cpu]; 227 228 /* 229 * NOTE: 230 * UDP pcb list, wildcard hash table and localgroup hash 231 * table are shared. 232 */ 233 in_pcbinfo_init(uicb, cpu, TRUE); 234 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 235 236 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, TRUE, cpu); 237 uicb->portinfo = portinfo; 238 uicb->portinfo_mask = ncpus2_mask; 239 240 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 241 &uicb->wildcardhashmask); 242 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 243 &uicb->localgrphashmask); 244 245 uicb->ipi_size = sizeof(struct inpcb); 246 } 247 248 /* 249 * Initialize UDP statistics counters for each CPU. 
250 */ 251 for (cpu = 0; cpu < ncpus; ++cpu) 252 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 253 } 254 255 static int 256 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 257 { 258 int cpu, error = 0; 259 260 for (cpu = 0; cpu < ncpus; ++cpu) { 261 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 262 sizeof(struct udpstat)))) 263 break; 264 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 265 sizeof(struct udpstat)))) 266 break; 267 } 268 269 return (error); 270 } 271 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 272 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 273 274 void 275 udp_ctloutput(netmsg_t msg) 276 { 277 struct socket *so = msg->base.nm_so; 278 struct sockopt *sopt = msg->ctloutput.nm_sopt; 279 struct inpcb *inp = so->so_pcb; 280 281 if (sopt->sopt_level == IPPROTO_IP) { 282 switch (sopt->sopt_name) { 283 case IP_MULTICAST_IF: 284 case IP_MULTICAST_VIF: 285 case IP_MULTICAST_TTL: 286 case IP_MULTICAST_LOOP: 287 case IP_ADD_MEMBERSHIP: 288 case IP_DROP_MEMBERSHIP: 289 if (&curthread->td_msgport != netisr_cpuport(0)) { 290 /* 291 * This pr_ctloutput msg will be forwarded 292 * to netisr0 to run; we can't do direct 293 * detaching anymore. 294 */ 295 inp->inp_flags &= ~INP_DIRECT_DETACH; 296 } 297 break; 298 } 299 } 300 return ip_ctloutput(msg); 301 } 302 303 /* 304 * Check multicast packets to make sure they are only sent to sockets with 305 * multicast memberships for the packet's destination address and arrival 306 * interface. Multicast packets to multicast-unaware sockets are also 307 * disallowed. 308 * 309 * Returns 0 if the packet is acceptable, -1 if it is not. 
 */
static __inline int
check_multicast_membership(const struct ip *ip, const struct inpcb *inp,
    const struct mbuf *m)
{
	const struct ip_moptions *mopt;
	int mshipno;

	/* Nothing to check for non-multicast or when the check is disabled. */
	if (strict_mcast_mship == 0 ||
	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		return (0);
	}

	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
	    ("multicast input not in netisr0"));

	/* A socket without multicast options has joined no group. */
	mopt = inp->inp_moptions;
	if (mopt == NULL)
		return (-1);
	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
		const struct in_multi *maddr = mopt->imo_membership[mshipno];

		/* Both the group address and the interface must match. */
		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
			return (0);
		}
	}
	return (-1);
}

/*
 * Argument bundle for udp_mcast_input().  'inp' is the candidate pcb for
 * this call; 'last' is the previously accepted pcb (NULL if none yet) and
 * is updated by udp_mcast_input().  The remaining members describe the
 * datagram being delivered and the address scratch space that is passed
 * on to udp_append().
 */
struct udp_mcast_arg {
	struct inpcb		*inp;
	struct inpcb		*last;
	struct ip		*ip;
	struct mbuf		*m;
	int			iphlen;
	struct sockaddr_in	*udp_in;
#ifdef INET6
	struct udp_in6		*udp_in6;
	struct udp_ip6		*udp_ip6;
#endif
};

/*
 * Consider arg->inp as one more receiver of a multicast/broadcast datagram.
 *
 * If the pcb is accepted, a copy of the datagram is delivered to the
 * previously accepted pcb (arg->last), if there is one, and arg->inp
 * becomes the new arg->last.  The last accepted pcb is therefore left for
 * the caller, which can hand it the original mbuf instead of a copy.
 *
 * Returns:
 *   ERESTART     pcb rejected by the membership check; arg->last unchanged.
 *   EJUSTRETURN  pcb accepted and the caller should stop searching.
 *   0            pcb accepted and the caller should keep searching.
 */
static int
udp_mcast_input(struct udp_mcast_arg *arg)
{
	struct inpcb *inp = arg->inp;
	struct inpcb *last = arg->last;
	struct ip *ip = arg->ip;
	struct mbuf *m = arg->m;

	if (check_multicast_membership(ip, inp, m) < 0)
		return ERESTART; /* caller continue */

	if (last != NULL) {
		struct mbuf *n;

		/*
		 * NOTE: the IPSEC and FAST_IPSEC sections each end in a
		 * dangling "else", so together with the final "if" this is
		 * one if/else chain: the copy is appended only when no
		 * enabled policy check rejected the packet.
		 */
#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket))
			ipsecstat.in_polvio++;
			/* do not inject data to pcb */
		else
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			;
		else
#endif /*FAST_IPSEC*/
		if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL)
			udp_append(last, ip, n,
			    arg->iphlen + sizeof(struct udphdr),
			    arg->udp_in,
#ifdef INET6
			    arg->udp_in6, arg->udp_ip6
#else
			    NULL, NULL
#endif
			    );
	}
	arg->last = last = inp;

	/*
	 * Don't look for additional matches if this one does
	 * not have either the SO_REUSEPORT or SO_REUSEADDR
	 * socket options set.  This heuristic avoids searching
	 * through all pcbs in the common case of a non-shared
	 * port.  It assumes that an application will never
	 * clear these options after setting them.
	 */
	if (!(last->inp_socket->so_options &
	    (SO_REUSEPORT | SO_REUSEADDR)))
		return EJUSTRETURN; /* caller stop */
	return 0;
}

/*
 * UDP input routine.
 *
 * Takes the mbuf chain from *mp (and sets *mp to NULL), validates the UDP
 * length and checksum, and then either
 *  - delivers a multicast/broadcast datagram to every matching pcb, or
 *  - delivers a unicast datagram to the single pcb found for it, answering
 *    with an ICMP port unreachable when there is none (subject to the
 *    'blackhole' sysctl and, if configured, ICMP bandwidth limiting).
 *
 * *offp is read as the length of the IP header, options included.
 * 'proto' is not used.
 *
 * Always returns IPPROTO_DONE; the mbuf has been consumed on return.
 */
int
udp_input(struct mbuf **mp, int *offp, int proto)
{
	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
#ifdef INET6
	struct udp_in6 udp_in6 = {
		{ sizeof udp_in6.uin6_sin, AF_INET6 }, 0
	};
	struct udp_ip6 udp_ip6;
#endif

	int iphlen;
	struct ip *ip;
	struct udphdr *uh;
	struct inpcb *inp;
	struct mbuf *m;
	struct mbuf *opts = NULL;
	int len, off;
	struct ip save_ip;
	struct sockaddr *append_sa;
	struct inpcbinfo *pcbinfo = &udbinfo[mycpuid];

	off = *offp;
	m = *mp;
	*mp = NULL;

	iphlen = off;
	udp_stat.udps_ipackets++;

	/*
	 * Strip IP options, if any; should skip this,
	 * make available to user, and use on returned packets,
	 * but we don't yet have a way to check the checksum
	 * with options still present.
	 */
	if (iphlen > sizeof(struct ip)) {
		ip_stripoptions(m);
		iphlen = sizeof(struct ip);
	}

	/*
	 * IP and UDP headers are together in first mbuf.
	 * Already checked and pulled up in ip_demux().
	 */
	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
	    ("UDP header not in one mbuf"));

	ip = mtod(m, struct ip *);
	uh = (struct udphdr *)((caddr_t)ip + iphlen);

	/* destination port of 0 is illegal, based on RFC768. */
	if (uh->uh_dport == 0)
		goto bad;

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 *
	 * NOTE(review): ip_len is compared directly against the UDP length,
	 * so it presumably no longer includes the IP header at this point
	 * (the ICMP path below adds iphlen back) -- confirm against the
	 * caller.
	 */
	len = ntohs((u_short)uh->uh_ulen);
	if (ip->ip_len != len) {
		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
			udp_stat.udps_badlen++;
			goto bad;
		}
		/* len < ip_len here, so the adjustment is negative. */
		m_adj(m, len - ip->ip_len);
		/* ip->ip_len = len; */
	}
	/*
	 * Save a copy of the IP header in case we want restore it
	 * for sending an ICMP error message in response.
	 */
	save_ip = *ip;

	/*
	 * Checksum extended UDP header and data.
	 * A zero uh_sum means the sender did not compute one.
	 */
	if (uh->uh_sum) {
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			/* Use the checksum data recorded in the mbuf header. */
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				uh->uh_sum = m->m_pkthdr.csum_data;
			else
				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htonl((u_short)len +
				    m->m_pkthdr.csum_data + IPPROTO_UDP));
			uh->uh_sum ^= 0xffff;
		} else {
			char b[9];

			/*
			 * Checksum in software: temporarily overlay the IP
			 * header with the pseudo header (saving and then
			 * restoring the 9 bytes that get zeroed).
			 */
			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
			bzero(((struct ipovly *)ip)->ih_x1, 9);
			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
		}
		/* Any non-zero result means the checksum did not verify. */
		if (uh->uh_sum) {
			udp_stat.udps_badsum++;
			m_freem(m);
			return(IPPROTO_DONE);
		}
	} else
		udp_stat.udps_nosum++;

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
		struct inpcbhead *connhead;
		struct inpcontainer *ic, *ic_marker;
		struct inpcontainerhead *ichead;
		struct udp_mcast_arg arg;
		struct inpcb *last;
		int error;

		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * Construct sockaddr format source address.
		 */
		udp_in.sin_port = uh->uh_sport;
		udp_in.sin_addr = ip->ip_src;
		arg.udp_in = &udp_in;
		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 */
		last = NULL;
#ifdef INET6
		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
		arg.udp_in6 = &udp_in6;
		arg.udp_ip6 = &udp_ip6;
#endif
		arg.iphlen = iphlen;

		/*
		 * Pass 1: pcbs in the connection hash bucket whose
		 * addresses and ports all match the datagram.
		 */
		connhead = &pcbinfo->hashbase[
		    INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport,
		    ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)];
		LIST_FOREACH(inp, connhead, inp_hash) {
#ifdef INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif
			if (!in_hosteq(inp->inp_faddr, ip->ip_src) ||
			    !in_hosteq(inp->inp_laddr, ip->ip_dst) ||
			    inp->inp_fport != uh->uh_sport ||
			    inp->inp_lport != uh->uh_dport)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				goto done;
		}

		/*
		 * Pass 2: pcbs in the wildcard hash bucket for the
		 * destination port.  The bucket is walked by moving this
		 * cpu's marker entry one element forward per iteration,
		 * with the pcbinfo token held for the whole walk.
		 */
		ichead = &pcbinfo->wildcardhashbase[
		    INP_PCBWILDCARDHASH(uh->uh_dport,
		    pcbinfo->wildcardhashmask)];
		ic_marker = in_pcbcontainer_marker(mycpuid);

		GET_PCBINFO_TOKEN(pcbinfo);
		LIST_INSERT_HEAD(ichead, ic_marker, ic_list);
		while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) {
			/* Step the marker past 'ic' before examining it. */
			LIST_REMOVE(ic_marker, ic_list);
			LIST_INSERT_AFTER(ic, ic_marker, ic_list);

			inp = ic->ic_inp;
			/* Skip placemarker entries. */
			if (inp->inp_flags & INP_PLACEMARKER)
				continue;
#ifdef INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif
			if (inp->inp_lport != uh->uh_dport)
				continue;
			if (inp->inp_laddr.s_addr != INADDR_ANY &&
			    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				break;
		}
		LIST_REMOVE(ic_marker, ic_list);
		REL_PCBINFO_TOKEN(pcbinfo);
done:
		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datagram.)
			 */
			udp_stat.udps_noportbcast++;
			goto bad;
		}
#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			ipsecstat.in_polvio++;
			goto bad;
		}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			goto bad;
#endif /*FAST_IPSEC*/
		/* The last accepted pcb gets the original mbuf, not a copy. */
		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
		    &udp_in,
#ifdef INET6
		    &udp_in6, &udp_ip6
#else
		    NULL, NULL
#endif
		    );
		return(IPPROTO_DONE);
	}
	/*
	 * Locate pcb for datagram.
	 */
	inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport,
	    ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif,
	    udp_reuseport_ext ? m : NULL);
	if (inp == NULL) {
		if (log_in_vain) {
			char buf[sizeof "aaa.bbb.ccc.ddd"];

			/*
			 * The destination is copied out first because
			 * inet_ntoa() is called again for the source below.
			 */
			strcpy(buf, inet_ntoa(ip->ip_dst));
			log(LOG_INFO,
			    "Connection attempt to UDP %s:%d from %s:%d\n",
			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
			    ntohs(uh->uh_sport));
		}
		udp_stat.udps_noport++;
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			udp_stat.udps_noportbcast++;
			goto bad;
		}
		if (blackhole)
			goto bad;
#ifdef ICMP_BANDLIM
		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
			goto bad;
#endif
		/*
		 * Restore the IP header saved before checksumming and add
		 * the header length back to ip_len before handing the
		 * packet (and its ownership) to icmp_error().
		 */
		*ip = save_ip;
		ip->ip_len += iphlen;
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		return(IPPROTO_DONE);
	}
#ifdef IPSEC
	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
		ipsecstat.in_polvio++;
		goto bad;
	}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
	if (ipsec4_in_reject(m, inp))
		goto bad;
#endif /*FAST_IPSEC*/
	/*
	 * Check the minimum TTL for socket.
	 */
	if (ip->ip_ttl < inp->inp_ip_minttl)
		goto bad;

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	udp_in.sin_port = uh->uh_sport;
	udp_in.sin_addr = ip->ip_src;
	if ((inp->inp_flags & INP_CONTROLOPTS) ||
	    (inp->inp_socket->so_options & SO_TIMESTAMP)) {
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6) {
			int savedflags;

			/*
			 * Build an IPv6 header from the IPv4 one and save
			 * control data with INP_UNMAPPABLEOPTS temporarily
			 * masked out of the pcb flags.
			 */
			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
			savedflags = inp->inp_flags;
			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
			inp->inp_flags = savedflags;
		} else
#endif
		ip_savecontrol(inp, &opts, ip, m);
	}
	/* Strip the IP and UDP headers; only the payload is queued. */
	m_adj(m, iphlen + sizeof(struct udphdr));
#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
		/* uin6_sin is the first member of udp_in6. */
		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
		append_sa = (struct sockaddr *)&udp_in6;
	} else
#endif
	append_sa = (struct sockaddr *)&udp_in;

	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
	if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
		/* Append failed: count it and free m/opts at 'bad'. */
		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
		udp_stat.udps_fullsock++;
		goto bad;
	}
	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
	sorwakeup(inp->inp_socket);
	return(IPPROTO_DONE);
bad:
	/* Common drop path: release the datagram and any control data. */
	m_freem(m);
	if (opts)
		m_freem(opts);
	return(IPPROTO_DONE);
}

#ifdef INET6
/*
 * Fill in *ip6 from the IPv4 header *ip: version, payload length, next
 * header and hop limit are taken from the matching IPv4 fields, and the
 * IPv4 source/destination addresses are stored in the last 32 bits of
 * the IPv6 addresses with IPV6_ADDR_INT32_SMP in the preceding 32 bits.
 * Everything else in *ip6 is zero.
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	bzero(ip6, sizeof *ip6);

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;
	ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] =
		IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
}
#endif

/*
 * subroutine of udp_input(), mainly for source code readability.
 * caller must properly init udp_ip6 and udp_in6 beforehand.
765 */ 766 static void 767 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 768 struct sockaddr_in *udp_in, 769 struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6) 770 { 771 struct sockaddr *append_sa; 772 struct mbuf *opts = NULL; 773 int ret; 774 775 if (last->inp_flags & INP_CONTROLOPTS || 776 last->inp_socket->so_options & SO_TIMESTAMP) { 777 #ifdef INET6 778 if (last->inp_vflag & INP_IPV6) { 779 int savedflags; 780 781 if (udp_ip6->uip6_init_done == 0) { 782 ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip); 783 udp_ip6->uip6_init_done = 1; 784 } 785 savedflags = last->inp_flags; 786 last->inp_flags &= ~INP_UNMAPPABLEOPTS; 787 ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n); 788 last->inp_flags = savedflags; 789 } else 790 #endif 791 ip_savecontrol(last, &opts, ip, n); 792 } 793 #ifdef INET6 794 if (last->inp_vflag & INP_IPV6) { 795 if (udp_in6->uin6_init_done == 0) { 796 in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin); 797 udp_in6->uin6_init_done = 1; 798 } 799 append_sa = (struct sockaddr *)&udp_in6->uin6_sin; 800 } else 801 #endif 802 append_sa = (struct sockaddr *)udp_in; 803 m_adj(n, off); 804 805 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 806 ret = ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts); 807 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 808 if (ret == 0) { 809 m_freem(n); 810 if (opts) 811 m_freem(opts); 812 udp_stat.udps_fullsock++; 813 } else { 814 sorwakeup(last->inp_socket); 815 } 816 } 817 818 /* 819 * Notify a udp user of an asynchronous error; 820 * just wake up so that he can collect error status. 
821 */ 822 void 823 udp_notify(struct inpcb *inp, int error) 824 { 825 inp->inp_socket->so_error = error; 826 sorwakeup(inp->inp_socket); 827 sowwakeup(inp->inp_socket); 828 } 829 830 struct netmsg_udp_notify { 831 struct netmsg_base base; 832 inp_notify_t nm_notify; 833 struct in_addr nm_faddr; 834 int nm_arg; 835 }; 836 837 static void 838 udp_notifyall_oncpu(netmsg_t msg) 839 { 840 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 841 int nextcpu, cpu = mycpuid; 842 843 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 844 845 nextcpu = cpu + 1; 846 if (nextcpu < ncpus2) 847 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 848 else 849 lwkt_replymsg(&nm->base.lmsg, 0); 850 } 851 852 inp_notify_t 853 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 854 struct ip **ip0, int *cpuid) 855 { 856 struct in_addr faddr; 857 struct ip *ip = *ip0; 858 inp_notify_t notify = udp_notify; 859 860 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 861 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 862 return NULL; 863 864 if (PRC_IS_REDIRECT(cmd)) { 865 ip = NULL; 866 notify = in_rtchange; 867 } else if (cmd == PRC_HOSTDEAD) { 868 ip = NULL; 869 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 870 return NULL; 871 } 872 873 if (cpuid != NULL) { 874 if (ip == NULL) { 875 /* Go through all CPUs */ 876 *cpuid = ncpus; 877 } else { 878 const struct udphdr *uh; 879 880 uh = (const struct udphdr *) 881 ((caddr_t)ip + (ip->ip_hl << 2)); 882 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 883 ip->ip_src.s_addr, uh->uh_sport); 884 } 885 } 886 887 *ip0 = ip; 888 return notify; 889 } 890 891 void 892 udp_ctlinput(netmsg_t msg) 893 { 894 struct sockaddr *sa = msg->ctlinput.nm_arg; 895 struct ip *ip = msg->ctlinput.nm_extra; 896 int cmd = msg->ctlinput.nm_cmd; 897 inp_notify_t notify; 898 struct in_addr faddr; 899 900 notify = udp_get_inpnotify(cmd, sa, &ip, NULL); 901 if (notify == NULL) 902 goto done; 903 
904 faddr = ((struct sockaddr_in *)sa)->sin_addr; 905 if (ip) { 906 const struct udphdr *uh; 907 struct inpcb *inp; 908 909 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 910 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 911 ip->ip_src, uh->uh_sport, 0, NULL); 912 if (inp != NULL && inp->inp_socket != NULL) 913 notify(inp, inetctlerrmap[cmd]); 914 } else { 915 struct netmsg_udp_notify *nm; 916 917 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 918 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 919 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 920 0, udp_notifyall_oncpu); 921 nm->nm_faddr = faddr; 922 nm->nm_arg = inetctlerrmap[cmd]; 923 nm->nm_notify = notify; 924 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 925 } 926 done: 927 lwkt_replymsg(&msg->lmsg, 0); 928 } 929 930 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 931 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 932 933 static int 934 udp_getcred(SYSCTL_HANDLER_ARGS) 935 { 936 struct sockaddr_in addrs[2]; 937 struct ucred cred0, *cred = NULL; 938 struct inpcb *inp; 939 int error, cpu, origcpu; 940 941 error = priv_check(req->td, PRIV_ROOT); 942 if (error) 943 return (error); 944 error = SYSCTL_IN(req, addrs, sizeof addrs); 945 if (error) 946 return (error); 947 948 origcpu = mycpuid; 949 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 950 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 951 952 lwkt_migratecpu(cpu); 953 954 inp = in_pcblookup_hash(&udbinfo[cpu], 955 addrs[1].sin_addr, addrs[1].sin_port, 956 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 957 if (inp == NULL || inp->inp_socket == NULL) { 958 error = ENOENT; 959 } else if (inp->inp_socket->so_cred != NULL) { 960 cred0 = *(inp->inp_socket->so_cred); 961 cred = &cred0; 962 } 963 964 lwkt_migratecpu(origcpu); 965 966 if (error) 967 return error; 968 969 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 970 } 971 
SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 972 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 973 974 static void 975 udp_send_redispatch(netmsg_t msg) 976 { 977 struct mbuf *m = msg->send.nm_m; 978 int pru_flags = msg->send.nm_flags; 979 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 980 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 981 int flags = msg->send.nm_priv; /* ip_output flags */ 982 int error; 983 984 logudp(redisp_ipout_beg, inp); 985 986 /* 987 * - Don't use inp route cache. It should only be used in the 988 * inp owner netisr. 989 * - Access to inp_moptions should be safe, since multicast UDP 990 * datagrams are redispatched to netisr0 and inp_moptions is 991 * changed only in netisr0. 992 */ 993 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 994 if ((pru_flags & PRUS_NOREPLY) == 0) 995 lwkt_replymsg(&msg->send.base.lmsg, error); 996 997 if (m_opt != NULL) { 998 /* Free saved ip options, if any */ 999 m_freem(m_opt); 1000 } 1001 1002 logudp(redisp_ipout_end, inp); 1003 } 1004 1005 static void 1006 udp_send(netmsg_t msg) 1007 { 1008 struct socket *so = msg->send.base.nm_so; 1009 struct mbuf *m = msg->send.nm_m; 1010 struct sockaddr *dstaddr = msg->send.nm_addr; 1011 int pru_flags = msg->send.nm_flags; 1012 struct inpcb *inp = so->so_pcb; 1013 struct thread *td = msg->send.nm_td; 1014 int flags; 1015 1016 struct udpiphdr *ui; 1017 int len = m->m_pkthdr.len; 1018 struct sockaddr_in *sin; /* really is initialized before use */ 1019 int error = 0, cpu; 1020 1021 KKASSERT(msg->send.nm_control == NULL); 1022 1023 logudp(send_beg, inp); 1024 1025 if (inp == NULL) { 1026 error = EINVAL; 1027 goto release; 1028 } 1029 1030 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 1031 error = EMSGSIZE; 1032 goto release; 1033 } 1034 1035 if (inp->inp_lport == 0) { /* unbound socket */ 1036 boolean_t forwarded; 1037 1038 error = in_pcbbind(inp, NULL, td); 1039 if (error) 1040 
goto release; 1041 1042 /* 1043 * Need to call udp_send again, after this inpcb is 1044 * inserted into wildcard hash table. 1045 */ 1046 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 1047 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 1048 if (forwarded) { 1049 /* 1050 * The message is further forwarded, so we are 1051 * done here. 1052 */ 1053 logudp(send_inswildcard, inp); 1054 return; 1055 } 1056 } 1057 1058 if (dstaddr != NULL) { /* destination address specified */ 1059 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1060 /* already connected */ 1061 error = EISCONN; 1062 goto release; 1063 } 1064 sin = (struct sockaddr_in *)dstaddr; 1065 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 1066 error = EAFNOSUPPORT; /* IPv6 only jail */ 1067 goto release; 1068 } 1069 } else { 1070 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1071 /* no destination specified and not already connected */ 1072 error = ENOTCONN; 1073 goto release; 1074 } 1075 sin = NULL; 1076 } 1077 1078 /* 1079 * Calculate data length and get a mbuf 1080 * for UDP and IP headers. 1081 */ 1082 M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT); 1083 if (m == NULL) { 1084 error = ENOBUFS; 1085 goto release; 1086 } 1087 1088 /* 1089 * Fill in mbuf with extended UDP header 1090 * and addresses and length put into network format. 1091 */ 1092 ui = mtod(m, struct udpiphdr *); 1093 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1094 ui->ui_pr = IPPROTO_UDP; 1095 1096 /* 1097 * Set destination address. 1098 */ 1099 if (dstaddr != NULL) { /* use specified destination */ 1100 ui->ui_dst = sin->sin_addr; 1101 ui->ui_dport = sin->sin_port; 1102 } else { /* use connected destination */ 1103 ui->ui_dst = inp->inp_faddr; 1104 ui->ui_dport = inp->inp_fport; 1105 } 1106 1107 /* 1108 * Set source address. 
1109 */ 1110 if (inp->inp_laddr.s_addr == INADDR_ANY || 1111 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1112 struct sockaddr_in *if_sin; 1113 1114 if (dstaddr == NULL) { 1115 /* 1116 * connect() had (or should have) failed because 1117 * the interface had no IP address, but the 1118 * application proceeded to call send() anyways. 1119 */ 1120 error = ENOTCONN; 1121 goto release; 1122 } 1123 1124 /* Look up outgoing interface. */ 1125 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1126 if (error) 1127 goto release; 1128 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1129 } else { 1130 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1131 } 1132 ui->ui_sport = inp->inp_lport; 1133 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1134 1135 /* 1136 * Release the original thread, since it is no longer used 1137 */ 1138 if (pru_flags & PRUS_HELDTD) { 1139 lwkt_rele(td); 1140 pru_flags &= ~PRUS_HELDTD; 1141 } 1142 /* 1143 * Free the dest address, since it is no longer needed 1144 */ 1145 if (pru_flags & PRUS_FREEADDR) { 1146 kfree(dstaddr, M_SONAME); 1147 pru_flags &= ~PRUS_FREEADDR; 1148 } 1149 1150 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1151 1152 /* 1153 * Set up checksum and output datagram. 
1154 */ 1155 if (udpcksum) { 1156 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1157 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1158 m->m_pkthdr.csum_flags = CSUM_UDP; 1159 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1160 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1161 } else { 1162 ui->ui_sum = 0; 1163 } 1164 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1165 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1166 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1167 udp_stat.udps_opackets++; 1168 1169 flags = IP_DEBUGROUTE | 1170 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1171 if (pru_flags & PRUS_DONTROUTE) 1172 flags |= SO_DONTROUTE; 1173 1174 if (inp->inp_flags & INP_CONNECTED) { 1175 /* 1176 * For connected socket, this datagram has already 1177 * been in the correct netisr; no need to rehash. 1178 */ 1179 goto sendit; 1180 } 1181 1182 cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, 1183 ui->ui_src.s_addr, ui->ui_sport); 1184 if (cpu != mycpuid) { 1185 struct mbuf *m_opt = NULL; 1186 struct netmsg_pru_send *smsg; 1187 struct lwkt_port *port = netisr_cpuport(cpu); 1188 1189 /* 1190 * Not on the CPU that matches this UDP datagram hash; 1191 * redispatch to the correct CPU to do the ip_output(). 1192 */ 1193 if (inp->inp_options != NULL) { 1194 /* 1195 * If there are ip options, then save a copy, 1196 * since accessing inp_options on other CPUs' 1197 * is not safe. 1198 * 1199 * XXX optimize this? 1200 */ 1201 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1202 MB_WAIT); 1203 } 1204 if ((pru_flags & PRUS_NOREPLY) == 0) { 1205 /* 1206 * Change some parts of the original netmsg and 1207 * forward it to the target netisr. 1208 * 1209 * NOTE: so_port MUST NOT be checked in the target 1210 * netisr. 
1211 */ 1212 smsg = &msg->send; 1213 smsg->nm_priv = flags; /* ip_output flags */ 1214 smsg->nm_m = m; 1215 smsg->nm_control = m_opt; /* XXX save ipopt */ 1216 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1217 smsg->base.nm_dispatch = udp_send_redispatch; 1218 lwkt_forwardmsg(port, &smsg->base.lmsg); 1219 } else { 1220 /* 1221 * Recreate the netmsg, since the original mbuf 1222 * could have been changed. And send it to the 1223 * target netisr. 1224 * 1225 * NOTE: so_port MUST NOT be checked in the target 1226 * netisr. 1227 */ 1228 smsg = &m->m_hdr.mh_sndmsg; 1229 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1230 MSGF_IGNSOPORT, udp_send_redispatch); 1231 smsg->nm_priv = flags; /* ip_output flags */ 1232 smsg->nm_flags = pru_flags; 1233 smsg->nm_m = m; 1234 smsg->nm_control = m_opt; /* XXX save ipopt */ 1235 lwkt_sendmsg(port, &smsg->base.lmsg); 1236 } 1237 1238 /* This UDP datagram is redispatched; done */ 1239 logudp(send_redisp, inp); 1240 return; 1241 } 1242 1243 sendit: 1244 logudp(send_ipout, inp); 1245 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1246 inp->inp_moptions, inp); 1247 m = NULL; 1248 1249 release: 1250 if (m != NULL) 1251 m_freem(m); 1252 1253 if (pru_flags & PRUS_HELDTD) 1254 lwkt_rele(td); 1255 if (pru_flags & PRUS_FREEADDR) 1256 kfree(dstaddr, M_SONAME); 1257 if ((pru_flags & PRUS_NOREPLY) == 0) 1258 lwkt_replymsg(&msg->send.base.lmsg, error); 1259 1260 logudp(send_end, inp); 1261 } 1262 1263 u_long udp_sendspace = 9216; /* really max datagram size */ 1264 /* 40 1K datagrams */ 1265 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1266 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1267 1268 u_long udp_recvspace = 40 * (1024 + 1269 #ifdef INET6 1270 sizeof(struct sockaddr_in6) 1271 #else 1272 sizeof(struct sockaddr_in) 1273 #endif 1274 ); 1275 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1276 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1277 1278 /* 1279 * 
This should never happen, since UDP socket does not support 1280 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 1281 */ 1282 static void 1283 udp_abort(netmsg_t msg __unused) 1284 { 1285 panic("udp_abort is called"); 1286 } 1287 1288 static void 1289 udp_attach(netmsg_t msg) 1290 { 1291 struct socket *so = msg->attach.base.nm_so; 1292 struct pru_attach_info *ai = msg->attach.nm_ai; 1293 struct inpcb *inp; 1294 int error; 1295 1296 inp = so->so_pcb; 1297 if (inp != NULL) { 1298 error = EINVAL; 1299 goto out; 1300 } 1301 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1302 if (error) 1303 goto out; 1304 1305 error = in_pcballoc(so, &udbinfo[mycpuid]); 1306 if (error) 1307 goto out; 1308 1309 inp = (struct inpcb *)so->so_pcb; 1310 inp->inp_flags |= INP_DIRECT_DETACH; 1311 inp->inp_vflag |= INP_IPV4; 1312 inp->inp_ip_ttl = ip_defttl; 1313 error = 0; 1314 out: 1315 lwkt_replymsg(&msg->attach.base.lmsg, error); 1316 } 1317 1318 static void 1319 udp_inswildcard_replymsg(netmsg_t msg) 1320 { 1321 lwkt_msg_t lmsg = &msg->lmsg; 1322 1323 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1324 udp_send(msg); 1325 /* msg is replied by udp_send() */ 1326 } else { 1327 lwkt_replymsg(lmsg, lmsg->ms_error); 1328 } 1329 } 1330 1331 static void 1332 udp_soreuseport_dispatch(netmsg_t msg) 1333 { 1334 /* This inpcb has already been in the wildcard hash. 
*/ 1335 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1336 udp_inswildcard_replymsg(msg); 1337 } 1338 1339 static void 1340 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1341 { 1342 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1343 } 1344 1345 static boolean_t 1346 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1347 { 1348 int cpu; 1349 1350 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1351 ("not on owner cpu")); 1352 1353 in_pcbinswildcardhash(inp); 1354 for (cpu = 0; cpu < ncpus2; ++cpu) { 1355 if (cpu == mycpuid) { 1356 /* 1357 * This inpcb has been inserted by the above 1358 * in_pcbinswildcardhash(). 1359 */ 1360 continue; 1361 } 1362 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1363 } 1364 1365 if (inp->inp_socket->so_options & SO_REUSEPORT) { 1366 /* 1367 * For SO_REUSEPORT socket, redistribute it based on its 1368 * local group index. 1369 */ 1370 cpu = inp->inp_lgrpindex & ncpus2_mask; 1371 if (cpu != mycpuid) { 1372 struct lwkt_port *port = netisr_cpuport(cpu); 1373 lwkt_msg_t lmsg = &msg->lmsg; 1374 1375 /* 1376 * We are moving the protocol processing port the 1377 * socket is on, we have to unlink here and re-link 1378 * on the target cpu (this inpcb is still left in 1379 * the wildcard hash). 
1380 */ 1381 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1382 msg->nm_dispatch = udp_soreuseport_dispatch; 1383 1384 /* 1385 * See the related comment in tcp_usrreq.c 1386 * tcp_connect() 1387 */ 1388 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1389 lwkt_forwardmsg(port, lmsg); 1390 return TRUE; /* forwarded */ 1391 } 1392 } 1393 return FALSE; 1394 } 1395 1396 static void 1397 udp_inswildcardhash_dispatch(netmsg_t msg) 1398 { 1399 struct inpcb *inp = msg->base.nm_so->so_pcb; 1400 boolean_t forwarded; 1401 1402 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1403 KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid, 1404 ("not target cpu")); 1405 1406 in_pcblink(inp, &udbinfo[mycpuid]); 1407 1408 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1409 if (forwarded) { 1410 /* The message is further forwarded, so we are done here. */ 1411 return; 1412 } 1413 udp_inswildcard_replymsg(msg); 1414 } 1415 1416 static boolean_t 1417 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1418 { 1419 lwkt_msg_t lmsg = &msg->lmsg; 1420 int cpu; 1421 1422 ASSERT_INP_NOTINHASH(inp); 1423 1424 /* This inpcb could no longer be directly detached */ 1425 inp->inp_flags &= ~INP_DIRECT_DETACH; 1426 1427 /* 1428 * Always clear the route cache, so we don't need to 1429 * worry about any owner CPU changes later. 1430 */ 1431 in_pcbresetroute(inp); 1432 1433 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1434 cpu = ntohs(inp->inp_lport) & ncpus2_mask; 1435 1436 lmsg->ms_error = error; 1437 if (cpu != mycpuid) { 1438 struct lwkt_port *port = netisr_cpuport(cpu); 1439 1440 /* 1441 * We are moving the protocol processing port the socket 1442 * is on, we have to unlink here and re-link on the 1443 * target cpu. 
1444 */ 1445 in_pcbunlink(inp, &udbinfo[mycpuid]); 1446 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1447 1448 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1449 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1450 lwkt_forwardmsg(port, lmsg); 1451 return TRUE; /* forwarded */ 1452 } 1453 1454 return udp_inswildcardhash_oncpu(inp, msg); 1455 } 1456 1457 static void 1458 udp_bind(netmsg_t msg) 1459 { 1460 struct socket *so = msg->bind.base.nm_so; 1461 struct inpcb *inp; 1462 int error; 1463 1464 inp = so->so_pcb; 1465 if (inp) { 1466 struct sockaddr *nam = msg->bind.nm_nam; 1467 struct thread *td = msg->bind.nm_td; 1468 1469 error = in_pcbbind(inp, nam, td); 1470 if (error == 0) { 1471 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1472 boolean_t forwarded; 1473 1474 if (sin->sin_addr.s_addr != INADDR_ANY) 1475 inp->inp_flags |= INP_WASBOUND_NOTANY; 1476 1477 forwarded = udp_inswildcardhash(inp, 1478 &msg->bind.base, 0); 1479 if (forwarded) { 1480 /* 1481 * The message is further forwarded, so 1482 * we are done here. 
1483 */ 1484 return; 1485 } 1486 } 1487 } else { 1488 error = EINVAL; 1489 } 1490 lwkt_replymsg(&msg->bind.base.lmsg, error); 1491 } 1492 1493 static void 1494 udp_connect(netmsg_t msg) 1495 { 1496 struct socket *so = msg->connect.base.nm_so; 1497 struct sockaddr *nam = msg->connect.nm_nam; 1498 struct thread *td = msg->connect.nm_td; 1499 struct inpcb *inp; 1500 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1501 struct sockaddr_in *if_sin; 1502 struct lwkt_port *port; 1503 int error; 1504 1505 KKASSERT(msg->connect.nm_m == NULL); 1506 1507 inp = so->so_pcb; 1508 if (inp == NULL) { 1509 error = EINVAL; 1510 goto out; 1511 } 1512 1513 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1514 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1515 in_pcblink(inp, &udbinfo[mycpuid]); 1516 } 1517 1518 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1519 error = EISCONN; 1520 goto out; 1521 } 1522 error = 0; 1523 1524 /* 1525 * Bind if we have to 1526 */ 1527 if (inp->inp_lport == 0) { 1528 error = in_pcbbind(inp, NULL, td); 1529 if (error) 1530 goto out; 1531 } 1532 1533 /* 1534 * Calculate the correct protocol processing thread. The connect 1535 * operation must run there. 1536 */ 1537 error = in_pcbladdr(inp, nam, &if_sin, td); 1538 if (error) 1539 goto out; 1540 if (!prison_remote_ip(td, nam)) { 1541 error = EAFNOSUPPORT; /* IPv6 only jail */ 1542 goto out; 1543 } 1544 1545 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1546 inp->inp_laddr.s_addr != INADDR_ANY ? 1547 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1548 if (port != &curthread->td_msgport) { 1549 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1550 int nm_flags = PRUC_RECONNECT; 1551 1552 /* 1553 * in_pcbladdr() may have allocated a route entry for us 1554 * on the current CPU, but we need a route entry on the 1555 * inpcb's owner CPU, so free it here. 
1556 */ 1557 in_pcbresetroute(inp); 1558 1559 if (inp->inp_flags & INP_WILDCARD) { 1560 /* 1561 * Remove this inpcb from the wildcard hash before 1562 * the socket's msgport changes. 1563 */ 1564 udp_remwildcardhash(inp); 1565 } 1566 1567 /* 1568 * We are moving the protocol processing port the socket 1569 * is on, we have to unlink here and re-link on the 1570 * target cpu. 1571 */ 1572 in_pcbunlink(inp, &udbinfo[mycpuid]); 1573 msg->connect.nm_flags |= nm_flags; 1574 1575 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1576 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1577 lwkt_forwardmsg(port, lmsg); 1578 /* msg invalid now */ 1579 return; 1580 } 1581 error = udp_connect_oncpu(inp, sin, if_sin); 1582 out: 1583 if (error && inp != NULL && inp->inp_lport != 0 && 1584 (inp->inp_flags & INP_WILDCARD) == 0) { 1585 boolean_t forwarded; 1586 1587 /* Connect failed; put it to wildcard hash. */ 1588 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1589 error); 1590 if (forwarded) { 1591 /* 1592 * The message is further forwarded, so we are done 1593 * here. 1594 */ 1595 return; 1596 } 1597 } 1598 lwkt_replymsg(&msg->connect.base.lmsg, error); 1599 } 1600 1601 static void 1602 udp_remwildcardhash(struct inpcb *inp) 1603 { 1604 int cpu; 1605 1606 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1607 ("not on owner cpu")); 1608 1609 for (cpu = 0; cpu < ncpus2; ++cpu) { 1610 if (cpu == mycpuid) { 1611 /* 1612 * This inpcb will be removed by the later 1613 * in_pcbremwildcardhash(). 1614 */ 1615 continue; 1616 } 1617 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1618 } 1619 in_pcbremwildcardhash(inp); 1620 } 1621 1622 static int 1623 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1624 struct sockaddr_in *if_sin) 1625 { 1626 struct socket *so = inp->inp_socket; 1627 struct inpcb *oinp; 1628 1629 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1630 sin->sin_addr, sin->sin_port, 1631 inp->inp_laddr.s_addr != INADDR_ANY ? 
1632 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1633 if (oinp != NULL) 1634 return EADDRINUSE; 1635 1636 /* 1637 * No more errors can occur, finish adjusting the socket 1638 * and change the processing port to reflect the connected 1639 * socket. Once set we can no longer safely mess with the 1640 * socket. 1641 */ 1642 1643 if (inp->inp_flags & INP_WILDCARD) 1644 udp_remwildcardhash(inp); 1645 1646 if (inp->inp_laddr.s_addr == INADDR_ANY) 1647 inp->inp_laddr = if_sin->sin_addr; 1648 inp->inp_faddr = sin->sin_addr; 1649 inp->inp_fport = sin->sin_port; 1650 in_pcbinsconnhash(inp); 1651 1652 soisconnected(so); 1653 1654 return 0; 1655 } 1656 1657 static void 1658 udp_detach2(struct socket *so) 1659 { 1660 in_pcbdetach(so->so_pcb); 1661 sodiscard(so); 1662 sofree(so); 1663 } 1664 1665 static void 1666 udp_detach_final_dispatch(netmsg_t msg) 1667 { 1668 udp_detach2(msg->base.nm_so); 1669 } 1670 1671 static void 1672 udp_detach_oncpu_dispatch(netmsg_t msg) 1673 { 1674 struct netmsg_base *clomsg = &msg->base; 1675 struct socket *so = clomsg->nm_so; 1676 struct inpcb *inp = so->so_pcb; 1677 struct thread *td = curthread; 1678 int nextcpu, cpuid = mycpuid; 1679 1680 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1681 1682 if (inp->inp_flags & INP_WILDCARD) { 1683 /* 1684 * This inp will be removed on the inp's 1685 * owner CPU later, so don't do it now. 1686 */ 1687 if (&td->td_msgport != so->so_port) 1688 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1689 } 1690 1691 if (cpuid == 0) { 1692 /* 1693 * Free and clear multicast socket option, 1694 * which is only accessed in netisr0. 1695 */ 1696 ip_freemoptions(inp->inp_moptions); 1697 inp->inp_moptions = NULL; 1698 } 1699 1700 nextcpu = cpuid + 1; 1701 if (nextcpu < ncpus2) { 1702 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1703 } else { 1704 /* 1705 * No one could see this inpcb now; destroy this 1706 * inpcb in its owner netisr. 
1707 */ 1708 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1709 udp_detach_final_dispatch); 1710 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1711 } 1712 } 1713 1714 static void 1715 udp_detach(netmsg_t msg) 1716 { 1717 struct socket *so = msg->detach.base.nm_so; 1718 struct netmsg_base *clomsg; 1719 struct inpcb *inp; 1720 1721 inp = so->so_pcb; 1722 if (inp == NULL) { 1723 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1724 return; 1725 } 1726 1727 /* 1728 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1729 * sofree() later. 1730 */ 1731 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1732 1733 if (ncpus2 == 1) { 1734 /* Only one CPU, detach the inpcb directly. */ 1735 udp_detach2(so); 1736 return; 1737 } 1738 1739 /* 1740 * Remove this inpcb from the inpcb list first, so that 1741 * no one could find this inpcb from the inpcb list. 1742 */ 1743 in_pcbofflist(inp); 1744 1745 if (inp->inp_flags & INP_DIRECT_DETACH) { 1746 /* 1747 * Direct detaching is allowed 1748 */ 1749 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1750 ("in the wildcardhash")); 1751 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1752 udp_detach2(so); 1753 return; 1754 } 1755 1756 /* 1757 * Go through netisrs which process UDP to make sure 1758 * no one could find this inpcb anymore. 
1759 */ 1760 clomsg = &so->so_clomsg; 1761 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1762 udp_detach_oncpu_dispatch); 1763 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1764 } 1765 1766 static void 1767 udp_disconnect(netmsg_t msg) 1768 { 1769 struct socket *so = msg->disconnect.base.nm_so; 1770 struct inpcb *inp; 1771 boolean_t forwarded; 1772 int error = 0; 1773 1774 inp = so->so_pcb; 1775 if (inp == NULL) { 1776 error = EINVAL; 1777 goto out; 1778 } 1779 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1780 error = ENOTCONN; 1781 goto out; 1782 } 1783 1784 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1785 1786 in_pcbdisconnect(inp); 1787 1788 /* 1789 * Follow traditional BSD behavior and retain the local port 1790 * binding. But, fix the old misbehavior of overwriting any 1791 * previously bound local address. 1792 */ 1793 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1794 inp->inp_laddr.s_addr = INADDR_ANY; 1795 1796 if (so->so_state & SS_ISCLOSING) { 1797 /* 1798 * If this socket is being closed, there is no need 1799 * to put this socket back into wildcard hash table. 1800 */ 1801 error = 0; 1802 goto out; 1803 } 1804 1805 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1806 if (forwarded) { 1807 /* 1808 * The message is further forwarded, so we are done 1809 * here. 
1810 */ 1811 return; 1812 } 1813 out: 1814 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1815 } 1816 1817 void 1818 udp_shutdown(netmsg_t msg) 1819 { 1820 struct socket *so = msg->shutdown.base.nm_so; 1821 struct inpcb *inp; 1822 int error; 1823 1824 inp = so->so_pcb; 1825 if (inp) { 1826 socantsendmore(so); 1827 error = 0; 1828 } else { 1829 error = EINVAL; 1830 } 1831 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1832 } 1833 1834 struct pr_usrreqs udp_usrreqs = { 1835 .pru_abort = udp_abort, 1836 .pru_accept = pr_generic_notsupp, 1837 .pru_attach = udp_attach, 1838 .pru_bind = udp_bind, 1839 .pru_connect = udp_connect, 1840 .pru_connect2 = pr_generic_notsupp, 1841 .pru_control = in_control_dispatch, 1842 .pru_detach = udp_detach, 1843 .pru_disconnect = udp_disconnect, 1844 .pru_listen = pr_generic_notsupp, 1845 .pru_peeraddr = in_setpeeraddr_dispatch, 1846 .pru_rcvd = pr_generic_notsupp, 1847 .pru_rcvoob = pr_generic_notsupp, 1848 .pru_send = udp_send, 1849 .pru_sense = pru_sense_null, 1850 .pru_shutdown = udp_shutdown, 1851 .pru_sockaddr = in_setsockaddr_dispatch, 1852 .pru_sosend = sosendudp, 1853 .pru_soreceive = soreceive 1854 }; 1855