1 /* $NetBSD: ip_output.c,v 1.102 2002/09/17 13:10:59 darrenr Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Public Access Networks Corporation ("Panix"). It was developed under 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.102 2002/09/17 13:10:59 darrenr Exp $"); 106 107 #include "opt_pfil_hooks.h" 108 #include "opt_ipsec.h" 109 #include "opt_mrouting.h" 110 111 #include <sys/param.h> 112 #include <sys/malloc.h> 113 #include <sys/mbuf.h> 114 #include <sys/errno.h> 115 #include <sys/protosw.h> 116 #include <sys/socket.h> 117 #include <sys/socketvar.h> 118 #include <sys/systm.h> 119 #include <sys/proc.h> 120 121 #include <net/if.h> 122 #include <net/route.h> 123 #include <net/pfil.h> 124 125 #include <netinet/in.h> 126 #include <netinet/in_systm.h> 127 #include <netinet/ip.h> 128 #include <netinet/in_pcb.h> 129 #include <netinet/in_var.h> 130 #include <netinet/ip_var.h> 131 132 #ifdef MROUTING 133 #include <netinet/ip_mroute.h> 134 #endif 135 136 #include <machine/stdarg.h> 137 138 #ifdef IPSEC 139 #include <netinet6/ipsec.h> 140 #include <netkey/key.h> 141 #include <netkey/key_debug.h> 142 #endif /*IPSEC*/ 143 144 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 145 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); 146 static void ip_mloopback 147 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); 148 149 #ifdef PFIL_HOOKS 150 extern struct pfil_head inet_pfil_hook; /* XXX */ 151 #endif 152 153 /* 154 * IP output. The packet in mbuf chain m contains a skeletal IP 155 * header (with len, off, ttl, proto, tos, src, dst). 156 * The mbuf chain containing the packet will be freed. 157 * The mbuf opt, if present, will not be freed. 158 */ 159 int 160 #if __STDC__ 161 ip_output(struct mbuf *m0, ...) 162 #else 163 ip_output(m0, va_alist) 164 struct mbuf *m0; 165 va_dcl 166 #endif 167 { 168 struct ip *ip, *mhip; 169 struct ifnet *ifp; 170 struct mbuf *m = m0; 171 int hlen = sizeof (struct ip); 172 int len, off, error = 0; 173 struct route iproute; 174 struct sockaddr_in *dst; 175 struct in_ifaddr *ia; 176 struct mbuf *opt; 177 struct route *ro; 178 int flags, sw_csum; 179 int *mtu_p; 180 u_long mtu; 181 struct ip_moptions *imo; 182 va_list ap; 183 #ifdef IPSEC 184 struct socket *so; 185 struct secpolicy *sp = NULL; 186 #endif /*IPSEC*/ 187 u_int16_t ip_len; 188 189 len = 0; 190 va_start(ap, m0); 191 opt = va_arg(ap, struct mbuf *); 192 ro = va_arg(ap, struct route *); 193 flags = va_arg(ap, int); 194 imo = va_arg(ap, struct ip_moptions *); 195 if (flags & IP_RETURNMTU) 196 mtu_p = va_arg(ap, int *); 197 else 198 mtu_p = NULL; 199 va_end(ap); 200 201 #ifdef IPSEC 202 so = ipsec_getsocket(m); 203 (void)ipsec_setsocket(m, NULL); 204 #endif /*IPSEC*/ 205 206 #ifdef DIAGNOSTIC 207 if ((m->m_flags & M_PKTHDR) == 0) 208 panic("ip_output no HDR"); 209 #endif 210 if (opt) { 211 m = ip_insertoptions(m, opt, &len); 212 if (len >= sizeof(struct ip)) 213 hlen = len; 214 } 215 ip = mtod(m, struct ip *); 216 /* 217 * Fill in IP header. 218 */ 219 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 220 ip->ip_v = IPVERSION; 221 ip->ip_off = htons(0); 222 ip->ip_id = htons(ip_id++); 223 ip->ip_hl = hlen >> 2; 224 ipstat.ips_localout++; 225 } else { 226 hlen = ip->ip_hl << 2; 227 } 228 /* 229 * Route packet. 230 */ 231 if (ro == 0) { 232 ro = &iproute; 233 bzero((caddr_t)ro, sizeof (*ro)); 234 } 235 dst = satosin(&ro->ro_dst); 236 /* 237 * If there is a cached route, 238 * check that it is to the same destination 239 * and is still up. If not, free it and try again. 240 * The address family should also be checked in case of sharing the 241 * cache with IPv6. 242 */ 243 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 244 dst->sin_family != AF_INET || 245 !in_hosteq(dst->sin_addr, ip->ip_dst))) { 246 RTFREE(ro->ro_rt); 247 ro->ro_rt = (struct rtentry *)0; 248 } 249 if (ro->ro_rt == 0) { 250 bzero(dst, sizeof(*dst)); 251 dst->sin_family = AF_INET; 252 dst->sin_len = sizeof(*dst); 253 dst->sin_addr = ip->ip_dst; 254 } 255 /* 256 * If routing to interface only, 257 * short circuit routing lookup. 258 */ 259 if (flags & IP_ROUTETOIF) { 260 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) { 261 ipstat.ips_noroute++; 262 error = ENETUNREACH; 263 goto bad; 264 } 265 ifp = ia->ia_ifp; 266 mtu = ifp->if_mtu; 267 ip->ip_ttl = 1; 268 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) || 269 ip->ip_dst.s_addr == INADDR_BROADCAST) && 270 imo != NULL && imo->imo_multicast_ifp != NULL) { 271 ifp = imo->imo_multicast_ifp; 272 mtu = ifp->if_mtu; 273 IFP_TO_IA(ifp, ia); 274 } else { 275 if (ro->ro_rt == 0) 276 rtalloc(ro); 277 if (ro->ro_rt == 0) { 278 ipstat.ips_noroute++; 279 error = EHOSTUNREACH; 280 goto bad; 281 } 282 ia = ifatoia(ro->ro_rt->rt_ifa); 283 ifp = ro->ro_rt->rt_ifp; 284 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 285 mtu = ifp->if_mtu; 286 ro->ro_rt->rt_use++; 287 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 288 dst = satosin(ro->ro_rt->rt_gateway); 289 } 290 if (IN_MULTICAST(ip->ip_dst.s_addr) || 291 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 292 struct in_multi *inm; 293 294 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 295 M_BCAST : M_MCAST; 296 /* 297 * IP destination address is multicast. Make sure "dst" 298 * still points to the address in "ro". (It may have been 299 * changed to point to a gateway address, above.) 300 */ 301 dst = satosin(&ro->ro_dst); 302 /* 303 * See if the caller provided any multicast options 304 */ 305 if (imo != NULL) 306 ip->ip_ttl = imo->imo_multicast_ttl; 307 else 308 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 309 310 /* 311 * if we don't know the outgoing ifp yet, we can't generate 312 * output 313 */ 314 if (!ifp) { 315 ipstat.ips_noroute++; 316 error = ENETUNREACH; 317 goto bad; 318 } 319 320 /* 321 * If the packet is multicast or broadcast, confirm that 322 * the outgoing interface can transmit it. 323 */ 324 if (((m->m_flags & M_MCAST) && 325 (ifp->if_flags & IFF_MULTICAST) == 0) || 326 ((m->m_flags & M_BCAST) && 327 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) { 328 ipstat.ips_noroute++; 329 error = ENETUNREACH; 330 goto bad; 331 } 332 /* 333 * If source address not specified yet, use an address 334 * of outgoing interface. 335 */ 336 if (in_nullhost(ip->ip_src)) { 337 struct in_ifaddr *ia; 338 339 IFP_TO_IA(ifp, ia); 340 if (!ia) { 341 error = EADDRNOTAVAIL; 342 goto bad; 343 } 344 ip->ip_src = ia->ia_addr.sin_addr; 345 } 346 347 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 348 if (inm != NULL && 349 (imo == NULL || imo->imo_multicast_loop)) { 350 /* 351 * If we belong to the destination multicast group 352 * on the outgoing interface, and the caller did not 353 * forbid loopback, loop back a copy. 354 */ 355 ip_mloopback(ifp, m, dst); 356 } 357 #ifdef MROUTING 358 else { 359 /* 360 * If we are acting as a multicast router, perform 361 * multicast forwarding as if the packet had just 362 * arrived on the interface to which we are about 363 * to send. The multicast forwarding function 364 * recursively calls this function, using the 365 * IP_FORWARDING flag to prevent infinite recursion. 366 * 367 * Multicasts that are looped back by ip_mloopback(), 368 * above, will be forwarded by the ip_input() routine, 369 * if necessary. 370 */ 371 extern struct socket *ip_mrouter; 372 373 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 374 if (ip_mforward(m, ifp) != 0) { 375 m_freem(m); 376 goto done; 377 } 378 } 379 } 380 #endif 381 /* 382 * Multicasts with a time-to-live of zero may be looped- 383 * back, above, but must not be transmitted on a network. 384 * Also, multicasts addressed to the loopback interface 385 * are not sent -- the above call to ip_mloopback() will 386 * loop back a copy if this host actually belongs to the 387 * destination group on the loopback interface. 388 */ 389 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 390 m_freem(m); 391 goto done; 392 } 393 394 goto sendit; 395 } 396 #ifndef notdef 397 /* 398 * If source address not specified yet, use address 399 * of outgoing interface. 400 */ 401 if (in_nullhost(ip->ip_src)) 402 ip->ip_src = ia->ia_addr.sin_addr; 403 #endif 404 405 /* 406 * packets with Class-D address as source are not valid per 407 * RFC 1112 408 */ 409 if (IN_MULTICAST(ip->ip_src.s_addr)) { 410 ipstat.ips_odropped++; 411 error = EADDRNOTAVAIL; 412 goto bad; 413 } 414 415 /* 416 * Look for broadcast address and 417 * and verify user is allowed to send 418 * such a packet. 419 */ 420 if (in_broadcast(dst->sin_addr, ifp)) { 421 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 422 error = EADDRNOTAVAIL; 423 goto bad; 424 } 425 if ((flags & IP_ALLOWBROADCAST) == 0) { 426 error = EACCES; 427 goto bad; 428 } 429 /* don't allow broadcast messages to be fragmented */ 430 if (ntohs(ip->ip_len) > ifp->if_mtu) { 431 error = EMSGSIZE; 432 goto bad; 433 } 434 m->m_flags |= M_BCAST; 435 } else 436 m->m_flags &= ~M_BCAST; 437 438 sendit: 439 /* 440 * If we're doing Path MTU Discovery, we need to set DF unless 441 * the route's MTU is locked. 442 */ 443 if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL && 444 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 445 ip->ip_off |= htons(IP_DF); 446 447 /* Remember the current ip_len */ 448 ip_len = ntohs(ip->ip_len); 449 450 #ifdef IPSEC 451 /* get SP for this packet */ 452 if (so == NULL) 453 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 454 else 455 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 456 457 if (sp == NULL) { 458 ipsecstat.out_inval++; 459 goto bad; 460 } 461 462 error = 0; 463 464 /* check policy */ 465 switch (sp->policy) { 466 case IPSEC_POLICY_DISCARD: 467 /* 468 * This packet is just discarded. 469 */ 470 ipsecstat.out_polvio++; 471 goto bad; 472 473 case IPSEC_POLICY_BYPASS: 474 case IPSEC_POLICY_NONE: 475 /* no need to do IPsec. */ 476 goto skip_ipsec; 477 478 case IPSEC_POLICY_IPSEC: 479 if (sp->req == NULL) { 480 /* XXX should be panic ? */ 481 printf("ip_output: No IPsec request specified.\n"); 482 error = EINVAL; 483 goto bad; 484 } 485 break; 486 487 case IPSEC_POLICY_ENTRUST: 488 default: 489 printf("ip_output: Invalid policy found. %d\n", sp->policy); 490 } 491 492 /* 493 * ipsec4_output() expects ip_len and ip_off in network 494 * order. They have been set to network order above. 495 */ 496 497 { 498 struct ipsec_output_state state; 499 bzero(&state, sizeof(state)); 500 state.m = m; 501 if (flags & IP_ROUTETOIF) { 502 state.ro = &iproute; 503 bzero(&iproute, sizeof(iproute)); 504 } else 505 state.ro = ro; 506 state.dst = (struct sockaddr *)dst; 507 508 /* 509 * We can't defer the checksum of payload data if 510 * we're about to encrypt/authenticate it. 511 * 512 * XXX When we support crypto offloading functions of 513 * XXX network interfaces, we need to reconsider this, 514 * XXX since it's likely that they'll support checksumming, 515 * XXX as well. 516 */ 517 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 518 in_delayed_cksum(m); 519 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 520 } 521 522 error = ipsec4_output(&state, sp, flags); 523 524 m = state.m; 525 if (flags & IP_ROUTETOIF) { 526 /* 527 * if we have tunnel mode SA, we may need to ignore 528 * IP_ROUTETOIF. 529 */ 530 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 531 flags &= ~IP_ROUTETOIF; 532 ro = state.ro; 533 } 534 } else 535 ro = state.ro; 536 dst = (struct sockaddr_in *)state.dst; 537 if (error) { 538 /* mbuf is already reclaimed in ipsec4_output. */ 539 m0 = NULL; 540 switch (error) { 541 case EHOSTUNREACH: 542 case ENETUNREACH: 543 case EMSGSIZE: 544 case ENOBUFS: 545 case ENOMEM: 546 break; 547 default: 548 printf("ip4_output (ipsec): error code %d\n", error); 549 /*fall through*/ 550 case ENOENT: 551 /* don't show these error codes to the user */ 552 error = 0; 553 break; 554 } 555 goto bad; 556 } 557 558 /* be sure to update variables that are affected by ipsec4_output() */ 559 ip = mtod(m, struct ip *); 560 #ifdef _IP_VHL 561 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 562 #else 563 hlen = ip->ip_hl << 2; 564 #endif 565 ip_len = ntohs(ip->ip_len); 566 567 if (ro->ro_rt == NULL) { 568 if ((flags & IP_ROUTETOIF) == 0) { 569 printf("ip_output: " 570 "can't update route after IPsec processing\n"); 571 error = EHOSTUNREACH; /*XXX*/ 572 goto bad; 573 } 574 } else { 575 /* nobody uses ia beyond here */ 576 if (state.encap) 577 ifp = ro->ro_rt->rt_ifp; 578 } 579 } 580 581 skip_ipsec: 582 #endif /*IPSEC*/ 583 584 #ifdef PFIL_HOOKS 585 /* 586 * Run through list of hooks for output packets. 587 */ 588 if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, 589 PFIL_OUT)) != 0) 590 goto done; 591 if (m == NULL) 592 goto done; 593 594 ip = mtod(m, struct ip *); 595 #endif /* PFIL_HOOKS */ 596 597 /* 598 * If small enough for mtu of path, can just send directly. 599 */ 600 if (ip_len <= mtu) { 601 #if IFA_STATS 602 /* 603 * search for the source address structure to 604 * maintain output statistics. 605 */ 606 INADDR_TO_IA(ip->ip_src, ia); 607 if (ia) 608 ia->ia_ifa.ifa_data.ifad_outbytes += ip_len; 609 #endif 610 /* 611 * Always initialize the sum to 0! Some HW assisted 612 * checksumming requires this. 613 */ 614 ip->ip_sum = 0; 615 m->m_pkthdr.csum_flags |= M_CSUM_IPv4; 616 617 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx; 618 619 /* 620 * Perform any checksums that the hardware can't do 621 * for us. 622 * 623 * XXX Does any hardware require the {th,uh}_sum 624 * XXX fields to be 0? 625 */ 626 if (sw_csum & M_CSUM_IPv4) 627 ip->ip_sum = in_cksum(m, hlen); 628 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 629 in_delayed_cksum(m); 630 sw_csum &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 631 } 632 m->m_pkthdr.csum_flags &= ifp->if_csum_flags_tx; 633 634 #ifdef IPSEC 635 /* clean ipsec history once it goes out of the node */ 636 ipsec_delaux(m); 637 #endif 638 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 639 goto done; 640 } 641 642 /* 643 * We can't use HW checksumming if we're about to 644 * to fragment the packet. 645 * 646 * XXX Some hardware can do this. 647 */ 648 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 649 in_delayed_cksum(m); 650 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 651 } 652 653 /* 654 * Too large for interface; fragment if possible. 655 * Must be able to put at least 8 bytes per fragment. 656 */ 657 if (ntohs(ip->ip_off) & IP_DF) { 658 if (flags & IP_RETURNMTU) 659 *mtu_p = mtu; 660 error = EMSGSIZE; 661 ipstat.ips_cantfrag++; 662 goto bad; 663 } 664 len = (mtu - hlen) &~ 7; 665 if (len < 8) { 666 error = EMSGSIZE; 667 goto bad; 668 } 669 670 { 671 int mhlen, firstlen = len; 672 struct mbuf **mnext = &m->m_nextpkt; 673 int fragments = 0; 674 int s; 675 676 /* 677 * Loop through length of segment after first fragment, 678 * make new header and copy data of each part and link onto chain. 679 */ 680 m0 = m; 681 mhlen = sizeof (struct ip); 682 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 683 MGETHDR(m, M_DONTWAIT, MT_HEADER); 684 if (m == 0) { 685 error = ENOBUFS; 686 ipstat.ips_odropped++; 687 goto sendorfree; 688 } 689 *mnext = m; 690 mnext = &m->m_nextpkt; 691 m->m_data += max_linkhdr; 692 mhip = mtod(m, struct ip *); 693 *mhip = *ip; 694 /* we must inherit MCAST and BCAST flags */ 695 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 696 if (hlen > sizeof (struct ip)) { 697 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 698 mhip->ip_hl = mhlen >> 2; 699 } 700 m->m_len = mhlen; 701 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); 702 if (ip->ip_off & IP_MF) 703 mhip->ip_off |= IP_MF; 704 if (off + len >= ntohs(ip->ip_len)) 705 len = ntohs(ip->ip_len) - off; 706 else 707 mhip->ip_off |= IP_MF; 708 HTONS(mhip->ip_off); 709 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 710 m->m_next = m_copy(m0, off, len); 711 if (m->m_next == 0) { 712 error = ENOBUFS; /* ??? */ 713 ipstat.ips_odropped++; 714 goto sendorfree; 715 } 716 m->m_pkthdr.len = mhlen + len; 717 m->m_pkthdr.rcvif = (struct ifnet *)0; 718 mhip->ip_sum = 0; 719 mhip->ip_sum = in_cksum(m, mhlen); 720 ipstat.ips_ofragments++; 721 fragments++; 722 } 723 /* 724 * Update first fragment by trimming what's been copied out 725 * and updating header, then send each fragment (in order). 726 */ 727 m = m0; 728 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 729 m->m_pkthdr.len = hlen + firstlen; 730 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 731 ip->ip_off |= htons(IP_MF); 732 ip->ip_sum = 0; 733 ip->ip_sum = in_cksum(m, hlen); 734 sendorfree: 735 /* 736 * If there is no room for all the fragments, don't queue 737 * any of them. 738 */ 739 s = splnet(); 740 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments) 741 error = ENOBUFS; 742 splx(s); 743 for (m = m0; m; m = m0) { 744 m0 = m->m_nextpkt; 745 m->m_nextpkt = 0; 746 if (error == 0) { 747 #if IFA_STATS 748 /* 749 * search for the source address structure to 750 * maintain output statistics. 751 */ 752 INADDR_TO_IA(ip->ip_src, ia); 753 if (ia) { 754 ia->ia_ifa.ifa_data.ifad_outbytes += 755 ntohs(ip->ip_len); 756 } 757 #endif 758 #ifdef IPSEC 759 /* clean ipsec history once it goes out of the node */ 760 ipsec_delaux(m); 761 #endif 762 error = (*ifp->if_output)(ifp, m, sintosa(dst), 763 ro->ro_rt); 764 } else 765 m_freem(m); 766 } 767 768 if (error == 0) 769 ipstat.ips_fragmented++; 770 } 771 done: 772 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) { 773 RTFREE(ro->ro_rt); 774 ro->ro_rt = 0; 775 } 776 777 #ifdef IPSEC 778 if (sp != NULL) { 779 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 780 printf("DP ip_output call free SP:%p\n", sp)); 781 key_freesp(sp); 782 } 783 #endif /* IPSEC */ 784 785 return (error); 786 bad: 787 m_freem(m); 788 goto done; 789 } 790 791 /* 792 * Process a delayed payload checksum calculation. 793 */ 794 void 795 in_delayed_cksum(struct mbuf *m) 796 { 797 struct ip *ip; 798 u_int16_t csum, offset; 799 800 ip = mtod(m, struct ip *); 801 offset = ip->ip_hl << 2; 802 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset); 803 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0) 804 csum = 0xffff; 805 806 offset += m->m_pkthdr.csum_data; /* checksum offset */ 807 808 if ((offset + sizeof(u_int16_t)) > m->m_len) { 809 /* This happen when ip options were inserted 810 printf("in_delayed_cksum: pullup len %d off %d proto %d\n", 811 m->m_len, offset, ip->ip_p); 812 */ 813 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum); 814 } else 815 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 816 } 817 818 /* 819 * Determine the maximum length of the options to be inserted; 820 * we would far rather allocate too much space rather than too little. 821 */ 822 823 u_int 824 ip_optlen(inp) 825 struct inpcb *inp; 826 { 827 struct mbuf *m = inp->inp_options; 828 829 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) 830 return (m->m_len - offsetof(struct ipoption, ipopt_dst)); 831 else 832 return 0; 833 } 834 835 836 /* 837 * Insert IP options into preformed packet. 838 * Adjust IP destination as required for IP source routing, 839 * as indicated by a non-zero in_addr at the start of the options. 840 */ 841 static struct mbuf * 842 ip_insertoptions(m, opt, phlen) 843 struct mbuf *m; 844 struct mbuf *opt; 845 int *phlen; 846 { 847 struct ipoption *p = mtod(opt, struct ipoption *); 848 struct mbuf *n; 849 struct ip *ip = mtod(m, struct ip *); 850 unsigned optlen; 851 852 optlen = opt->m_len - sizeof(p->ipopt_dst); 853 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 854 return (m); /* XXX should fail */ 855 if (!in_nullhost(p->ipopt_dst)) 856 ip->ip_dst = p->ipopt_dst; 857 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 858 MGETHDR(n, M_DONTWAIT, MT_HEADER); 859 if (n == 0) 860 return (m); 861 M_COPY_PKTHDR(n, m); 862 m->m_flags &= ~M_PKTHDR; 863 m->m_len -= sizeof(struct ip); 864 m->m_data += sizeof(struct ip); 865 n->m_next = m; 866 m = n; 867 m->m_len = optlen + sizeof(struct ip); 868 m->m_data += max_linkhdr; 869 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 870 } else { 871 m->m_data -= optlen; 872 m->m_len += optlen; 873 memmove(mtod(m, caddr_t), ip, sizeof(struct ip)); 874 } 875 m->m_pkthdr.len += optlen; 876 ip = mtod(m, struct ip *); 877 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); 878 *phlen = sizeof(struct ip) + optlen; 879 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 880 return (m); 881 } 882 883 /* 884 * Copy options from ip to jp, 885 * omitting those not copied during fragmentation. 886 */ 887 int 888 ip_optcopy(ip, jp) 889 struct ip *ip, *jp; 890 { 891 u_char *cp, *dp; 892 int opt, optlen, cnt; 893 894 cp = (u_char *)(ip + 1); 895 dp = (u_char *)(jp + 1); 896 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 897 for (; cnt > 0; cnt -= optlen, cp += optlen) { 898 opt = cp[0]; 899 if (opt == IPOPT_EOL) 900 break; 901 if (opt == IPOPT_NOP) { 902 /* Preserve for IP mcast tunnel's LSRR alignment. */ 903 *dp++ = IPOPT_NOP; 904 optlen = 1; 905 continue; 906 } 907 #ifdef DIAGNOSTIC 908 if (cnt < IPOPT_OLEN + sizeof(*cp)) 909 panic("malformed IPv4 option passed to ip_optcopy"); 910 #endif 911 optlen = cp[IPOPT_OLEN]; 912 #ifdef DIAGNOSTIC 913 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 914 panic("malformed IPv4 option passed to ip_optcopy"); 915 #endif 916 /* bogus lengths should have been caught by ip_dooptions */ 917 if (optlen > cnt) 918 optlen = cnt; 919 if (IPOPT_COPIED(opt)) { 920 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); 921 dp += optlen; 922 } 923 } 924 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 925 *dp++ = IPOPT_EOL; 926 return (optlen); 927 } 928 929 /* 930 * IP socket option processing. 931 */ 932 int 933 ip_ctloutput(op, so, level, optname, mp) 934 int op; 935 struct socket *so; 936 int level, optname; 937 struct mbuf **mp; 938 { 939 struct inpcb *inp = sotoinpcb(so); 940 struct mbuf *m = *mp; 941 int optval = 0; 942 int error = 0; 943 #ifdef IPSEC 944 #ifdef __NetBSD__ 945 struct proc *p = curproc; /*XXX*/ 946 #endif 947 #endif 948 949 if (level != IPPROTO_IP) { 950 error = EINVAL; 951 if (op == PRCO_SETOPT && *mp) 952 (void) m_free(*mp); 953 } else switch (op) { 954 955 case PRCO_SETOPT: 956 switch (optname) { 957 case IP_OPTIONS: 958 #ifdef notyet 959 case IP_RETOPTS: 960 return (ip_pcbopts(optname, &inp->inp_options, m)); 961 #else 962 return (ip_pcbopts(&inp->inp_options, m)); 963 #endif 964 965 case IP_TOS: 966 case IP_TTL: 967 case IP_RECVOPTS: 968 case IP_RECVRETOPTS: 969 case IP_RECVDSTADDR: 970 case IP_RECVIF: 971 if (m == NULL || m->m_len != sizeof(int)) 972 error = EINVAL; 973 else { 974 optval = *mtod(m, int *); 975 switch (optname) { 976 977 case IP_TOS: 978 inp->inp_ip.ip_tos = optval; 979 break; 980 981 case IP_TTL: 982 inp->inp_ip.ip_ttl = optval; 983 break; 984 #define OPTSET(bit) \ 985 if (optval) \ 986 inp->inp_flags |= bit; \ 987 else \ 988 inp->inp_flags &= ~bit; 989 990 case IP_RECVOPTS: 991 OPTSET(INP_RECVOPTS); 992 break; 993 994 case IP_RECVRETOPTS: 995 OPTSET(INP_RECVRETOPTS); 996 break; 997 998 case IP_RECVDSTADDR: 999 OPTSET(INP_RECVDSTADDR); 1000 break; 1001 1002 case IP_RECVIF: 1003 OPTSET(INP_RECVIF); 1004 break; 1005 } 1006 } 1007 break; 1008 #undef OPTSET 1009 1010 case IP_MULTICAST_IF: 1011 case IP_MULTICAST_TTL: 1012 case IP_MULTICAST_LOOP: 1013 case IP_ADD_MEMBERSHIP: 1014 case IP_DROP_MEMBERSHIP: 1015 error = ip_setmoptions(optname, &inp->inp_moptions, m); 1016 break; 1017 1018 case IP_PORTRANGE: 1019 if (m == 0 || m->m_len != sizeof(int)) 1020 error = EINVAL; 1021 else { 1022 optval = *mtod(m, int *); 1023 1024 switch (optval) { 1025 1026 case IP_PORTRANGE_DEFAULT: 1027 case IP_PORTRANGE_HIGH: 1028 inp->inp_flags &= ~(INP_LOWPORT); 1029 break; 1030 1031 case IP_PORTRANGE_LOW: 1032 inp->inp_flags |= INP_LOWPORT; 1033 break; 1034 1035 default: 1036 error = EINVAL; 1037 break; 1038 } 1039 } 1040 break; 1041 1042 #ifdef IPSEC 1043 case IP_IPSEC_POLICY: 1044 { 1045 caddr_t req = NULL; 1046 size_t len = 0; 1047 int priv = 0; 1048 1049 #ifdef __NetBSD__ 1050 if (p == 0 || suser(p->p_ucred, &p->p_acflag)) 1051 priv = 0; 1052 else 1053 priv = 1; 1054 #else 1055 priv = (in6p->in6p_socket->so_state & SS_PRIV); 1056 #endif 1057 if (m) { 1058 req = mtod(m, caddr_t); 1059 len = m->m_len; 1060 } 1061 error = ipsec4_set_policy(inp, optname, req, len, priv); 1062 break; 1063 } 1064 #endif /*IPSEC*/ 1065 1066 default: 1067 error = ENOPROTOOPT; 1068 break; 1069 } 1070 if (m) 1071 (void)m_free(m); 1072 break; 1073 1074 case PRCO_GETOPT: 1075 switch (optname) { 1076 case IP_OPTIONS: 1077 case IP_RETOPTS: 1078 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1079 if (inp->inp_options) { 1080 m->m_len = inp->inp_options->m_len; 1081 bcopy(mtod(inp->inp_options, caddr_t), 1082 mtod(m, caddr_t), (unsigned)m->m_len); 1083 } else 1084 m->m_len = 0; 1085 break; 1086 1087 case IP_TOS: 1088 case IP_TTL: 1089 case IP_RECVOPTS: 1090 case IP_RECVRETOPTS: 1091 case IP_RECVDSTADDR: 1092 case IP_RECVIF: 1093 case IP_ERRORMTU: 1094 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1095 m->m_len = sizeof(int); 1096 switch (optname) { 1097 1098 case IP_TOS: 1099 optval = inp->inp_ip.ip_tos; 1100 break; 1101 1102 case IP_TTL: 1103 optval = inp->inp_ip.ip_ttl; 1104 break; 1105 1106 case IP_ERRORMTU: 1107 optval = inp->inp_errormtu; 1108 break; 1109 1110 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1111 1112 case IP_RECVOPTS: 1113 optval = OPTBIT(INP_RECVOPTS); 1114 break; 1115 1116 case IP_RECVRETOPTS: 1117 optval = OPTBIT(INP_RECVRETOPTS); 1118 break; 1119 1120 case IP_RECVDSTADDR: 1121 optval = OPTBIT(INP_RECVDSTADDR); 1122 break; 1123 1124 case IP_RECVIF: 1125 optval = OPTBIT(INP_RECVIF); 1126 break; 1127 } 1128 *mtod(m, int *) = optval; 1129 break; 1130 1131 #ifdef IPSEC 1132 case IP_IPSEC_POLICY: 1133 { 1134 caddr_t req = NULL; 1135 size_t len = 0; 1136 1137 if (m) { 1138 req = mtod(m, caddr_t); 1139 len = m->m_len; 1140 } 1141 error = ipsec4_get_policy(inp, req, len, mp); 1142 break; 1143 } 1144 #endif /*IPSEC*/ 1145 1146 case IP_MULTICAST_IF: 1147 case IP_MULTICAST_TTL: 1148 case IP_MULTICAST_LOOP: 1149 case IP_ADD_MEMBERSHIP: 1150 case IP_DROP_MEMBERSHIP: 1151 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1152 break; 1153 1154 case IP_PORTRANGE: 1155 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1156 m->m_len = sizeof(int); 1157 1158 if (inp->inp_flags & INP_LOWPORT) 1159 optval = IP_PORTRANGE_LOW; 1160 else 1161 optval = IP_PORTRANGE_DEFAULT; 1162 1163 *mtod(m, int *) = optval; 1164 break; 1165 1166 default: 1167 error = ENOPROTOOPT; 1168 break; 1169 } 1170 break; 1171 } 1172 return (error); 1173 } 1174 1175 /* 1176 * Set up IP options in pcb for insertion in output packets. 1177 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1178 * with destination address if source routed. 1179 */ 1180 int 1181 #ifdef notyet 1182 ip_pcbopts(optname, pcbopt, m) 1183 int optname; 1184 #else 1185 ip_pcbopts(pcbopt, m) 1186 #endif 1187 struct mbuf **pcbopt; 1188 struct mbuf *m; 1189 { 1190 int cnt, optlen; 1191 u_char *cp; 1192 u_char opt; 1193 1194 /* turn off any old options */ 1195 if (*pcbopt) 1196 (void)m_free(*pcbopt); 1197 *pcbopt = 0; 1198 if (m == (struct mbuf *)0 || m->m_len == 0) { 1199 /* 1200 * Only turning off any previous options. 1201 */ 1202 if (m) 1203 (void)m_free(m); 1204 return (0); 1205 } 1206 1207 #ifndef __vax__ 1208 if (m->m_len % sizeof(int32_t)) 1209 goto bad; 1210 #endif 1211 /* 1212 * IP first-hop destination address will be stored before 1213 * actual options; move other options back 1214 * and clear it when none present. 1215 */ 1216 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1217 goto bad; 1218 cnt = m->m_len; 1219 m->m_len += sizeof(struct in_addr); 1220 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1221 memmove(cp, mtod(m, caddr_t), (unsigned)cnt); 1222 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1223 1224 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1225 opt = cp[IPOPT_OPTVAL]; 1226 if (opt == IPOPT_EOL) 1227 break; 1228 if (opt == IPOPT_NOP) 1229 optlen = 1; 1230 else { 1231 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1232 goto bad; 1233 optlen = cp[IPOPT_OLEN]; 1234 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1235 goto bad; 1236 } 1237 switch (opt) { 1238 1239 default: 1240 break; 1241 1242 case IPOPT_LSRR: 1243 case IPOPT_SSRR: 1244 /* 1245 * user process specifies route as: 1246 * ->A->B->C->D 1247 * D must be our final destination (but we can't 1248 * check that since we may not have connected yet). 1249 * A is first hop destination, which doesn't appear in 1250 * actual IP option, but is stored before the options. 1251 */ 1252 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1253 goto bad; 1254 m->m_len -= sizeof(struct in_addr); 1255 cnt -= sizeof(struct in_addr); 1256 optlen -= sizeof(struct in_addr); 1257 cp[IPOPT_OLEN] = optlen; 1258 /* 1259 * Move first hop before start of options. 1260 */ 1261 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1262 sizeof(struct in_addr)); 1263 /* 1264 * Then copy rest of options back 1265 * to close up the deleted entry. 1266 */ 1267 memmove(&cp[IPOPT_OFFSET+1], 1268 (caddr_t)(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), 1269 (unsigned)cnt + sizeof(struct in_addr)); 1270 break; 1271 } 1272 } 1273 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1274 goto bad; 1275 *pcbopt = m; 1276 return (0); 1277 1278 bad: 1279 (void)m_free(m); 1280 return (EINVAL); 1281 } 1282 1283 /* 1284 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1285 */ 1286 static struct ifnet * 1287 ip_multicast_if(a, ifindexp) 1288 struct in_addr *a; 1289 int *ifindexp; 1290 { 1291 int ifindex; 1292 struct ifnet *ifp; 1293 1294 if (ifindexp) 1295 *ifindexp = 0; 1296 if (ntohl(a->s_addr) >> 24 == 0) { 1297 ifindex = ntohl(a->s_addr) & 0xffffff; 1298 if (ifindex < 0 || if_index < ifindex) 1299 return NULL; 1300 ifp = ifindex2ifnet[ifindex]; 1301 if (ifindexp) 1302 *ifindexp = ifindex; 1303 } else { 1304 INADDR_TO_IFP(*a, ifp); 1305 } 1306 return ifp; 1307 } 1308 1309 /* 1310 * Set the IP multicast options in response to user setsockopt(). 1311 */ 1312 int 1313 ip_setmoptions(optname, imop, m) 1314 int optname; 1315 struct ip_moptions **imop; 1316 struct mbuf *m; 1317 { 1318 int error = 0; 1319 u_char loop; 1320 int i; 1321 struct in_addr addr; 1322 struct ip_mreq *mreq; 1323 struct ifnet *ifp; 1324 struct ip_moptions *imo = *imop; 1325 struct route ro; 1326 struct sockaddr_in *dst; 1327 int ifindex; 1328 1329 if (imo == NULL) { 1330 /* 1331 * No multicast option buffer attached to the pcb; 1332 * allocate one and initialize to default values. 1333 */ 1334 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1335 M_WAITOK); 1336 1337 if (imo == NULL) 1338 return (ENOBUFS); 1339 *imop = imo; 1340 imo->imo_multicast_ifp = NULL; 1341 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1342 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1343 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1344 imo->imo_num_memberships = 0; 1345 } 1346 1347 switch (optname) { 1348 1349 case IP_MULTICAST_IF: 1350 /* 1351 * Select the interface for outgoing multicast packets. 1352 */ 1353 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1354 error = EINVAL; 1355 break; 1356 } 1357 addr = *(mtod(m, struct in_addr *)); 1358 /* 1359 * INADDR_ANY is used to remove a previous selection. 1360 * When no interface is selected, a default one is 1361 * chosen every time a multicast packet is sent. 1362 */ 1363 if (in_nullhost(addr)) { 1364 imo->imo_multicast_ifp = NULL; 1365 break; 1366 } 1367 /* 1368 * The selected interface is identified by its local 1369 * IP address. Find the interface and confirm that 1370 * it supports multicasting. 1371 */ 1372 ifp = ip_multicast_if(&addr, &ifindex); 1373 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1374 error = EADDRNOTAVAIL; 1375 break; 1376 } 1377 imo->imo_multicast_ifp = ifp; 1378 if (ifindex) 1379 imo->imo_multicast_addr = addr; 1380 else 1381 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1382 break; 1383 1384 case IP_MULTICAST_TTL: 1385 /* 1386 * Set the IP time-to-live for outgoing multicast packets. 1387 */ 1388 if (m == NULL || m->m_len != 1) { 1389 error = EINVAL; 1390 break; 1391 } 1392 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1393 break; 1394 1395 case IP_MULTICAST_LOOP: 1396 /* 1397 * Set the loopback flag for outgoing multicast packets. 1398 * Must be zero or one. 1399 */ 1400 if (m == NULL || m->m_len != 1 || 1401 (loop = *(mtod(m, u_char *))) > 1) { 1402 error = EINVAL; 1403 break; 1404 } 1405 imo->imo_multicast_loop = loop; 1406 break; 1407 1408 case IP_ADD_MEMBERSHIP: 1409 /* 1410 * Add a multicast group membership. 1411 * Group must be a valid IP multicast address. 1412 */ 1413 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1414 error = EINVAL; 1415 break; 1416 } 1417 mreq = mtod(m, struct ip_mreq *); 1418 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1419 error = EINVAL; 1420 break; 1421 } 1422 /* 1423 * If no interface address was provided, use the interface of 1424 * the route to the given multicast address. 1425 */ 1426 if (in_nullhost(mreq->imr_interface)) { 1427 bzero((caddr_t)&ro, sizeof(ro)); 1428 ro.ro_rt = NULL; 1429 dst = satosin(&ro.ro_dst); 1430 dst->sin_len = sizeof(*dst); 1431 dst->sin_family = AF_INET; 1432 dst->sin_addr = mreq->imr_multiaddr; 1433 rtalloc(&ro); 1434 if (ro.ro_rt == NULL) { 1435 error = EADDRNOTAVAIL; 1436 break; 1437 } 1438 ifp = ro.ro_rt->rt_ifp; 1439 rtfree(ro.ro_rt); 1440 } else { 1441 ifp = ip_multicast_if(&mreq->imr_interface, NULL); 1442 } 1443 /* 1444 * See if we found an interface, and confirm that it 1445 * supports multicast. 1446 */ 1447 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1448 error = EADDRNOTAVAIL; 1449 break; 1450 } 1451 /* 1452 * See if the membership already exists or if all the 1453 * membership slots are full. 1454 */ 1455 for (i = 0; i < imo->imo_num_memberships; ++i) { 1456 if (imo->imo_membership[i]->inm_ifp == ifp && 1457 in_hosteq(imo->imo_membership[i]->inm_addr, 1458 mreq->imr_multiaddr)) 1459 break; 1460 } 1461 if (i < imo->imo_num_memberships) { 1462 error = EADDRINUSE; 1463 break; 1464 } 1465 if (i == IP_MAX_MEMBERSHIPS) { 1466 error = ETOOMANYREFS; 1467 break; 1468 } 1469 /* 1470 * Everything looks good; add a new record to the multicast 1471 * address list for the given interface. 1472 */ 1473 if ((imo->imo_membership[i] = 1474 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1475 error = ENOBUFS; 1476 break; 1477 } 1478 ++imo->imo_num_memberships; 1479 break; 1480 1481 case IP_DROP_MEMBERSHIP: 1482 /* 1483 * Drop a multicast group membership. 1484 * Group must be a valid IP multicast address. 1485 */ 1486 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1487 error = EINVAL; 1488 break; 1489 } 1490 mreq = mtod(m, struct ip_mreq *); 1491 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1492 error = EINVAL; 1493 break; 1494 } 1495 /* 1496 * If an interface address was specified, get a pointer 1497 * to its ifnet structure. 1498 */ 1499 if (in_nullhost(mreq->imr_interface)) 1500 ifp = NULL; 1501 else { 1502 ifp = ip_multicast_if(&mreq->imr_interface, NULL); 1503 if (ifp == NULL) { 1504 error = EADDRNOTAVAIL; 1505 break; 1506 } 1507 } 1508 /* 1509 * Find the membership in the membership array. 1510 */ 1511 for (i = 0; i < imo->imo_num_memberships; ++i) { 1512 if ((ifp == NULL || 1513 imo->imo_membership[i]->inm_ifp == ifp) && 1514 in_hosteq(imo->imo_membership[i]->inm_addr, 1515 mreq->imr_multiaddr)) 1516 break; 1517 } 1518 if (i == imo->imo_num_memberships) { 1519 error = EADDRNOTAVAIL; 1520 break; 1521 } 1522 /* 1523 * Give up the multicast address record to which the 1524 * membership points. 1525 */ 1526 in_delmulti(imo->imo_membership[i]); 1527 /* 1528 * Remove the gap in the membership array. 1529 */ 1530 for (++i; i < imo->imo_num_memberships; ++i) 1531 imo->imo_membership[i-1] = imo->imo_membership[i]; 1532 --imo->imo_num_memberships; 1533 break; 1534 1535 default: 1536 error = EOPNOTSUPP; 1537 break; 1538 } 1539 1540 /* 1541 * If all options have default values, no need to keep the mbuf. 1542 */ 1543 if (imo->imo_multicast_ifp == NULL && 1544 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1545 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1546 imo->imo_num_memberships == 0) { 1547 free(*imop, M_IPMOPTS); 1548 *imop = NULL; 1549 } 1550 1551 return (error); 1552 } 1553 1554 /* 1555 * Return the IP multicast options in response to user getsockopt(). 1556 */ 1557 int 1558 ip_getmoptions(optname, imo, mp) 1559 int optname; 1560 struct ip_moptions *imo; 1561 struct mbuf **mp; 1562 { 1563 u_char *ttl; 1564 u_char *loop; 1565 struct in_addr *addr; 1566 struct in_ifaddr *ia; 1567 1568 *mp = m_get(M_WAIT, MT_SOOPTS); 1569 1570 switch (optname) { 1571 1572 case IP_MULTICAST_IF: 1573 addr = mtod(*mp, struct in_addr *); 1574 (*mp)->m_len = sizeof(struct in_addr); 1575 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1576 *addr = zeroin_addr; 1577 else if (imo->imo_multicast_addr.s_addr) { 1578 /* return the value user has set */ 1579 *addr = imo->imo_multicast_addr; 1580 } else { 1581 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1582 *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr; 1583 } 1584 return (0); 1585 1586 case IP_MULTICAST_TTL: 1587 ttl = mtod(*mp, u_char *); 1588 (*mp)->m_len = 1; 1589 *ttl = imo ? imo->imo_multicast_ttl 1590 : IP_DEFAULT_MULTICAST_TTL; 1591 return (0); 1592 1593 case IP_MULTICAST_LOOP: 1594 loop = mtod(*mp, u_char *); 1595 (*mp)->m_len = 1; 1596 *loop = imo ? imo->imo_multicast_loop 1597 : IP_DEFAULT_MULTICAST_LOOP; 1598 return (0); 1599 1600 default: 1601 return (EOPNOTSUPP); 1602 } 1603 } 1604 1605 /* 1606 * Discard the IP multicast options. 1607 */ 1608 void 1609 ip_freemoptions(imo) 1610 struct ip_moptions *imo; 1611 { 1612 int i; 1613 1614 if (imo != NULL) { 1615 for (i = 0; i < imo->imo_num_memberships; ++i) 1616 in_delmulti(imo->imo_membership[i]); 1617 free(imo, M_IPMOPTS); 1618 } 1619 } 1620 1621 /* 1622 * Routine called from ip_output() to loop back a copy of an IP multicast 1623 * packet to the input queue of a specified interface. Note that this 1624 * calls the output routine of the loopback "driver", but with an interface 1625 * pointer that might NOT be &loif -- easier than replicating that code here. 1626 */ 1627 static void 1628 ip_mloopback(ifp, m, dst) 1629 struct ifnet *ifp; 1630 struct mbuf *m; 1631 struct sockaddr_in *dst; 1632 { 1633 struct ip *ip; 1634 struct mbuf *copym; 1635 1636 copym = m_copy(m, 0, M_COPYALL); 1637 if (copym != NULL 1638 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip))) 1639 copym = m_pullup(copym, sizeof(struct ip)); 1640 if (copym != NULL) { 1641 /* 1642 * We don't bother to fragment if the IP length is greater 1643 * than the interface's MTU. Can this possibly matter? 1644 */ 1645 ip = mtod(copym, struct ip *); 1646 1647 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 1648 in_delayed_cksum(copym); 1649 copym->m_pkthdr.csum_flags &= 1650 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 1651 } 1652 1653 ip->ip_sum = 0; 1654 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1655 (void) looutput(ifp, copym, sintosa(dst), NULL); 1656 } 1657 } 1658