1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.37 2003/04/15 06:44:45 silby Exp $ 35 * $DragonFly: src/sys/netinet/ip_output.c,v 1.8 2003/08/24 23:07:07 hsu Exp $ 36 */ 37 38 #define _IP_VHL 39 40 #include "opt_ipfw.h" 41 #include "opt_ipdn.h" 42 #include "opt_ipdivert.h" 43 #include "opt_ipfilter.h" 44 #include "opt_ipsec.h" 45 #include "opt_random_ip_id.h" 46 #include "opt_mbuf_stress_test.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/malloc.h> 52 #include <sys/mbuf.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/proc.h> 57 #include <sys/sysctl.h> 58 59 #include <net/if.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/ip.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 69 #include <machine/in_cksum.h> 70 71 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 72 73 #ifdef IPSEC 74 #include <netinet6/ipsec.h> 75 #include <netproto/key/key.h> 76 #ifdef IPSEC_DEBUG 77 #include <netproto/key/key_debug.h> 78 #else 79 #define KEYDEBUG(lev,arg) 80 #endif 81 #endif /*IPSEC*/ 82 83 #ifdef FAST_IPSEC 84 #include <netipsec/ipsec.h> 85 #include <netipsec/xform.h> 86 #include <netipsec/key.h> 87 #endif /*FAST_IPSEC*/ 88 89 #include <net/ipfw/ip_fw.h> 90 #include <net/dummynet/ip_dummynet.h> 91 92 #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 93 x, (ntohl(a.s_addr)>>24)&0xFF,\ 94 (ntohl(a.s_addr)>>16)&0xFF,\ 95 (ntohl(a.s_addr)>>8)&0xFF,\ 96 (ntohl(a.s_addr))&0xFF, y); 97 98 u_short ip_id; 99 100 #ifdef MBUF_STRESS_TEST 101 int mbuf_frag_size = 0; 102 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 103 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 104 #endif 105 106 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 107 static struct ifnet *ip_multicast_if(struct in_addr *, int *); 108 static void ip_mloopback 109 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 110 static int ip_getmoptions 111 (struct sockopt *, struct ip_moptions *); 112 static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 113 static int ip_setmoptions 114 (struct sockopt *, struct ip_moptions **); 115 116 int ip_optcopy(struct ip *, struct ip *); 117 extern int (*fr_checkp) (struct ip *, int, struct ifnet *, int, struct mbuf **); 118 119 120 extern struct protosw inetsw[]; 121 122 /* 123 * IP output. The packet in mbuf chain m contains a skeletal IP 124 * header (with len, off, ttl, proto, tos, src, dst). 125 * The mbuf chain containing the packet will be freed. 126 * The mbuf opt, if present, will not be freed. 127 */ 128 int 129 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, 130 int flags, struct ip_moptions *imo, struct inpcb *inp) 131 { 132 struct ip *ip; 133 struct ifnet *ifp = NULL; /* keep compiler happy */ 134 struct mbuf *m; 135 int hlen = sizeof (struct ip); 136 int len, off, error = 0; 137 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 138 struct in_ifaddr *ia = NULL; 139 int isbroadcast, sw_csum; 140 struct in_addr pkt_dst; 141 #ifdef IPSEC 142 struct route iproute; 143 struct secpolicy *sp = NULL; 144 struct socket *so = inp ? inp->inp_socket : NULL; 145 #endif 146 #ifdef FAST_IPSEC 147 struct route iproute; 148 struct m_tag *mtag; 149 struct secpolicy *sp = NULL; 150 struct tdb_ident *tdbi; 151 int s; 152 #endif /* FAST_IPSEC */ 153 struct ip_fw_args args; 154 int src_was_INADDR_ANY = 0; /* as the name says... */ 155 156 args.eh = NULL; 157 args.rule = NULL; 158 args.next_hop = NULL; 159 args.divert_rule = 0; /* divert cookie */ 160 161 /* Grab info from MT_TAG mbufs prepended to the chain. */ 162 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 163 switch(m0->_m_tag_id) { 164 default: 165 printf("ip_output: unrecognised MT_TAG tag %d\n", 166 m0->_m_tag_id); 167 break; 168 169 case PACKET_TAG_DUMMYNET: 170 /* 171 * the packet was already tagged, so part of the 172 * processing was already done, and we need to go down. 173 * Get parameters from the header. 174 */ 175 args.rule = ((struct dn_pkt *)m0)->rule; 176 opt = NULL ; 177 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 178 imo = NULL ; 179 dst = ((struct dn_pkt *)m0)->dn_dst ; 180 ifp = ((struct dn_pkt *)m0)->ifp ; 181 flags = ((struct dn_pkt *)m0)->flags ; 182 break; 183 184 case PACKET_TAG_DIVERT: 185 args.divert_rule = (int)m0->m_data & 0xffff; 186 break; 187 188 case PACKET_TAG_IPFORWARD: 189 args.next_hop = (struct sockaddr_in *)m0->m_data; 190 break; 191 } 192 } 193 m = m0; 194 195 KASSERT(!m || (m->m_flags & M_PKTHDR) != 0, ("ip_output: no HDR")); 196 #ifndef FAST_IPSEC 197 KASSERT(ro != NULL, ("ip_output: no route, proto %d", 198 mtod(m, struct ip *)->ip_p)); 199 #endif 200 201 if (args.rule != NULL) { /* dummynet already saw us */ 202 ip = mtod(m, struct ip *); 203 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 204 if (ro->ro_rt) 205 ia = ifatoia(ro->ro_rt->rt_ifa); 206 goto sendit; 207 } 208 209 if (opt) { 210 len = 0; 211 m = ip_insertoptions(m, opt, &len); 212 if (len != 0) 213 hlen = len; 214 } 215 ip = mtod(m, struct ip *); 216 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 217 218 /* 219 * Fill in IP header. 220 */ 221 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 222 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 223 ip->ip_off &= IP_DF; 224 #ifdef RANDOM_IP_ID 225 ip->ip_id = ip_randomid(); 226 #else 227 ip->ip_id = htons(ip_id++); 228 #endif 229 ipstat.ips_localout++; 230 } else { 231 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 232 } 233 234 #ifdef FAST_IPSEC 235 if (ro == NULL) { 236 ro = &iproute; 237 bzero(ro, sizeof (*ro)); 238 } 239 #endif /* FAST_IPSEC */ 240 dst = (struct sockaddr_in *)&ro->ro_dst; 241 /* 242 * If there is a cached route, 243 * check that it is to the same destination 244 * and is still up. If not, free it and try again. 245 * The address family should also be checked in case of sharing the 246 * cache with IPv6. 247 */ 248 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 249 dst->sin_family != AF_INET || 250 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 251 RTFREE(ro->ro_rt); 252 ro->ro_rt = (struct rtentry *)0; 253 } 254 if (ro->ro_rt == 0) { 255 bzero(dst, sizeof(*dst)); 256 dst->sin_family = AF_INET; 257 dst->sin_len = sizeof(*dst); 258 dst->sin_addr = pkt_dst; 259 } 260 /* 261 * If routing to interface only, 262 * short circuit routing lookup. 263 */ 264 if (flags & IP_ROUTETOIF) { 265 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 266 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 267 ipstat.ips_noroute++; 268 error = ENETUNREACH; 269 goto bad; 270 } 271 ifp = ia->ia_ifp; 272 ip->ip_ttl = 1; 273 isbroadcast = in_broadcast(dst->sin_addr, ifp); 274 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 275 imo != NULL && imo->imo_multicast_ifp != NULL) { 276 /* 277 * Bypass the normal routing lookup for multicast 278 * packets if the interface is specified. 279 */ 280 ifp = imo->imo_multicast_ifp; 281 IFP_TO_IA(ifp, ia); 282 isbroadcast = 0; /* fool gcc */ 283 } else { 284 /* 285 * If this is the case, we probably don't want to allocate 286 * a protocol-cloned route since we didn't get one from the 287 * ULP. This lets TCP do its thing, while not burdening 288 * forwarding or ICMP with the overhead of cloning a route. 289 * Of course, we still want to do any cloning requested by 290 * the link layer, as this is probably required in all cases 291 * for correct operation (as it is for ARP). 292 */ 293 if (ro->ro_rt == 0) 294 rtalloc_ign(ro, RTF_PRCLONING); 295 if (ro->ro_rt == 0) { 296 ipstat.ips_noroute++; 297 error = EHOSTUNREACH; 298 goto bad; 299 } 300 ia = ifatoia(ro->ro_rt->rt_ifa); 301 ifp = ro->ro_rt->rt_ifp; 302 ro->ro_rt->rt_use++; 303 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 304 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 305 if (ro->ro_rt->rt_flags & RTF_HOST) 306 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 307 else 308 isbroadcast = in_broadcast(dst->sin_addr, ifp); 309 } 310 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 311 struct in_multi *inm; 312 313 m->m_flags |= M_MCAST; 314 /* 315 * IP destination address is multicast. Make sure "dst" 316 * still points to the address in "ro". (It may have been 317 * changed to point to a gateway address, above.) 318 */ 319 dst = (struct sockaddr_in *)&ro->ro_dst; 320 /* 321 * See if the caller provided any multicast options 322 */ 323 if (imo != NULL) { 324 ip->ip_ttl = imo->imo_multicast_ttl; 325 if (imo->imo_multicast_vif != -1) 326 ip->ip_src.s_addr = 327 ip_mcast_src ? 328 ip_mcast_src(imo->imo_multicast_vif) : 329 INADDR_ANY; 330 } else 331 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 332 /* 333 * Confirm that the outgoing interface supports multicast. 334 */ 335 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 336 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 337 ipstat.ips_noroute++; 338 error = ENETUNREACH; 339 goto bad; 340 } 341 } 342 /* 343 * If source address not specified yet, use address 344 * of outgoing interface. 345 */ 346 if (ip->ip_src.s_addr == INADDR_ANY) { 347 /* Interface may have no addresses. */ 348 if (ia != NULL) 349 ip->ip_src = IA_SIN(ia)->sin_addr; 350 } 351 352 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 353 /* 354 * XXX 355 * delayed checksums are not currently 356 * compatible with IP multicast routing 357 */ 358 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 359 in_delayed_cksum(m); 360 m->m_pkthdr.csum_flags &= 361 ~CSUM_DELAY_DATA; 362 } 363 } 364 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 365 if (inm != NULL && 366 (imo == NULL || imo->imo_multicast_loop)) { 367 /* 368 * If we belong to the destination multicast group 369 * on the outgoing interface, and the caller did not 370 * forbid loopback, loop back a copy. 371 */ 372 ip_mloopback(ifp, m, dst, hlen); 373 } 374 else { 375 /* 376 * If we are acting as a multicast router, perform 377 * multicast forwarding as if the packet had just 378 * arrived on the interface to which we are about 379 * to send. The multicast forwarding function 380 * recursively calls this function, using the 381 * IP_FORWARDING flag to prevent infinite recursion. 382 * 383 * Multicasts that are looped back by ip_mloopback(), 384 * above, will be forwarded by the ip_input() routine, 385 * if necessary. 386 */ 387 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 388 /* 389 * If rsvp daemon is not running, do not 390 * set ip_moptions. This ensures that the packet 391 * is multicast and not just sent down one link 392 * as prescribed by rsvpd. 393 */ 394 if (!rsvp_on) 395 imo = NULL; 396 if (ip_mforward && 397 ip_mforward(ip, ifp, m, imo) != 0) { 398 m_freem(m); 399 goto done; 400 } 401 } 402 } 403 404 /* 405 * Multicasts with a time-to-live of zero may be looped- 406 * back, above, but must not be transmitted on a network. 407 * Also, multicasts addressed to the loopback interface 408 * are not sent -- the above call to ip_mloopback() will 409 * loop back a copy if this host actually belongs to the 410 * destination group on the loopback interface. 411 */ 412 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 413 m_freem(m); 414 goto done; 415 } 416 417 goto sendit; 418 } 419 #ifndef notdef 420 /* 421 * If the source address is not specified yet, use the address 422 * of the outoing interface. In case, keep note we did that, so 423 * if the the firewall changes the next-hop causing the output 424 * interface to change, we can fix that. 425 */ 426 if (ip->ip_src.s_addr == INADDR_ANY) { 427 /* Interface may have no addresses. */ 428 if (ia != NULL) { 429 ip->ip_src = IA_SIN(ia)->sin_addr; 430 src_was_INADDR_ANY = 1; 431 } 432 } 433 #endif /* notdef */ 434 /* 435 * Verify that we have any chance at all of being able to queue 436 * the packet or packet fragments 437 */ 438 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 439 ifp->if_snd.ifq_maxlen) { 440 error = ENOBUFS; 441 ipstat.ips_odropped++; 442 goto bad; 443 } 444 445 /* 446 * Look for broadcast address and 447 * verify user is allowed to send 448 * such a packet. 449 */ 450 if (isbroadcast) { 451 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 452 error = EADDRNOTAVAIL; 453 goto bad; 454 } 455 if ((flags & IP_ALLOWBROADCAST) == 0) { 456 error = EACCES; 457 goto bad; 458 } 459 /* don't allow broadcast messages to be fragmented */ 460 if (ip->ip_len > ifp->if_mtu) { 461 error = EMSGSIZE; 462 goto bad; 463 } 464 m->m_flags |= M_BCAST; 465 } else { 466 m->m_flags &= ~M_BCAST; 467 } 468 469 sendit: 470 #ifdef IPSEC 471 /* get SP for this packet */ 472 if (so == NULL) 473 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 474 else 475 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 476 477 if (sp == NULL) { 478 ipsecstat.out_inval++; 479 goto bad; 480 } 481 482 error = 0; 483 484 /* check policy */ 485 switch (sp->policy) { 486 case IPSEC_POLICY_DISCARD: 487 /* 488 * This packet is just discarded. 489 */ 490 ipsecstat.out_polvio++; 491 goto bad; 492 493 case IPSEC_POLICY_BYPASS: 494 case IPSEC_POLICY_NONE: 495 /* no need to do IPsec. */ 496 goto skip_ipsec; 497 498 case IPSEC_POLICY_IPSEC: 499 if (sp->req == NULL) { 500 /* acquire a policy */ 501 error = key_spdacquire(sp); 502 goto bad; 503 } 504 break; 505 506 case IPSEC_POLICY_ENTRUST: 507 default: 508 printf("ip_output: Invalid policy found. %d\n", sp->policy); 509 } 510 { 511 struct ipsec_output_state state; 512 bzero(&state, sizeof(state)); 513 state.m = m; 514 if (flags & IP_ROUTETOIF) { 515 state.ro = &iproute; 516 bzero(&iproute, sizeof(iproute)); 517 } else 518 state.ro = ro; 519 state.dst = (struct sockaddr *)dst; 520 521 ip->ip_sum = 0; 522 523 /* 524 * XXX 525 * delayed checksums are not currently compatible with IPsec 526 */ 527 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 528 in_delayed_cksum(m); 529 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 530 } 531 532 ip->ip_len = htons(ip->ip_len); 533 ip->ip_off = htons(ip->ip_off); 534 535 error = ipsec4_output(&state, sp, flags); 536 537 m = state.m; 538 if (flags & IP_ROUTETOIF) { 539 /* 540 * if we have tunnel mode SA, we may need to ignore 541 * IP_ROUTETOIF. 542 */ 543 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 544 flags &= ~IP_ROUTETOIF; 545 ro = state.ro; 546 } 547 } else 548 ro = state.ro; 549 dst = (struct sockaddr_in *)state.dst; 550 if (error) { 551 /* mbuf is already reclaimed in ipsec4_output. */ 552 m0 = NULL; 553 switch (error) { 554 case EHOSTUNREACH: 555 case ENETUNREACH: 556 case EMSGSIZE: 557 case ENOBUFS: 558 case ENOMEM: 559 break; 560 default: 561 printf("ip4_output (ipsec): error code %d\n", error); 562 /*fall through*/ 563 case ENOENT: 564 /* don't show these error codes to the user */ 565 error = 0; 566 break; 567 } 568 goto bad; 569 } 570 } 571 572 /* be sure to update variables that are affected by ipsec4_output() */ 573 ip = mtod(m, struct ip *); 574 #ifdef _IP_VHL 575 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 576 #else 577 hlen = ip->ip_hl << 2; 578 #endif 579 if (ro->ro_rt == NULL) { 580 if ((flags & IP_ROUTETOIF) == 0) { 581 printf("ip_output: " 582 "can't update route after IPsec processing\n"); 583 error = EHOSTUNREACH; /*XXX*/ 584 goto bad; 585 } 586 } else { 587 ia = ifatoia(ro->ro_rt->rt_ifa); 588 ifp = ro->ro_rt->rt_ifp; 589 } 590 591 /* make it flipped, again. */ 592 ip->ip_len = ntohs(ip->ip_len); 593 ip->ip_off = ntohs(ip->ip_off); 594 skip_ipsec: 595 #endif /*IPSEC*/ 596 #ifdef FAST_IPSEC 597 /* 598 * Check the security policy (SP) for the packet and, if 599 * required, do IPsec-related processing. There are two 600 * cases here; the first time a packet is sent through 601 * it will be untagged and handled by ipsec4_checkpolicy. 602 * If the packet is resubmitted to ip_output (e.g. after 603 * AH, ESP, etc. processing), there will be a tag to bypass 604 * the lookup and related policy checking. 605 */ 606 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 607 s = splnet(); 608 if (mtag != NULL) { 609 tdbi = (struct tdb_ident *)(mtag + 1); 610 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 611 if (sp == NULL) 612 error = -EINVAL; /* force silent drop */ 613 m_tag_delete(m, mtag); 614 } else { 615 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 616 &error, inp); 617 } 618 /* 619 * There are four return cases: 620 * sp != NULL apply IPsec policy 621 * sp == NULL, error == 0 no IPsec handling needed 622 * sp == NULL, error == -EINVAL discard packet w/o error 623 * sp == NULL, error != 0 discard packet, report error 624 */ 625 if (sp != NULL) { 626 /* Loop detection, check if ipsec processing already done */ 627 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 628 for (mtag = m_tag_first(m); mtag != NULL; 629 mtag = m_tag_next(m, mtag)) { 630 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 631 continue; 632 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 633 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 634 continue; 635 /* 636 * Check if policy has an SA associated with it. 637 * This can happen when an SP has yet to acquire 638 * an SA; e.g. on first reference. If it occurs, 639 * then we let ipsec4_process_packet do its thing. 640 */ 641 if (sp->req->sav == NULL) 642 break; 643 tdbi = (struct tdb_ident *)(mtag + 1); 644 if (tdbi->spi == sp->req->sav->spi && 645 tdbi->proto == sp->req->sav->sah->saidx.proto && 646 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 647 sizeof (union sockaddr_union)) == 0) { 648 /* 649 * No IPsec processing is needed, free 650 * reference to SP. 651 * 652 * NB: null pointer to avoid free at 653 * done: below. 654 */ 655 KEY_FREESP(&sp), sp = NULL; 656 splx(s); 657 goto spd_done; 658 } 659 } 660 661 /* 662 * Do delayed checksums now because we send before 663 * this is done in the normal processing path. 664 */ 665 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 666 in_delayed_cksum(m); 667 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 668 } 669 670 ip->ip_len = htons(ip->ip_len); 671 ip->ip_off = htons(ip->ip_off); 672 673 /* NB: callee frees mbuf */ 674 error = ipsec4_process_packet(m, sp->req, flags, 0); 675 /* 676 * Preserve KAME behaviour: ENOENT can be returned 677 * when an SA acquire is in progress. Don't propagate 678 * this to user-level; it confuses applications. 679 * 680 * XXX this will go away when the SADB is redone. 681 */ 682 if (error == ENOENT) 683 error = 0; 684 splx(s); 685 goto done; 686 } else { 687 splx(s); 688 689 if (error != 0) { 690 /* 691 * Hack: -EINVAL is used to signal that a packet 692 * should be silently discarded. This is typically 693 * because we asked key management for an SA and 694 * it was delayed (e.g. kicked up to IKE). 695 */ 696 if (error == -EINVAL) 697 error = 0; 698 goto bad; 699 } else { 700 /* No IPsec processing for this packet. */ 701 } 702 #ifdef notyet 703 /* 704 * If deferred crypto processing is needed, check that 705 * the interface supports it. 706 */ 707 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 708 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 709 /* notify IPsec to do its own crypto */ 710 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 711 error = EHOSTUNREACH; 712 goto bad; 713 } 714 #endif 715 } 716 spd_done: 717 #endif /* FAST_IPSEC */ 718 /* 719 * IpHack's section. 720 * - Xlate: translate packet's addr/port (NAT). 721 * - Firewall: deny/allow/etc. 722 * - Wrap: fake packet's addr/port <unimpl.> 723 * - Encapsulate: put it in another IP and send out. <unimp.> 724 */ 725 if (fr_checkp) { 726 struct mbuf *m1 = m; 727 728 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) 729 goto done; 730 ip = mtod(m = m1, struct ip *); 731 } 732 733 /* 734 * Check with the firewall... 735 * but not if we are already being fwd'd from a firewall. 736 */ 737 if (fw_enable && IPFW_LOADED && !args.next_hop) { 738 struct sockaddr_in *old = dst; 739 740 args.m = m; 741 args.next_hop = dst; 742 args.oif = ifp; 743 off = ip_fw_chk_ptr(&args); 744 m = args.m; 745 dst = args.next_hop; 746 747 /* 748 * On return we must do the following: 749 * m == NULL -> drop the pkt (old interface, deprecated) 750 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 751 * 1<=off<= 0xffff -> DIVERT 752 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 753 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 754 * dst != old -> IPFIREWALL_FORWARD 755 * off==0, dst==old -> accept 756 * If some of the above modules are not compiled in, then 757 * we should't have to check the corresponding condition 758 * (because the ipfw control socket should not accept 759 * unsupported rules), but better play safe and drop 760 * packets in case of doubt. 761 */ 762 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 763 if (m) 764 m_freem(m); 765 error = EACCES; 766 goto done; 767 } 768 ip = mtod(m, struct ip *); 769 if (off == 0 && dst == old) /* common case */ 770 goto pass; 771 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 772 /* 773 * pass the pkt to dummynet. Need to include 774 * pipe number, m, ifp, ro, dst because these are 775 * not recomputed in the next pass. 776 * All other parameters have been already used and 777 * so they are not needed anymore. 778 * XXX note: if the ifp or ro entry are deleted 779 * while a pkt is in dummynet, we are in trouble! 780 */ 781 args.ro = ro; 782 args.dst = dst; 783 args.flags = flags; 784 785 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 786 &args); 787 goto done; 788 } 789 #ifdef IPDIVERT 790 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 791 struct mbuf *clone = NULL; 792 793 /* Clone packet if we're doing a 'tee' */ 794 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 795 clone = m_dup(m, M_DONTWAIT); 796 797 /* 798 * XXX 799 * delayed checksums are not currently compatible 800 * with divert sockets. 801 */ 802 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 803 in_delayed_cksum(m); 804 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 805 } 806 807 /* Restore packet header fields to original values */ 808 ip->ip_len = htons(ip->ip_len); 809 ip->ip_off = htons(ip->ip_off); 810 811 /* Deliver packet to divert input routine */ 812 divert_packet(m, 0, off & 0xffff, args.divert_rule); 813 814 /* If 'tee', continue with original packet */ 815 if (clone != NULL) { 816 m = clone; 817 ip = mtod(m, struct ip *); 818 goto pass; 819 } 820 goto done; 821 } 822 #endif 823 824 /* IPFIREWALL_FORWARD */ 825 /* 826 * Check dst to make sure it is directly reachable on the 827 * interface we previously thought it was. 828 * If it isn't (which may be likely in some situations) we have 829 * to re-route it (ie, find a route for the next-hop and the 830 * associated interface) and set them here. This is nested 831 * forwarding which in most cases is undesirable, except where 832 * such control is nigh impossible. So we do it here. 833 * And I'm babbling. 834 */ 835 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 836 #if 0 837 /* 838 * XXX To improve readability, this block should be 839 * changed into a function call as below: 840 */ 841 error = ip_ipforward(&m, &dst, &ifp); 842 if (error) 843 goto bad; 844 if (m == NULL) /* ip_input consumed the mbuf */ 845 goto done; 846 #else 847 struct in_ifaddr *ia; 848 849 /* 850 * XXX sro_fwd below is static, and a pointer 851 * to it gets passed to routines downstream. 852 * This could have surprisingly bad results in 853 * practice, because its content is overwritten 854 * by subsequent packets. 855 */ 856 /* There must be a better way to do this next line... */ 857 static struct route sro_fwd; 858 struct route *ro_fwd = &sro_fwd; 859 860 #if 0 861 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 862 dst->sin_addr, "\n"); 863 #endif 864 865 /* 866 * We need to figure out if we have been forwarded 867 * to a local socket. If so, then we should somehow 868 * "loop back" to ip_input, and get directed to the 869 * PCB as if we had received this packet. This is 870 * because it may be dificult to identify the packets 871 * you want to forward until they are being output 872 * and have selected an interface. (e.g. locally 873 * initiated packets) If we used the loopback inteface, 874 * we would not be able to control what happens 875 * as the packet runs through ip_input() as 876 * it is done through a ISR. 877 */ 878 LIST_FOREACH(ia, 879 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 880 /* 881 * If the addr to forward to is one 882 * of ours, we pretend to 883 * be the destination for this packet. 884 */ 885 if (IA_SIN(ia)->sin_addr.s_addr == 886 dst->sin_addr.s_addr) 887 break; 888 } 889 if (ia) { /* tell ip_input "dont filter" */ 890 struct m_hdr tag; 891 892 tag.mh_type = MT_TAG; 893 tag.mh_flags = PACKET_TAG_IPFORWARD; 894 tag.mh_data = (caddr_t)args.next_hop; 895 tag.mh_next = m; 896 897 if (m->m_pkthdr.rcvif == NULL) 898 m->m_pkthdr.rcvif = ifunit("lo0"); 899 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 900 m->m_pkthdr.csum_flags |= 901 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 902 m0->m_pkthdr.csum_data = 0xffff; 903 } 904 m->m_pkthdr.csum_flags |= 905 CSUM_IP_CHECKED | CSUM_IP_VALID; 906 ip->ip_len = htons(ip->ip_len); 907 ip->ip_off = htons(ip->ip_off); 908 ip_input((struct mbuf *)&tag); 909 goto done; 910 } 911 /* Some of the logic for this was 912 * nicked from above. 913 * 914 * This rewrites the cached route in a local PCB. 915 * Is this what we want to do? 916 */ 917 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 918 919 ro_fwd->ro_rt = 0; 920 rtalloc_ign(ro_fwd, RTF_PRCLONING); 921 922 if (ro_fwd->ro_rt == 0) { 923 ipstat.ips_noroute++; 924 error = EHOSTUNREACH; 925 goto bad; 926 } 927 928 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 929 ifp = ro_fwd->ro_rt->rt_ifp; 930 ro_fwd->ro_rt->rt_use++; 931 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 932 dst = (struct sockaddr_in *) 933 ro_fwd->ro_rt->rt_gateway; 934 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 935 isbroadcast = 936 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 937 else 938 isbroadcast = in_broadcast(dst->sin_addr, ifp); 939 if (ro->ro_rt) 940 RTFREE(ro->ro_rt); 941 ro->ro_rt = ro_fwd->ro_rt; 942 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 943 944 #endif /* ... block to be put into a function */ 945 /* 946 * If we added a default src ip earlier, 947 * which would have been gotten from the-then 948 * interface, do it again, from the new one. 949 */ 950 if (src_was_INADDR_ANY) 951 ip->ip_src = IA_SIN(ia)->sin_addr; 952 goto pass ; 953 } 954 955 /* 956 * if we get here, none of the above matches, and 957 * we have to drop the pkt 958 */ 959 m_freem(m); 960 error = EACCES; /* not sure this is the right error msg */ 961 goto done; 962 } 963 964 pass: 965 /* 127/8 must not appear on wire - RFC1122. */ 966 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 967 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 968 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 969 ipstat.ips_badaddr++; 970 error = EADDRNOTAVAIL; 971 goto bad; 972 } 973 } 974 975 m->m_pkthdr.csum_flags |= CSUM_IP; 976 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 977 if (sw_csum & CSUM_DELAY_DATA) { 978 in_delayed_cksum(m); 979 sw_csum &= ~CSUM_DELAY_DATA; 980 } 981 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 982 983 /* 984 * If small enough for interface, or the interface will take 985 * care of the fragmentation for us, can just send directly. 986 */ 987 if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { 988 ip->ip_len = htons(ip->ip_len); 989 ip->ip_off = htons(ip->ip_off); 990 ip->ip_sum = 0; 991 if (sw_csum & CSUM_DELAY_IP) { 992 if (ip->ip_vhl == IP_VHL_BORING) { 993 ip->ip_sum = in_cksum_hdr(ip); 994 } else { 995 ip->ip_sum = in_cksum(m, hlen); 996 } 997 } 998 999 /* Record statistics for this interface address. */ 1000 if (!(flags & IP_FORWARDING) && ia) { 1001 ia->ia_ifa.if_opackets++; 1002 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1003 } 1004 1005 #ifdef IPSEC 1006 /* clean ipsec history once it goes out of the node */ 1007 ipsec_delaux(m); 1008 #endif 1009 1010 #ifdef MBUF_STRESS_TEST 1011 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) { 1012 struct mbuf *m1, *m2; 1013 int length, tmp; 1014 1015 tmp = length = m->m_pkthdr.len; 1016 1017 while ((length -= mbuf_frag_size) >= 1) { 1018 m1 = m_split(m, length, M_DONTWAIT); 1019 if (m1 == NULL) 1020 break; 1021 m1->m_flags &= ~M_PKTHDR; 1022 m2 = m; 1023 while (m2->m_next != NULL) 1024 m2 = m2->m_next; 1025 m2->m_next = m1; 1026 } 1027 m->m_pkthdr.len = tmp; 1028 } 1029 #endif 1030 error = (*ifp->if_output)(ifp, m, 1031 (struct sockaddr *)dst, ro->ro_rt); 1032 goto done; 1033 } 1034 1035 if (ip->ip_off & IP_DF) { 1036 error = EMSGSIZE; 1037 /* 1038 * This case can happen if the user changed the MTU 1039 * of an interface after enabling IP on it. Because 1040 * most netifs don't keep track of routes pointing to 1041 * them, there is no way for one to update all its 1042 * routes when the MTU is changed. 1043 */ 1044 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 1045 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 1046 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1047 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1048 } 1049 ipstat.ips_cantfrag++; 1050 goto bad; 1051 } 1052 1053 /* 1054 * Too large for interface; fragment if possible. If successful, 1055 * on return, m will point to a list of packets to be sent. 1056 */ 1057 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 1058 if (error) 1059 goto bad; 1060 for (; m; m = m0) { 1061 m0 = m->m_nextpkt; 1062 m->m_nextpkt = 0; 1063 #ifdef IPSEC 1064 /* clean ipsec history once it goes out of the node */ 1065 ipsec_delaux(m); 1066 #endif 1067 if (error == 0) { 1068 /* Record statistics for this interface address. */ 1069 if (ia != NULL) { 1070 ia->ia_ifa.if_opackets++; 1071 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1072 } 1073 1074 error = (*ifp->if_output)(ifp, m, 1075 (struct sockaddr *)dst, ro->ro_rt); 1076 } else 1077 m_freem(m); 1078 } 1079 1080 if (error == 0) 1081 ipstat.ips_fragmented++; 1082 1083 done: 1084 #ifdef IPSEC 1085 if (ro == &iproute && ro->ro_rt) { 1086 RTFREE(ro->ro_rt); 1087 ro->ro_rt = NULL; 1088 } 1089 if (sp != NULL) { 1090 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1091 printf("DP ip_output call free SP:%p\n", sp)); 1092 key_freesp(sp); 1093 } 1094 #endif 1095 #ifdef FAST_IPSEC 1096 if (ro == &iproute && ro->ro_rt) { 1097 RTFREE(ro->ro_rt); 1098 ro->ro_rt = NULL; 1099 } 1100 if (sp != NULL) 1101 KEY_FREESP(&sp); 1102 #endif 1103 return (error); 1104 bad: 1105 m_freem(m); 1106 goto done; 1107 } 1108 1109 /* 1110 * Create a chain of fragments which fit the given mtu. m_frag points to the 1111 * mbuf to be fragmented; on return it points to the chain with the fragments. 1112 * Return 0 if no error. If error, m_frag may contain a partially built 1113 * chain of fragments that should be freed by the caller. 1114 * 1115 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 1116 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 1117 */ 1118 int 1119 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 1120 u_long if_hwassist_flags, int sw_csum) 1121 { 1122 int error = 0; 1123 int hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1124 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 1125 int off; 1126 struct mbuf *m0 = *m_frag; /* the original packet */ 1127 int firstlen; 1128 struct mbuf **mnext; 1129 int nfrags; 1130 1131 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 1132 ipstat.ips_cantfrag++; 1133 return EMSGSIZE; 1134 } 1135 1136 /* 1137 * Must be able to put at least 8 bytes per fragment. 1138 */ 1139 if (len < 8) 1140 return EMSGSIZE; 1141 1142 /* 1143 * If the interface will not calculate checksums on 1144 * fragmented packets, then do it here. 1145 */ 1146 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1147 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 1148 in_delayed_cksum(m0); 1149 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1150 } 1151 1152 if (len > PAGE_SIZE) { 1153 /* 1154 * Fragment large datagrams such that each segment 1155 * contains a multiple of PAGE_SIZE amount of data, 1156 * plus headers. This enables a receiver to perform 1157 * page-flipping zero-copy optimizations. 1158 * 1159 * XXX When does this help given that sender and receiver 1160 * could have different page sizes, and also mtu could 1161 * be less than the receiver's page size ? 1162 */ 1163 int newlen; 1164 struct mbuf *m; 1165 1166 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 1167 off += m->m_len; 1168 1169 /* 1170 * firstlen (off - hlen) must be aligned on an 1171 * 8-byte boundary 1172 */ 1173 if (off < hlen) 1174 goto smart_frag_failure; 1175 off = ((off - hlen) & ~7) + hlen; 1176 newlen = (~PAGE_MASK) & mtu; 1177 if ((newlen + sizeof (struct ip)) > mtu) { 1178 /* we failed, go back the default */ 1179 smart_frag_failure: 1180 newlen = len; 1181 off = hlen + len; 1182 } 1183 len = newlen; 1184 1185 } else { 1186 off = hlen + len; 1187 } 1188 1189 firstlen = off - hlen; 1190 mnext = &m0->m_nextpkt; /* pointer to next packet */ 1191 1192 /* 1193 * Loop through length of segment after first fragment, 1194 * make new header and copy data of each part and link onto chain. 1195 * Here, m0 is the original packet, m is the fragment being created. 1196 * The fragments are linked off the m_nextpkt of the original 1197 * packet, which after processing serves as the first fragment. 1198 */ 1199 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 1200 struct ip *mhip; /* ip header on the fragment */ 1201 struct mbuf *m; 1202 int mhlen = sizeof (struct ip); 1203 1204 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1205 if (m == 0) { 1206 error = ENOBUFS; 1207 ipstat.ips_odropped++; 1208 goto done; 1209 } 1210 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1211 /* 1212 * In the first mbuf, leave room for the link header, then 1213 * copy the original IP header including options. The payload 1214 * goes into an additional mbuf chain returned by m_copy(). 1215 */ 1216 m->m_data += max_linkhdr; 1217 mhip = mtod(m, struct ip *); 1218 *mhip = *ip; 1219 if (hlen > sizeof (struct ip)) { 1220 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1221 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 1222 } 1223 m->m_len = mhlen; 1224 /* XXX do we need to add ip->ip_off below ? */ 1225 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1226 if (off + len >= ip->ip_len) { /* last fragment */ 1227 len = ip->ip_len - off; 1228 m->m_flags |= M_LASTFRAG; 1229 } else 1230 mhip->ip_off |= IP_MF; 1231 mhip->ip_len = htons((u_short)(len + mhlen)); 1232 m->m_next = m_copy(m0, off, len); 1233 if (m->m_next == 0) { /* copy failed */ 1234 m_free(m); 1235 error = ENOBUFS; /* ??? */ 1236 ipstat.ips_odropped++; 1237 goto done; 1238 } 1239 m->m_pkthdr.len = mhlen + len; 1240 m->m_pkthdr.rcvif = (struct ifnet *)0; 1241 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1242 mhip->ip_off = htons(mhip->ip_off); 1243 mhip->ip_sum = 0; 1244 if (sw_csum & CSUM_DELAY_IP) 1245 mhip->ip_sum = in_cksum(m, mhlen); 1246 *mnext = m; 1247 mnext = &m->m_nextpkt; 1248 } 1249 ipstat.ips_ofragments += nfrags; 1250 1251 /* set first marker for fragment chain */ 1252 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1253 m0->m_pkthdr.csum_data = nfrags; 1254 1255 /* 1256 * Update first fragment by trimming what's been copied out 1257 * and updating header. 1258 */ 1259 m_adj(m0, hlen + firstlen - ip->ip_len); 1260 m0->m_pkthdr.len = hlen + firstlen; 1261 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 1262 ip->ip_off |= IP_MF; 1263 ip->ip_off = htons(ip->ip_off); 1264 ip->ip_sum = 0; 1265 if (sw_csum & CSUM_DELAY_IP) 1266 ip->ip_sum = in_cksum(m0, hlen); 1267 1268 done: 1269 *m_frag = m0; 1270 return error; 1271 } 1272 1273 void 1274 in_delayed_cksum(struct mbuf *m) 1275 { 1276 struct ip *ip; 1277 u_short csum, offset; 1278 1279 ip = mtod(m, struct ip *); 1280 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1281 csum = in_cksum_skip(m, ip->ip_len, offset); 1282 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1283 csum = 0xffff; 1284 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1285 1286 if (offset + sizeof(u_short) > m->m_len) { 1287 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1288 m->m_len, offset, ip->ip_p); 1289 /* 1290 * XXX 1291 * this shouldn't happen, but if it does, the 1292 * correct behavior may be to insert the checksum 1293 * in the existing chain instead of rearranging it. 1294 */ 1295 m = m_pullup(m, offset + sizeof(u_short)); 1296 } 1297 *(u_short *)(m->m_data + offset) = csum; 1298 } 1299 1300 /* 1301 * Insert IP options into preformed packet. 1302 * Adjust IP destination as required for IP source routing, 1303 * as indicated by a non-zero in_addr at the start of the options. 1304 * 1305 * XXX This routine assumes that the packet has no options in place. 1306 */ 1307 static struct mbuf * 1308 ip_insertoptions(m, opt, phlen) 1309 struct mbuf *m; 1310 struct mbuf *opt; 1311 int *phlen; 1312 { 1313 struct ipoption *p = mtod(opt, struct ipoption *); 1314 struct mbuf *n; 1315 struct ip *ip = mtod(m, struct ip *); 1316 unsigned optlen; 1317 1318 optlen = opt->m_len - sizeof(p->ipopt_dst); 1319 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { 1320 *phlen = 0; 1321 return (m); /* XXX should fail */ 1322 } 1323 if (p->ipopt_dst.s_addr) 1324 ip->ip_dst = p->ipopt_dst; 1325 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1326 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1327 if (n == 0) { 1328 *phlen = 0; 1329 return (m); 1330 } 1331 n->m_pkthdr.rcvif = (struct ifnet *)0; 1332 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1333 m->m_len -= sizeof(struct ip); 1334 m->m_data += sizeof(struct ip); 1335 n->m_next = m; 1336 m = n; 1337 m->m_len = optlen + sizeof(struct ip); 1338 m->m_data += max_linkhdr; 1339 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1340 } else { 1341 m->m_data -= optlen; 1342 m->m_len += optlen; 1343 m->m_pkthdr.len += optlen; 1344 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1345 } 1346 ip = mtod(m, struct ip *); 1347 bcopy(p->ipopt_list, ip + 1, optlen); 1348 *phlen = sizeof(struct ip) + optlen; 1349 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1350 ip->ip_len += optlen; 1351 return (m); 1352 } 1353 1354 /* 1355 * Copy options from ip to jp, 1356 * omitting those not copied during fragmentation. 1357 */ 1358 int 1359 ip_optcopy(ip, jp) 1360 struct ip *ip, *jp; 1361 { 1362 u_char *cp, *dp; 1363 int opt, optlen, cnt; 1364 1365 cp = (u_char *)(ip + 1); 1366 dp = (u_char *)(jp + 1); 1367 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1368 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1369 opt = cp[0]; 1370 if (opt == IPOPT_EOL) 1371 break; 1372 if (opt == IPOPT_NOP) { 1373 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1374 *dp++ = IPOPT_NOP; 1375 optlen = 1; 1376 continue; 1377 } 1378 1379 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1380 ("ip_optcopy: malformed ipv4 option")); 1381 optlen = cp[IPOPT_OLEN]; 1382 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1383 ("ip_optcopy: malformed ipv4 option")); 1384 1385 /* bogus lengths should have been caught by ip_dooptions */ 1386 if (optlen > cnt) 1387 optlen = cnt; 1388 if (IPOPT_COPIED(opt)) { 1389 bcopy(cp, dp, optlen); 1390 dp += optlen; 1391 } 1392 } 1393 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1394 *dp++ = IPOPT_EOL; 1395 return (optlen); 1396 } 1397 1398 /* 1399 * IP socket option processing. 1400 */ 1401 int 1402 ip_ctloutput(so, sopt) 1403 struct socket *so; 1404 struct sockopt *sopt; 1405 { 1406 struct inpcb *inp = sotoinpcb(so); 1407 int error, optval; 1408 1409 error = optval = 0; 1410 if (sopt->sopt_level != IPPROTO_IP) { 1411 return (EINVAL); 1412 } 1413 1414 switch (sopt->sopt_dir) { 1415 case SOPT_SET: 1416 switch (sopt->sopt_name) { 1417 case IP_OPTIONS: 1418 #ifdef notyet 1419 case IP_RETOPTS: 1420 #endif 1421 { 1422 struct mbuf *m; 1423 if (sopt->sopt_valsize > MLEN) { 1424 error = EMSGSIZE; 1425 break; 1426 } 1427 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER); 1428 if (m == 0) { 1429 error = ENOBUFS; 1430 break; 1431 } 1432 m->m_len = sopt->sopt_valsize; 1433 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1434 m->m_len); 1435 1436 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1437 m)); 1438 } 1439 1440 case IP_TOS: 1441 case IP_TTL: 1442 case IP_RECVOPTS: 1443 case IP_RECVRETOPTS: 1444 case IP_RECVDSTADDR: 1445 case IP_RECVIF: 1446 case IP_FAITH: 1447 error = sooptcopyin(sopt, &optval, sizeof optval, 1448 sizeof optval); 1449 if (error) 1450 break; 1451 1452 switch (sopt->sopt_name) { 1453 case IP_TOS: 1454 inp->inp_ip_tos = optval; 1455 break; 1456 1457 case IP_TTL: 1458 inp->inp_ip_ttl = optval; 1459 break; 1460 #define OPTSET(bit) \ 1461 if (optval) \ 1462 inp->inp_flags |= bit; \ 1463 else \ 1464 inp->inp_flags &= ~bit; 1465 1466 case IP_RECVOPTS: 1467 OPTSET(INP_RECVOPTS); 1468 break; 1469 1470 case IP_RECVRETOPTS: 1471 OPTSET(INP_RECVRETOPTS); 1472 break; 1473 1474 case IP_RECVDSTADDR: 1475 OPTSET(INP_RECVDSTADDR); 1476 break; 1477 1478 case IP_RECVIF: 1479 OPTSET(INP_RECVIF); 1480 break; 1481 1482 case IP_FAITH: 1483 OPTSET(INP_FAITH); 1484 break; 1485 } 1486 break; 1487 #undef OPTSET 1488 1489 case IP_MULTICAST_IF: 1490 case IP_MULTICAST_VIF: 1491 case IP_MULTICAST_TTL: 1492 case IP_MULTICAST_LOOP: 1493 case IP_ADD_MEMBERSHIP: 1494 case IP_DROP_MEMBERSHIP: 1495 error = ip_setmoptions(sopt, &inp->inp_moptions); 1496 break; 1497 1498 case IP_PORTRANGE: 1499 error = sooptcopyin(sopt, &optval, sizeof optval, 1500 sizeof optval); 1501 if (error) 1502 break; 1503 1504 switch (optval) { 1505 case IP_PORTRANGE_DEFAULT: 1506 inp->inp_flags &= ~(INP_LOWPORT); 1507 inp->inp_flags &= ~(INP_HIGHPORT); 1508 break; 1509 1510 case IP_PORTRANGE_HIGH: 1511 inp->inp_flags &= ~(INP_LOWPORT); 1512 inp->inp_flags |= INP_HIGHPORT; 1513 break; 1514 1515 case IP_PORTRANGE_LOW: 1516 inp->inp_flags &= ~(INP_HIGHPORT); 1517 inp->inp_flags |= INP_LOWPORT; 1518 break; 1519 1520 default: 1521 error = EINVAL; 1522 break; 1523 } 1524 break; 1525 1526 #if defined(IPSEC) || defined(FAST_IPSEC) 1527 case IP_IPSEC_POLICY: 1528 { 1529 caddr_t req; 1530 size_t len = 0; 1531 int priv; 1532 struct mbuf *m; 1533 int optname; 1534 1535 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1536 break; 1537 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1538 break; 1539 priv = (sopt->sopt_td != NULL && 1540 suser(sopt->sopt_td) != 0) ? 0 : 1; 1541 req = mtod(m, caddr_t); 1542 len = m->m_len; 1543 optname = sopt->sopt_name; 1544 error = ipsec4_set_policy(inp, optname, req, len, priv); 1545 m_freem(m); 1546 break; 1547 } 1548 #endif /*IPSEC*/ 1549 1550 default: 1551 error = ENOPROTOOPT; 1552 break; 1553 } 1554 break; 1555 1556 case SOPT_GET: 1557 switch (sopt->sopt_name) { 1558 case IP_OPTIONS: 1559 case IP_RETOPTS: 1560 if (inp->inp_options) 1561 error = sooptcopyout(sopt, 1562 mtod(inp->inp_options, 1563 char *), 1564 inp->inp_options->m_len); 1565 else 1566 sopt->sopt_valsize = 0; 1567 break; 1568 1569 case IP_TOS: 1570 case IP_TTL: 1571 case IP_RECVOPTS: 1572 case IP_RECVRETOPTS: 1573 case IP_RECVDSTADDR: 1574 case IP_RECVIF: 1575 case IP_PORTRANGE: 1576 case IP_FAITH: 1577 switch (sopt->sopt_name) { 1578 1579 case IP_TOS: 1580 optval = inp->inp_ip_tos; 1581 break; 1582 1583 case IP_TTL: 1584 optval = inp->inp_ip_ttl; 1585 break; 1586 1587 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1588 1589 case IP_RECVOPTS: 1590 optval = OPTBIT(INP_RECVOPTS); 1591 break; 1592 1593 case IP_RECVRETOPTS: 1594 optval = OPTBIT(INP_RECVRETOPTS); 1595 break; 1596 1597 case IP_RECVDSTADDR: 1598 optval = OPTBIT(INP_RECVDSTADDR); 1599 break; 1600 1601 case IP_RECVIF: 1602 optval = OPTBIT(INP_RECVIF); 1603 break; 1604 1605 case IP_PORTRANGE: 1606 if (inp->inp_flags & INP_HIGHPORT) 1607 optval = IP_PORTRANGE_HIGH; 1608 else if (inp->inp_flags & INP_LOWPORT) 1609 optval = IP_PORTRANGE_LOW; 1610 else 1611 optval = 0; 1612 break; 1613 1614 case IP_FAITH: 1615 optval = OPTBIT(INP_FAITH); 1616 break; 1617 } 1618 error = sooptcopyout(sopt, &optval, sizeof optval); 1619 break; 1620 1621 case IP_MULTICAST_IF: 1622 case IP_MULTICAST_VIF: 1623 case IP_MULTICAST_TTL: 1624 case IP_MULTICAST_LOOP: 1625 case IP_ADD_MEMBERSHIP: 1626 case IP_DROP_MEMBERSHIP: 1627 error = ip_getmoptions(sopt, inp->inp_moptions); 1628 break; 1629 1630 #if defined(IPSEC) || defined(FAST_IPSEC) 1631 case IP_IPSEC_POLICY: 1632 { 1633 struct mbuf *m = NULL; 1634 caddr_t req = NULL; 1635 size_t len = 0; 1636 1637 if (m != 0) { 1638 req = mtod(m, caddr_t); 1639 len = m->m_len; 1640 } 1641 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1642 if (error == 0) 1643 error = soopt_mcopyout(sopt, m); /* XXX */ 1644 if (error == 0) 1645 m_freem(m); 1646 break; 1647 } 1648 #endif /*IPSEC*/ 1649 1650 default: 1651 error = ENOPROTOOPT; 1652 break; 1653 } 1654 break; 1655 } 1656 return (error); 1657 } 1658 1659 /* 1660 * Set up IP options in pcb for insertion in output packets. 1661 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1662 * with destination address if source routed. 1663 */ 1664 static int 1665 ip_pcbopts(optname, pcbopt, m) 1666 int optname; 1667 struct mbuf **pcbopt; 1668 struct mbuf *m; 1669 { 1670 int cnt, optlen; 1671 u_char *cp; 1672 u_char opt; 1673 1674 /* turn off any old options */ 1675 if (*pcbopt) 1676 (void)m_free(*pcbopt); 1677 *pcbopt = 0; 1678 if (m == (struct mbuf *)0 || m->m_len == 0) { 1679 /* 1680 * Only turning off any previous options. 1681 */ 1682 if (m) 1683 (void)m_free(m); 1684 return (0); 1685 } 1686 1687 if (m->m_len % sizeof(int32_t)) 1688 goto bad; 1689 /* 1690 * IP first-hop destination address will be stored before 1691 * actual options; move other options back 1692 * and clear it when none present. 1693 */ 1694 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1695 goto bad; 1696 cnt = m->m_len; 1697 m->m_len += sizeof(struct in_addr); 1698 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1699 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1700 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1701 1702 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1703 opt = cp[IPOPT_OPTVAL]; 1704 if (opt == IPOPT_EOL) 1705 break; 1706 if (opt == IPOPT_NOP) 1707 optlen = 1; 1708 else { 1709 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1710 goto bad; 1711 optlen = cp[IPOPT_OLEN]; 1712 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1713 goto bad; 1714 } 1715 switch (opt) { 1716 1717 default: 1718 break; 1719 1720 case IPOPT_LSRR: 1721 case IPOPT_SSRR: 1722 /* 1723 * user process specifies route as: 1724 * ->A->B->C->D 1725 * D must be our final destination (but we can't 1726 * check that since we may not have connected yet). 1727 * A is first hop destination, which doesn't appear in 1728 * actual IP option, but is stored before the options. 1729 */ 1730 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1731 goto bad; 1732 m->m_len -= sizeof(struct in_addr); 1733 cnt -= sizeof(struct in_addr); 1734 optlen -= sizeof(struct in_addr); 1735 cp[IPOPT_OLEN] = optlen; 1736 /* 1737 * Move first hop before start of options. 1738 */ 1739 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1740 sizeof(struct in_addr)); 1741 /* 1742 * Then copy rest of options back 1743 * to close up the deleted entry. 1744 */ 1745 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1746 sizeof(struct in_addr)), 1747 (caddr_t)&cp[IPOPT_OFFSET+1], 1748 (unsigned)cnt + sizeof(struct in_addr)); 1749 break; 1750 } 1751 } 1752 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1753 goto bad; 1754 *pcbopt = m; 1755 return (0); 1756 1757 bad: 1758 (void)m_free(m); 1759 return (EINVAL); 1760 } 1761 1762 /* 1763 * XXX 1764 * The whole multicast option thing needs to be re-thought. 1765 * Several of these options are equally applicable to non-multicast 1766 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1767 * standard option (IP_TTL). 1768 */ 1769 1770 /* 1771 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1772 */ 1773 static struct ifnet * 1774 ip_multicast_if(a, ifindexp) 1775 struct in_addr *a; 1776 int *ifindexp; 1777 { 1778 int ifindex; 1779 struct ifnet *ifp; 1780 1781 if (ifindexp) 1782 *ifindexp = 0; 1783 if (ntohl(a->s_addr) >> 24 == 0) { 1784 ifindex = ntohl(a->s_addr) & 0xffffff; 1785 if (ifindex < 0 || if_index < ifindex) 1786 return NULL; 1787 ifp = ifindex2ifnet[ifindex]; 1788 if (ifindexp) 1789 *ifindexp = ifindex; 1790 } else { 1791 INADDR_TO_IFP(*a, ifp); 1792 } 1793 return ifp; 1794 } 1795 1796 /* 1797 * Set the IP multicast options in response to user setsockopt(). 1798 */ 1799 static int 1800 ip_setmoptions(sopt, imop) 1801 struct sockopt *sopt; 1802 struct ip_moptions **imop; 1803 { 1804 int error = 0; 1805 int i; 1806 struct in_addr addr; 1807 struct ip_mreq mreq; 1808 struct ifnet *ifp; 1809 struct ip_moptions *imo = *imop; 1810 struct route ro; 1811 struct sockaddr_in *dst; 1812 int ifindex; 1813 int s; 1814 1815 if (imo == NULL) { 1816 /* 1817 * No multicast option buffer attached to the pcb; 1818 * allocate one and initialize to default values. 1819 */ 1820 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1821 M_WAITOK); 1822 1823 if (imo == NULL) 1824 return (ENOBUFS); 1825 *imop = imo; 1826 imo->imo_multicast_ifp = NULL; 1827 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1828 imo->imo_multicast_vif = -1; 1829 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1830 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1831 imo->imo_num_memberships = 0; 1832 } 1833 1834 switch (sopt->sopt_name) { 1835 /* store an index number for the vif you wanna use in the send */ 1836 case IP_MULTICAST_VIF: 1837 if (legal_vif_num == 0) { 1838 error = EOPNOTSUPP; 1839 break; 1840 } 1841 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1842 if (error) 1843 break; 1844 if (!legal_vif_num(i) && (i != -1)) { 1845 error = EINVAL; 1846 break; 1847 } 1848 imo->imo_multicast_vif = i; 1849 break; 1850 1851 case IP_MULTICAST_IF: 1852 /* 1853 * Select the interface for outgoing multicast packets. 1854 */ 1855 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1856 if (error) 1857 break; 1858 /* 1859 * INADDR_ANY is used to remove a previous selection. 1860 * When no interface is selected, a default one is 1861 * chosen every time a multicast packet is sent. 1862 */ 1863 if (addr.s_addr == INADDR_ANY) { 1864 imo->imo_multicast_ifp = NULL; 1865 break; 1866 } 1867 /* 1868 * The selected interface is identified by its local 1869 * IP address. Find the interface and confirm that 1870 * it supports multicasting. 1871 */ 1872 s = splimp(); 1873 ifp = ip_multicast_if(&addr, &ifindex); 1874 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1875 splx(s); 1876 error = EADDRNOTAVAIL; 1877 break; 1878 } 1879 imo->imo_multicast_ifp = ifp; 1880 if (ifindex) 1881 imo->imo_multicast_addr = addr; 1882 else 1883 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1884 splx(s); 1885 break; 1886 1887 case IP_MULTICAST_TTL: 1888 /* 1889 * Set the IP time-to-live for outgoing multicast packets. 1890 * The original multicast API required a char argument, 1891 * which is inconsistent with the rest of the socket API. 1892 * We allow either a char or an int. 1893 */ 1894 if (sopt->sopt_valsize == 1) { 1895 u_char ttl; 1896 error = sooptcopyin(sopt, &ttl, 1, 1); 1897 if (error) 1898 break; 1899 imo->imo_multicast_ttl = ttl; 1900 } else { 1901 u_int ttl; 1902 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1903 sizeof ttl); 1904 if (error) 1905 break; 1906 if (ttl > 255) 1907 error = EINVAL; 1908 else 1909 imo->imo_multicast_ttl = ttl; 1910 } 1911 break; 1912 1913 case IP_MULTICAST_LOOP: 1914 /* 1915 * Set the loopback flag for outgoing multicast packets. 1916 * Must be zero or one. The original multicast API required a 1917 * char argument, which is inconsistent with the rest 1918 * of the socket API. We allow either a char or an int. 1919 */ 1920 if (sopt->sopt_valsize == 1) { 1921 u_char loop; 1922 error = sooptcopyin(sopt, &loop, 1, 1); 1923 if (error) 1924 break; 1925 imo->imo_multicast_loop = !!loop; 1926 } else { 1927 u_int loop; 1928 error = sooptcopyin(sopt, &loop, sizeof loop, 1929 sizeof loop); 1930 if (error) 1931 break; 1932 imo->imo_multicast_loop = !!loop; 1933 } 1934 break; 1935 1936 case IP_ADD_MEMBERSHIP: 1937 /* 1938 * Add a multicast group membership. 1939 * Group must be a valid IP multicast address. 1940 */ 1941 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1942 if (error) 1943 break; 1944 1945 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1946 error = EINVAL; 1947 break; 1948 } 1949 s = splimp(); 1950 /* 1951 * If no interface address was provided, use the interface of 1952 * the route to the given multicast address. 1953 */ 1954 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1955 bzero((caddr_t)&ro, sizeof(ro)); 1956 dst = (struct sockaddr_in *)&ro.ro_dst; 1957 dst->sin_len = sizeof(*dst); 1958 dst->sin_family = AF_INET; 1959 dst->sin_addr = mreq.imr_multiaddr; 1960 rtalloc(&ro); 1961 if (ro.ro_rt == NULL) { 1962 error = EADDRNOTAVAIL; 1963 splx(s); 1964 break; 1965 } 1966 ifp = ro.ro_rt->rt_ifp; 1967 rtfree(ro.ro_rt); 1968 } 1969 else { 1970 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1971 } 1972 1973 /* 1974 * See if we found an interface, and confirm that it 1975 * supports multicast. 1976 */ 1977 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1978 error = EADDRNOTAVAIL; 1979 splx(s); 1980 break; 1981 } 1982 /* 1983 * See if the membership already exists or if all the 1984 * membership slots are full. 1985 */ 1986 for (i = 0; i < imo->imo_num_memberships; ++i) { 1987 if (imo->imo_membership[i]->inm_ifp == ifp && 1988 imo->imo_membership[i]->inm_addr.s_addr 1989 == mreq.imr_multiaddr.s_addr) 1990 break; 1991 } 1992 if (i < imo->imo_num_memberships) { 1993 error = EADDRINUSE; 1994 splx(s); 1995 break; 1996 } 1997 if (i == IP_MAX_MEMBERSHIPS) { 1998 error = ETOOMANYREFS; 1999 splx(s); 2000 break; 2001 } 2002 /* 2003 * Everything looks good; add a new record to the multicast 2004 * address list for the given interface. 2005 */ 2006 if ((imo->imo_membership[i] = 2007 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 2008 error = ENOBUFS; 2009 splx(s); 2010 break; 2011 } 2012 ++imo->imo_num_memberships; 2013 splx(s); 2014 break; 2015 2016 case IP_DROP_MEMBERSHIP: 2017 /* 2018 * Drop a multicast group membership. 2019 * Group must be a valid IP multicast address. 2020 */ 2021 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 2022 if (error) 2023 break; 2024 2025 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 2026 error = EINVAL; 2027 break; 2028 } 2029 2030 s = splimp(); 2031 /* 2032 * If an interface address was specified, get a pointer 2033 * to its ifnet structure. 2034 */ 2035 if (mreq.imr_interface.s_addr == INADDR_ANY) 2036 ifp = NULL; 2037 else { 2038 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 2039 if (ifp == NULL) { 2040 error = EADDRNOTAVAIL; 2041 splx(s); 2042 break; 2043 } 2044 } 2045 /* 2046 * Find the membership in the membership array. 2047 */ 2048 for (i = 0; i < imo->imo_num_memberships; ++i) { 2049 if ((ifp == NULL || 2050 imo->imo_membership[i]->inm_ifp == ifp) && 2051 imo->imo_membership[i]->inm_addr.s_addr == 2052 mreq.imr_multiaddr.s_addr) 2053 break; 2054 } 2055 if (i == imo->imo_num_memberships) { 2056 error = EADDRNOTAVAIL; 2057 splx(s); 2058 break; 2059 } 2060 /* 2061 * Give up the multicast address record to which the 2062 * membership points. 2063 */ 2064 in_delmulti(imo->imo_membership[i]); 2065 /* 2066 * Remove the gap in the membership array. 2067 */ 2068 for (++i; i < imo->imo_num_memberships; ++i) 2069 imo->imo_membership[i-1] = imo->imo_membership[i]; 2070 --imo->imo_num_memberships; 2071 splx(s); 2072 break; 2073 2074 default: 2075 error = EOPNOTSUPP; 2076 break; 2077 } 2078 2079 /* 2080 * If all options have default values, no need to keep the mbuf. 2081 */ 2082 if (imo->imo_multicast_ifp == NULL && 2083 imo->imo_multicast_vif == -1 && 2084 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2085 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2086 imo->imo_num_memberships == 0) { 2087 free(*imop, M_IPMOPTS); 2088 *imop = NULL; 2089 } 2090 2091 return (error); 2092 } 2093 2094 /* 2095 * Return the IP multicast options in response to user getsockopt(). 2096 */ 2097 static int 2098 ip_getmoptions(sopt, imo) 2099 struct sockopt *sopt; 2100 struct ip_moptions *imo; 2101 { 2102 struct in_addr addr; 2103 struct in_ifaddr *ia; 2104 int error, optval; 2105 u_char coptval; 2106 2107 error = 0; 2108 switch (sopt->sopt_name) { 2109 case IP_MULTICAST_VIF: 2110 if (imo != NULL) 2111 optval = imo->imo_multicast_vif; 2112 else 2113 optval = -1; 2114 error = sooptcopyout(sopt, &optval, sizeof optval); 2115 break; 2116 2117 case IP_MULTICAST_IF: 2118 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2119 addr.s_addr = INADDR_ANY; 2120 else if (imo->imo_multicast_addr.s_addr) { 2121 /* return the value user has set */ 2122 addr = imo->imo_multicast_addr; 2123 } else { 2124 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2125 addr.s_addr = (ia == NULL) ? INADDR_ANY 2126 : IA_SIN(ia)->sin_addr.s_addr; 2127 } 2128 error = sooptcopyout(sopt, &addr, sizeof addr); 2129 break; 2130 2131 case IP_MULTICAST_TTL: 2132 if (imo == 0) 2133 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2134 else 2135 optval = coptval = imo->imo_multicast_ttl; 2136 if (sopt->sopt_valsize == 1) 2137 error = sooptcopyout(sopt, &coptval, 1); 2138 else 2139 error = sooptcopyout(sopt, &optval, sizeof optval); 2140 break; 2141 2142 case IP_MULTICAST_LOOP: 2143 if (imo == 0) 2144 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2145 else 2146 optval = coptval = imo->imo_multicast_loop; 2147 if (sopt->sopt_valsize == 1) 2148 error = sooptcopyout(sopt, &coptval, 1); 2149 else 2150 error = sooptcopyout(sopt, &optval, sizeof optval); 2151 break; 2152 2153 default: 2154 error = ENOPROTOOPT; 2155 break; 2156 } 2157 return (error); 2158 } 2159 2160 /* 2161 * Discard the IP multicast options. 2162 */ 2163 void 2164 ip_freemoptions(imo) 2165 struct ip_moptions *imo; 2166 { 2167 int i; 2168 2169 if (imo != NULL) { 2170 for (i = 0; i < imo->imo_num_memberships; ++i) 2171 in_delmulti(imo->imo_membership[i]); 2172 free(imo, M_IPMOPTS); 2173 } 2174 } 2175 2176 /* 2177 * Routine called from ip_output() to loop back a copy of an IP multicast 2178 * packet to the input queue of a specified interface. Note that this 2179 * calls the output routine of the loopback "driver", but with an interface 2180 * pointer that might NOT be a loopback interface -- evil, but easier than 2181 * replicating that code here. 2182 */ 2183 static void 2184 ip_mloopback(ifp, m, dst, hlen) 2185 struct ifnet *ifp; 2186 struct mbuf *m; 2187 struct sockaddr_in *dst; 2188 int hlen; 2189 { 2190 struct ip *ip; 2191 struct mbuf *copym; 2192 2193 copym = m_copy(m, 0, M_COPYALL); 2194 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2195 copym = m_pullup(copym, hlen); 2196 if (copym != NULL) { 2197 /* 2198 * We don't bother to fragment if the IP length is greater 2199 * than the interface's MTU. Can this possibly matter? 2200 */ 2201 ip = mtod(copym, struct ip *); 2202 ip->ip_len = htons(ip->ip_len); 2203 ip->ip_off = htons(ip->ip_off); 2204 ip->ip_sum = 0; 2205 if (ip->ip_vhl == IP_VHL_BORING) { 2206 ip->ip_sum = in_cksum_hdr(ip); 2207 } else { 2208 ip->ip_sum = in_cksum(copym, hlen); 2209 } 2210 /* 2211 * NB: 2212 * It's not clear whether there are any lingering 2213 * reentrancy problems in other areas which might 2214 * be exposed by using ip_input directly (in 2215 * particular, everything which modifies the packet 2216 * in-place). Yet another option is using the 2217 * protosw directly to deliver the looped back 2218 * packet. For the moment, we'll err on the side 2219 * of safety by using if_simloop(). 2220 */ 2221 #if 1 /* XXX */ 2222 if (dst->sin_family != AF_INET) { 2223 printf("ip_mloopback: bad address family %d\n", 2224 dst->sin_family); 2225 dst->sin_family = AF_INET; 2226 } 2227 #endif 2228 2229 #ifdef notdef 2230 copym->m_pkthdr.rcvif = ifp; 2231 ip_input(copym); 2232 #else 2233 /* if the checksum hasn't been computed, mark it as valid */ 2234 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2235 copym->m_pkthdr.csum_flags |= 2236 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2237 copym->m_pkthdr.csum_data = 0xffff; 2238 } 2239 if_simloop(ifp, copym, dst->sin_family, 0); 2240 #endif 2241 } 2242 } 2243