1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.37 2003/04/15 06:44:45 silby Exp $ 35 * $DragonFly: src/sys/netinet/ip_output.c,v 1.15 2004/06/24 08:15:17 dillon Exp $ 36 */ 37 38 #define _IP_VHL 39 40 #include "opt_ipfw.h" 41 #include "opt_ipdn.h" 42 #include "opt_ipdivert.h" 43 #include "opt_ipfilter.h" 44 #include "opt_ipsec.h" 45 #include "opt_random_ip_id.h" 46 #include "opt_mbuf_stress_test.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/malloc.h> 52 #include <sys/mbuf.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/proc.h> 57 #include <sys/sysctl.h> 58 #include <sys/in_cksum.h> 59 60 #include <net/if.h> 61 #include <net/netisr.h> 62 #include <net/pfil.h> 63 #include <net/route.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/ip.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 72 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 73 74 #ifdef IPSEC 75 #include <netinet6/ipsec.h> 76 #include <netproto/key/key.h> 77 #ifdef IPSEC_DEBUG 78 #include <netproto/key/key_debug.h> 79 #else 80 #define KEYDEBUG(lev,arg) 81 #endif 82 #endif /*IPSEC*/ 83 84 #ifdef FAST_IPSEC 85 #include <netipsec/ipsec.h> 86 #include <netipsec/xform.h> 87 #include <netipsec/key.h> 88 #endif /*FAST_IPSEC*/ 89 90 #include <net/ipfw/ip_fw.h> 91 #include <net/dummynet/ip_dummynet.h> 92 93 #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 94 x, (ntohl(a.s_addr)>>24)&0xFF,\ 95 (ntohl(a.s_addr)>>16)&0xFF,\ 96 (ntohl(a.s_addr)>>8)&0xFF,\ 97 (ntohl(a.s_addr))&0xFF, y); 98 99 u_short ip_id; 100 101 #ifdef MBUF_STRESS_TEST 102 int mbuf_frag_size = 0; 103 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 104 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 105 #endif 106 107 static struct mbuf 
*ip_insertoptions(struct mbuf *, struct mbuf *, int *); 108 static struct ifnet *ip_multicast_if(struct in_addr *, int *); 109 static void ip_mloopback 110 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 111 static int ip_getmoptions 112 (struct sockopt *, struct ip_moptions *); 113 static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 114 static int ip_setmoptions 115 (struct sockopt *, struct ip_moptions **); 116 117 int ip_optcopy(struct ip *, struct ip *); 118 extern int (*fr_checkp) (struct ip *, int, struct ifnet *, int, struct mbuf **); 119 120 121 extern struct protosw inetsw[]; 122 123 /* 124 * IP output. The packet in mbuf chain m contains a skeletal IP 125 * header (with len, off, ttl, proto, tos, src, dst). 126 * The mbuf chain containing the packet will be freed. 127 * The mbuf opt, if present, will not be freed. 128 */ 129 int 130 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, 131 int flags, struct ip_moptions *imo, struct inpcb *inp) 132 { 133 struct ip *ip; 134 struct ifnet *ifp = NULL; /* keep compiler happy */ 135 struct mbuf *m; 136 int hlen = sizeof (struct ip); 137 int len, off, error = 0; 138 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 139 struct in_ifaddr *ia = NULL; 140 int isbroadcast, sw_csum; 141 struct in_addr pkt_dst; 142 #ifdef IPSEC 143 struct route iproute; 144 struct secpolicy *sp = NULL; 145 struct socket *so = inp ? inp->inp_socket : NULL; 146 #endif 147 #ifdef FAST_IPSEC 148 struct route iproute; 149 struct m_tag *mtag; 150 struct secpolicy *sp = NULL; 151 struct tdb_ident *tdbi; 152 int s; 153 #endif /* FAST_IPSEC */ 154 struct ip_fw_args args; 155 int src_was_INADDR_ANY = 0; /* as the name says... */ 156 157 args.eh = NULL; 158 args.rule = NULL; 159 args.next_hop = NULL; 160 args.divert_rule = 0; /* divert cookie */ 161 162 /* Grab info from MT_TAG mbufs prepended to the chain. 
*/ 163 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 164 switch(m0->_m_tag_id) { 165 default: 166 printf("ip_output: unrecognised MT_TAG tag %d\n", 167 m0->_m_tag_id); 168 break; 169 170 case PACKET_TAG_DUMMYNET: 171 /* 172 * the packet was already tagged, so part of the 173 * processing was already done, and we need to go down. 174 * Get parameters from the header. 175 */ 176 args.rule = ((struct dn_pkt *)m0)->rule; 177 opt = NULL ; 178 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 179 imo = NULL ; 180 dst = ((struct dn_pkt *)m0)->dn_dst ; 181 ifp = ((struct dn_pkt *)m0)->ifp ; 182 flags = ((struct dn_pkt *)m0)->flags ; 183 break; 184 185 case PACKET_TAG_DIVERT: 186 args.divert_rule = (int)m0->m_data & 0xffff; 187 break; 188 189 case PACKET_TAG_IPFORWARD: 190 args.next_hop = (struct sockaddr_in *)m0->m_data; 191 break; 192 } 193 } 194 m = m0; 195 196 KASSERT(!m || (m->m_flags & M_PKTHDR) != 0, ("ip_output: no HDR")); 197 #ifndef FAST_IPSEC 198 KASSERT(ro != NULL, ("ip_output: no route, proto %d", 199 mtod(m, struct ip *)->ip_p)); 200 #endif 201 202 if (args.rule != NULL) { /* dummynet already saw us */ 203 ip = mtod(m, struct ip *); 204 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 205 if (ro->ro_rt) 206 ia = ifatoia(ro->ro_rt->rt_ifa); 207 goto sendit; 208 } 209 210 if (opt) { 211 len = 0; 212 m = ip_insertoptions(m, opt, &len); 213 if (len != 0) 214 hlen = len; 215 } 216 ip = mtod(m, struct ip *); 217 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 218 219 /* 220 * Fill in IP header. 
221 */ 222 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 223 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 224 ip->ip_off &= IP_DF; 225 #ifdef RANDOM_IP_ID 226 ip->ip_id = ip_randomid(); 227 #else 228 ip->ip_id = htons(ip_id++); 229 #endif 230 ipstat.ips_localout++; 231 } else { 232 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 233 } 234 235 #ifdef FAST_IPSEC 236 if (ro == NULL) { 237 ro = &iproute; 238 bzero(ro, sizeof (*ro)); 239 } 240 #endif /* FAST_IPSEC */ 241 dst = (struct sockaddr_in *)&ro->ro_dst; 242 /* 243 * If there is a cached route, 244 * check that it is to the same destination 245 * and is still up. If not, free it and try again. 246 * The address family should also be checked in case of sharing the 247 * cache with IPv6. 248 */ 249 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 250 dst->sin_family != AF_INET || 251 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 252 RTFREE(ro->ro_rt); 253 ro->ro_rt = (struct rtentry *)0; 254 } 255 if (ro->ro_rt == 0) { 256 bzero(dst, sizeof(*dst)); 257 dst->sin_family = AF_INET; 258 dst->sin_len = sizeof(*dst); 259 dst->sin_addr = pkt_dst; 260 } 261 /* 262 * If routing to interface only, 263 * short circuit routing lookup. 264 */ 265 if (flags & IP_ROUTETOIF) { 266 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 267 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 268 ipstat.ips_noroute++; 269 error = ENETUNREACH; 270 goto bad; 271 } 272 ifp = ia->ia_ifp; 273 ip->ip_ttl = 1; 274 isbroadcast = in_broadcast(dst->sin_addr, ifp); 275 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 276 imo != NULL && imo->imo_multicast_ifp != NULL) { 277 /* 278 * Bypass the normal routing lookup for multicast 279 * packets if the interface is specified. 280 */ 281 ifp = imo->imo_multicast_ifp; 282 IFP_TO_IA(ifp, ia); 283 isbroadcast = 0; /* fool gcc */ 284 } else { 285 /* 286 * If this is the case, we probably don't want to allocate 287 * a protocol-cloned route since we didn't get one from the 288 * ULP. 
This lets TCP do its thing, while not burdening 289 * forwarding or ICMP with the overhead of cloning a route. 290 * Of course, we still want to do any cloning requested by 291 * the link layer, as this is probably required in all cases 292 * for correct operation (as it is for ARP). 293 */ 294 if (ro->ro_rt == 0) 295 rtalloc_ign(ro, RTF_PRCLONING); 296 if (ro->ro_rt == 0) { 297 ipstat.ips_noroute++; 298 error = EHOSTUNREACH; 299 goto bad; 300 } 301 ia = ifatoia(ro->ro_rt->rt_ifa); 302 ifp = ro->ro_rt->rt_ifp; 303 ro->ro_rt->rt_use++; 304 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 305 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 306 if (ro->ro_rt->rt_flags & RTF_HOST) 307 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 308 else 309 isbroadcast = in_broadcast(dst->sin_addr, ifp); 310 } 311 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 312 struct in_multi *inm; 313 314 m->m_flags |= M_MCAST; 315 /* 316 * IP destination address is multicast. Make sure "dst" 317 * still points to the address in "ro". (It may have been 318 * changed to point to a gateway address, above.) 319 */ 320 dst = (struct sockaddr_in *)&ro->ro_dst; 321 /* 322 * See if the caller provided any multicast options 323 */ 324 if (imo != NULL) { 325 ip->ip_ttl = imo->imo_multicast_ttl; 326 if (imo->imo_multicast_vif != -1) 327 ip->ip_src.s_addr = 328 ip_mcast_src ? 329 ip_mcast_src(imo->imo_multicast_vif) : 330 INADDR_ANY; 331 } else 332 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 333 /* 334 * Confirm that the outgoing interface supports multicast. 335 */ 336 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 337 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 338 ipstat.ips_noroute++; 339 error = ENETUNREACH; 340 goto bad; 341 } 342 } 343 /* 344 * If source address not specified yet, use address 345 * of outgoing interface. 346 */ 347 if (ip->ip_src.s_addr == INADDR_ANY) { 348 /* Interface may have no addresses. 
*/ 349 if (ia != NULL) 350 ip->ip_src = IA_SIN(ia)->sin_addr; 351 } 352 353 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 354 /* 355 * XXX 356 * delayed checksums are not currently 357 * compatible with IP multicast routing 358 */ 359 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 360 in_delayed_cksum(m); 361 m->m_pkthdr.csum_flags &= 362 ~CSUM_DELAY_DATA; 363 } 364 } 365 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 366 if (inm != NULL && 367 (imo == NULL || imo->imo_multicast_loop)) { 368 /* 369 * If we belong to the destination multicast group 370 * on the outgoing interface, and the caller did not 371 * forbid loopback, loop back a copy. 372 */ 373 ip_mloopback(ifp, m, dst, hlen); 374 } 375 else { 376 /* 377 * If we are acting as a multicast router, perform 378 * multicast forwarding as if the packet had just 379 * arrived on the interface to which we are about 380 * to send. The multicast forwarding function 381 * recursively calls this function, using the 382 * IP_FORWARDING flag to prevent infinite recursion. 383 * 384 * Multicasts that are looped back by ip_mloopback(), 385 * above, will be forwarded by the ip_input() routine, 386 * if necessary. 387 */ 388 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 389 /* 390 * If rsvp daemon is not running, do not 391 * set ip_moptions. This ensures that the packet 392 * is multicast and not just sent down one link 393 * as prescribed by rsvpd. 394 */ 395 if (!rsvp_on) 396 imo = NULL; 397 if (ip_mforward && 398 ip_mforward(ip, ifp, m, imo) != 0) { 399 m_freem(m); 400 goto done; 401 } 402 } 403 } 404 405 /* 406 * Multicasts with a time-to-live of zero may be looped- 407 * back, above, but must not be transmitted on a network. 408 * Also, multicasts addressed to the loopback interface 409 * are not sent -- the above call to ip_mloopback() will 410 * loop back a copy if this host actually belongs to the 411 * destination group on the loopback interface. 
412 */ 413 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 414 m_freem(m); 415 goto done; 416 } 417 418 goto sendit; 419 } 420 #ifndef notdef 421 /* 422 * If the source address is not specified yet, use the address 423 * of the outoing interface. In case, keep note we did that, so 424 * if the the firewall changes the next-hop causing the output 425 * interface to change, we can fix that. 426 */ 427 if (ip->ip_src.s_addr == INADDR_ANY) { 428 /* Interface may have no addresses. */ 429 if (ia != NULL) { 430 ip->ip_src = IA_SIN(ia)->sin_addr; 431 src_was_INADDR_ANY = 1; 432 } 433 } 434 #endif /* notdef */ 435 /* 436 * Verify that we have any chance at all of being able to queue 437 * the packet or packet fragments 438 */ 439 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 440 ifp->if_snd.ifq_maxlen) { 441 error = ENOBUFS; 442 ipstat.ips_odropped++; 443 goto bad; 444 } 445 446 /* 447 * Look for broadcast address and 448 * verify user is allowed to send 449 * such a packet. 450 */ 451 if (isbroadcast) { 452 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 453 error = EADDRNOTAVAIL; 454 goto bad; 455 } 456 if ((flags & IP_ALLOWBROADCAST) == 0) { 457 error = EACCES; 458 goto bad; 459 } 460 /* don't allow broadcast messages to be fragmented */ 461 if (ip->ip_len > ifp->if_mtu) { 462 error = EMSGSIZE; 463 goto bad; 464 } 465 m->m_flags |= M_BCAST; 466 } else { 467 m->m_flags &= ~M_BCAST; 468 } 469 470 sendit: 471 #ifdef IPSEC 472 /* get SP for this packet */ 473 if (so == NULL) 474 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 475 else 476 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 477 478 if (sp == NULL) { 479 ipsecstat.out_inval++; 480 goto bad; 481 } 482 483 error = 0; 484 485 /* check policy */ 486 switch (sp->policy) { 487 case IPSEC_POLICY_DISCARD: 488 /* 489 * This packet is just discarded. 
490 */ 491 ipsecstat.out_polvio++; 492 goto bad; 493 494 case IPSEC_POLICY_BYPASS: 495 case IPSEC_POLICY_NONE: 496 /* no need to do IPsec. */ 497 goto skip_ipsec; 498 499 case IPSEC_POLICY_IPSEC: 500 if (sp->req == NULL) { 501 /* acquire a policy */ 502 error = key_spdacquire(sp); 503 goto bad; 504 } 505 break; 506 507 case IPSEC_POLICY_ENTRUST: 508 default: 509 printf("ip_output: Invalid policy found. %d\n", sp->policy); 510 } 511 { 512 struct ipsec_output_state state; 513 bzero(&state, sizeof(state)); 514 state.m = m; 515 if (flags & IP_ROUTETOIF) { 516 state.ro = &iproute; 517 bzero(&iproute, sizeof(iproute)); 518 } else 519 state.ro = ro; 520 state.dst = (struct sockaddr *)dst; 521 522 ip->ip_sum = 0; 523 524 /* 525 * XXX 526 * delayed checksums are not currently compatible with IPsec 527 */ 528 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 529 in_delayed_cksum(m); 530 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 531 } 532 533 ip->ip_len = htons(ip->ip_len); 534 ip->ip_off = htons(ip->ip_off); 535 536 error = ipsec4_output(&state, sp, flags); 537 538 m = state.m; 539 if (flags & IP_ROUTETOIF) { 540 /* 541 * if we have tunnel mode SA, we may need to ignore 542 * IP_ROUTETOIF. 543 */ 544 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 545 flags &= ~IP_ROUTETOIF; 546 ro = state.ro; 547 } 548 } else 549 ro = state.ro; 550 dst = (struct sockaddr_in *)state.dst; 551 if (error) { 552 /* mbuf is already reclaimed in ipsec4_output. 
*/ 553 m0 = NULL; 554 switch (error) { 555 case EHOSTUNREACH: 556 case ENETUNREACH: 557 case EMSGSIZE: 558 case ENOBUFS: 559 case ENOMEM: 560 break; 561 default: 562 printf("ip4_output (ipsec): error code %d\n", error); 563 /*fall through*/ 564 case ENOENT: 565 /* don't show these error codes to the user */ 566 error = 0; 567 break; 568 } 569 goto bad; 570 } 571 } 572 573 /* be sure to update variables that are affected by ipsec4_output() */ 574 ip = mtod(m, struct ip *); 575 #ifdef _IP_VHL 576 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 577 #else 578 hlen = ip->ip_hl << 2; 579 #endif 580 if (ro->ro_rt == NULL) { 581 if ((flags & IP_ROUTETOIF) == 0) { 582 printf("ip_output: " 583 "can't update route after IPsec processing\n"); 584 error = EHOSTUNREACH; /*XXX*/ 585 goto bad; 586 } 587 } else { 588 ia = ifatoia(ro->ro_rt->rt_ifa); 589 ifp = ro->ro_rt->rt_ifp; 590 } 591 592 /* make it flipped, again. */ 593 ip->ip_len = ntohs(ip->ip_len); 594 ip->ip_off = ntohs(ip->ip_off); 595 skip_ipsec: 596 #endif /*IPSEC*/ 597 #ifdef FAST_IPSEC 598 /* 599 * Check the security policy (SP) for the packet and, if 600 * required, do IPsec-related processing. There are two 601 * cases here; the first time a packet is sent through 602 * it will be untagged and handled by ipsec4_checkpolicy. 603 * If the packet is resubmitted to ip_output (e.g. after 604 * AH, ESP, etc. processing), there will be a tag to bypass 605 * the lookup and related policy checking. 
606 */ 607 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 608 s = splnet(); 609 if (mtag != NULL) { 610 tdbi = (struct tdb_ident *)(mtag + 1); 611 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 612 if (sp == NULL) 613 error = -EINVAL; /* force silent drop */ 614 m_tag_delete(m, mtag); 615 } else { 616 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 617 &error, inp); 618 } 619 /* 620 * There are four return cases: 621 * sp != NULL apply IPsec policy 622 * sp == NULL, error == 0 no IPsec handling needed 623 * sp == NULL, error == -EINVAL discard packet w/o error 624 * sp == NULL, error != 0 discard packet, report error 625 */ 626 if (sp != NULL) { 627 /* Loop detection, check if ipsec processing already done */ 628 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 629 for (mtag = m_tag_first(m); mtag != NULL; 630 mtag = m_tag_next(m, mtag)) { 631 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 632 continue; 633 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 634 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 635 continue; 636 /* 637 * Check if policy has an SA associated with it. 638 * This can happen when an SP has yet to acquire 639 * an SA; e.g. on first reference. If it occurs, 640 * then we let ipsec4_process_packet do its thing. 641 */ 642 if (sp->req->sav == NULL) 643 break; 644 tdbi = (struct tdb_ident *)(mtag + 1); 645 if (tdbi->spi == sp->req->sav->spi && 646 tdbi->proto == sp->req->sav->sah->saidx.proto && 647 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 648 sizeof (union sockaddr_union)) == 0) { 649 /* 650 * No IPsec processing is needed, free 651 * reference to SP. 652 * 653 * NB: null pointer to avoid free at 654 * done: below. 655 */ 656 KEY_FREESP(&sp), sp = NULL; 657 splx(s); 658 goto spd_done; 659 } 660 } 661 662 /* 663 * Do delayed checksums now because we send before 664 * this is done in the normal processing path. 
665 */ 666 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 667 in_delayed_cksum(m); 668 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 669 } 670 671 ip->ip_len = htons(ip->ip_len); 672 ip->ip_off = htons(ip->ip_off); 673 674 /* NB: callee frees mbuf */ 675 error = ipsec4_process_packet(m, sp->req, flags, 0); 676 /* 677 * Preserve KAME behaviour: ENOENT can be returned 678 * when an SA acquire is in progress. Don't propagate 679 * this to user-level; it confuses applications. 680 * 681 * XXX this will go away when the SADB is redone. 682 */ 683 if (error == ENOENT) 684 error = 0; 685 splx(s); 686 goto done; 687 } else { 688 splx(s); 689 690 if (error != 0) { 691 /* 692 * Hack: -EINVAL is used to signal that a packet 693 * should be silently discarded. This is typically 694 * because we asked key management for an SA and 695 * it was delayed (e.g. kicked up to IKE). 696 */ 697 if (error == -EINVAL) 698 error = 0; 699 goto bad; 700 } else { 701 /* No IPsec processing for this packet. */ 702 } 703 #ifdef notyet 704 /* 705 * If deferred crypto processing is needed, check that 706 * the interface supports it. 707 */ 708 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 709 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 710 /* notify IPsec to do its own crypto */ 711 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 712 error = EHOSTUNREACH; 713 goto bad; 714 } 715 #endif 716 } 717 spd_done: 718 #endif /* FAST_IPSEC */ 719 /* 720 * IpHack's section. 721 * - Xlate: translate packet's addr/port (NAT). 722 * - Firewall: deny/allow/etc. 723 * - Wrap: fake packet's addr/port <unimpl.> 724 * - Encapsulate: put it in another IP and send out. <unimp.> 725 */ 726 727 /* 728 * Run through list of hooks for output packets. 
729 */ 730 if (pfil_has_hooks(&inet_pfil_hook)) { 731 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); 732 if (error != 0 || m == NULL) 733 goto done; 734 ip = mtod(m, struct ip *); 735 } 736 737 /* 738 * Check with the firewall... 739 * but not if we are already being fwd'd from a firewall. 740 */ 741 if (fw_enable && IPFW_LOADED && !args.next_hop) { 742 struct sockaddr_in *old = dst; 743 744 args.m = m; 745 args.next_hop = dst; 746 args.oif = ifp; 747 off = ip_fw_chk_ptr(&args); 748 m = args.m; 749 dst = args.next_hop; 750 751 /* 752 * On return we must do the following: 753 * m == NULL -> drop the pkt (old interface, deprecated) 754 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 755 * 1<=off<= 0xffff -> DIVERT 756 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 757 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 758 * dst != old -> IPFIREWALL_FORWARD 759 * off==0, dst==old -> accept 760 * If some of the above modules are not compiled in, then 761 * we should't have to check the corresponding condition 762 * (because the ipfw control socket should not accept 763 * unsupported rules), but better play safe and drop 764 * packets in case of doubt. 765 */ 766 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 767 if (m) 768 m_freem(m); 769 error = EACCES; 770 goto done; 771 } 772 ip = mtod(m, struct ip *); 773 if (off == 0 && dst == old) /* common case */ 774 goto pass; 775 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 776 /* 777 * pass the pkt to dummynet. Need to include 778 * pipe number, m, ifp, ro, dst because these are 779 * not recomputed in the next pass. 780 * All other parameters have been already used and 781 * so they are not needed anymore. 782 * XXX note: if the ifp or ro entry are deleted 783 * while a pkt is in dummynet, we are in trouble! 
784 */ 785 args.ro = ro; 786 args.dst = dst; 787 args.flags = flags; 788 789 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 790 &args); 791 goto done; 792 } 793 #ifdef IPDIVERT 794 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 795 struct mbuf *clone = NULL; 796 797 /* Clone packet if we're doing a 'tee' */ 798 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 799 clone = m_dup(m, MB_DONTWAIT); 800 801 /* 802 * XXX 803 * delayed checksums are not currently compatible 804 * with divert sockets. 805 */ 806 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 807 in_delayed_cksum(m); 808 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 809 } 810 811 /* Restore packet header fields to original values */ 812 ip->ip_len = htons(ip->ip_len); 813 ip->ip_off = htons(ip->ip_off); 814 815 /* Deliver packet to divert input routine */ 816 divert_packet(m, 0, off & 0xffff, args.divert_rule); 817 818 /* If 'tee', continue with original packet */ 819 if (clone != NULL) { 820 m = clone; 821 ip = mtod(m, struct ip *); 822 goto pass; 823 } 824 goto done; 825 } 826 #endif 827 828 /* IPFIREWALL_FORWARD */ 829 /* 830 * Check dst to make sure it is directly reachable on the 831 * interface we previously thought it was. 832 * If it isn't (which may be likely in some situations) we have 833 * to re-route it (ie, find a route for the next-hop and the 834 * associated interface) and set them here. This is nested 835 * forwarding which in most cases is undesirable, except where 836 * such control is nigh impossible. So we do it here. 837 * And I'm babbling. 
838 */ 839 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 840 #if 0 841 /* 842 * XXX To improve readability, this block should be 843 * changed into a function call as below: 844 */ 845 error = ip_ipforward(&m, &dst, &ifp); 846 if (error) 847 goto bad; 848 if (m == NULL) /* ip_input consumed the mbuf */ 849 goto done; 850 #else 851 struct in_ifaddr *ia; 852 853 /* 854 * XXX sro_fwd below is static, and a pointer 855 * to it gets passed to routines downstream. 856 * This could have surprisingly bad results in 857 * practice, because its content is overwritten 858 * by subsequent packets. 859 */ 860 /* There must be a better way to do this next line... */ 861 static struct route sro_fwd; 862 struct route *ro_fwd = &sro_fwd; 863 864 #if 0 865 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 866 dst->sin_addr, "\n"); 867 #endif 868 869 /* 870 * We need to figure out if we have been forwarded 871 * to a local socket. If so, then we should somehow 872 * "loop back" to ip_input, and get directed to the 873 * PCB as if we had received this packet. This is 874 * because it may be dificult to identify the packets 875 * you want to forward until they are being output 876 * and have selected an interface. (e.g. locally 877 * initiated packets) If we used the loopback inteface, 878 * we would not be able to control what happens 879 * as the packet runs through ip_input() as 880 * it is done through a ISR. 881 */ 882 LIST_FOREACH(ia, 883 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 884 /* 885 * If the addr to forward to is one 886 * of ours, we pretend to 887 * be the destination for this packet. 
888 */ 889 if (IA_SIN(ia)->sin_addr.s_addr == 890 dst->sin_addr.s_addr) 891 break; 892 } 893 if (ia) { /* tell ip_input "dont filter" */ 894 struct m_hdr tag; 895 896 tag.mh_type = MT_TAG; 897 tag.mh_flags = PACKET_TAG_IPFORWARD; 898 tag.mh_data = (caddr_t)args.next_hop; 899 tag.mh_next = m; 900 901 if (m->m_pkthdr.rcvif == NULL) 902 m->m_pkthdr.rcvif = ifunit("lo0"); 903 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 904 m->m_pkthdr.csum_flags |= 905 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 906 m0->m_pkthdr.csum_data = 0xffff; 907 } 908 m->m_pkthdr.csum_flags |= 909 CSUM_IP_CHECKED | CSUM_IP_VALID; 910 ip->ip_len = htons(ip->ip_len); 911 ip->ip_off = htons(ip->ip_off); 912 ip_input((struct mbuf *)&tag); 913 goto done; 914 } 915 /* Some of the logic for this was 916 * nicked from above. 917 * 918 * This rewrites the cached route in a local PCB. 919 * Is this what we want to do? 920 */ 921 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 922 923 ro_fwd->ro_rt = 0; 924 rtalloc_ign(ro_fwd, RTF_PRCLONING); 925 926 if (ro_fwd->ro_rt == 0) { 927 ipstat.ips_noroute++; 928 error = EHOSTUNREACH; 929 goto bad; 930 } 931 932 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 933 ifp = ro_fwd->ro_rt->rt_ifp; 934 ro_fwd->ro_rt->rt_use++; 935 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 936 dst = (struct sockaddr_in *) 937 ro_fwd->ro_rt->rt_gateway; 938 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 939 isbroadcast = 940 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 941 else 942 isbroadcast = in_broadcast(dst->sin_addr, ifp); 943 if (ro->ro_rt) 944 RTFREE(ro->ro_rt); 945 ro->ro_rt = ro_fwd->ro_rt; 946 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 947 948 #endif /* ... block to be put into a function */ 949 /* 950 * If we added a default src ip earlier, 951 * which would have been gotten from the-then 952 * interface, do it again, from the new one. 
953 */ 954 if (src_was_INADDR_ANY) 955 ip->ip_src = IA_SIN(ia)->sin_addr; 956 goto pass ; 957 } 958 959 /* 960 * if we get here, none of the above matches, and 961 * we have to drop the pkt 962 */ 963 m_freem(m); 964 error = EACCES; /* not sure this is the right error msg */ 965 goto done; 966 } 967 968 pass: 969 /* 127/8 must not appear on wire - RFC1122. */ 970 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 971 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 972 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 973 ipstat.ips_badaddr++; 974 error = EADDRNOTAVAIL; 975 goto bad; 976 } 977 } 978 979 m->m_pkthdr.csum_flags |= CSUM_IP; 980 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 981 if (sw_csum & CSUM_DELAY_DATA) { 982 in_delayed_cksum(m); 983 sw_csum &= ~CSUM_DELAY_DATA; 984 } 985 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 986 987 /* 988 * If small enough for interface, or the interface will take 989 * care of the fragmentation for us, can just send directly. 990 */ 991 if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { 992 ip->ip_len = htons(ip->ip_len); 993 ip->ip_off = htons(ip->ip_off); 994 ip->ip_sum = 0; 995 if (sw_csum & CSUM_DELAY_IP) { 996 if (ip->ip_vhl == IP_VHL_BORING) { 997 ip->ip_sum = in_cksum_hdr(ip); 998 } else { 999 ip->ip_sum = in_cksum(m, hlen); 1000 } 1001 } 1002 1003 /* Record statistics for this interface address. 
*/ 1004 if (!(flags & IP_FORWARDING) && ia) { 1005 ia->ia_ifa.if_opackets++; 1006 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1007 } 1008 1009 #ifdef IPSEC 1010 /* clean ipsec history once it goes out of the node */ 1011 ipsec_delaux(m); 1012 #endif 1013 1014 #ifdef MBUF_STRESS_TEST 1015 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) { 1016 struct mbuf *m1, *m2; 1017 int length, tmp; 1018 1019 tmp = length = m->m_pkthdr.len; 1020 1021 while ((length -= mbuf_frag_size) >= 1) { 1022 m1 = m_split(m, length, MB_DONTWAIT); 1023 if (m1 == NULL) 1024 break; 1025 m1->m_flags &= ~M_PKTHDR; 1026 m2 = m; 1027 while (m2->m_next != NULL) 1028 m2 = m2->m_next; 1029 m2->m_next = m1; 1030 } 1031 m->m_pkthdr.len = tmp; 1032 } 1033 #endif 1034 error = (*ifp->if_output)(ifp, m, 1035 (struct sockaddr *)dst, ro->ro_rt); 1036 goto done; 1037 } 1038 1039 if (ip->ip_off & IP_DF) { 1040 error = EMSGSIZE; 1041 /* 1042 * This case can happen if the user changed the MTU 1043 * of an interface after enabling IP on it. Because 1044 * most netifs don't keep track of routes pointing to 1045 * them, there is no way for one to update all its 1046 * routes when the MTU is changed. 1047 */ 1048 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 1049 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 1050 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1051 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1052 } 1053 ipstat.ips_cantfrag++; 1054 goto bad; 1055 } 1056 1057 /* 1058 * Too large for interface; fragment if possible. If successful, 1059 * on return, m will point to a list of packets to be sent. 1060 */ 1061 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 1062 if (error) 1063 goto bad; 1064 for (; m; m = m0) { 1065 m0 = m->m_nextpkt; 1066 m->m_nextpkt = 0; 1067 #ifdef IPSEC 1068 /* clean ipsec history once it goes out of the node */ 1069 ipsec_delaux(m); 1070 #endif 1071 if (error == 0) { 1072 /* Record statistics for this interface address. 
*/ 1073 if (ia != NULL) { 1074 ia->ia_ifa.if_opackets++; 1075 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1076 } 1077 1078 error = (*ifp->if_output)(ifp, m, 1079 (struct sockaddr *)dst, ro->ro_rt); 1080 } else 1081 m_freem(m); 1082 } 1083 1084 if (error == 0) 1085 ipstat.ips_fragmented++; 1086 1087 done: 1088 #ifdef IPSEC 1089 if (ro == &iproute && ro->ro_rt) { 1090 RTFREE(ro->ro_rt); 1091 ro->ro_rt = NULL; 1092 } 1093 if (sp != NULL) { 1094 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1095 printf("DP ip_output call free SP:%p\n", sp)); 1096 key_freesp(sp); 1097 } 1098 #endif 1099 #ifdef FAST_IPSEC 1100 if (ro == &iproute && ro->ro_rt) { 1101 RTFREE(ro->ro_rt); 1102 ro->ro_rt = NULL; 1103 } 1104 if (sp != NULL) 1105 KEY_FREESP(&sp); 1106 #endif 1107 return (error); 1108 bad: 1109 m_freem(m); 1110 goto done; 1111 } 1112 1113 /* 1114 * Create a chain of fragments which fit the given mtu. m_frag points to the 1115 * mbuf to be fragmented; on return it points to the chain with the fragments. 1116 * Return 0 if no error. If error, m_frag may contain a partially built 1117 * chain of fragments that should be freed by the caller. 1118 * 1119 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 1120 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 1121 */ 1122 int 1123 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 1124 u_long if_hwassist_flags, int sw_csum) 1125 { 1126 int error = 0; 1127 int hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1128 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 1129 int off; 1130 struct mbuf *m0 = *m_frag; /* the original packet */ 1131 int firstlen; 1132 struct mbuf **mnext; 1133 int nfrags; 1134 1135 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 1136 ipstat.ips_cantfrag++; 1137 return EMSGSIZE; 1138 } 1139 1140 /* 1141 * Must be able to put at least 8 bytes per fragment. 
1142 */ 1143 if (len < 8) 1144 return EMSGSIZE; 1145 1146 /* 1147 * If the interface will not calculate checksums on 1148 * fragmented packets, then do it here. 1149 */ 1150 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1151 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 1152 in_delayed_cksum(m0); 1153 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1154 } 1155 1156 if (len > PAGE_SIZE) { 1157 /* 1158 * Fragment large datagrams such that each segment 1159 * contains a multiple of PAGE_SIZE amount of data, 1160 * plus headers. This enables a receiver to perform 1161 * page-flipping zero-copy optimizations. 1162 * 1163 * XXX When does this help given that sender and receiver 1164 * could have different page sizes, and also mtu could 1165 * be less than the receiver's page size ? 1166 */ 1167 int newlen; 1168 struct mbuf *m; 1169 1170 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 1171 off += m->m_len; 1172 1173 /* 1174 * firstlen (off - hlen) must be aligned on an 1175 * 8-byte boundary 1176 */ 1177 if (off < hlen) 1178 goto smart_frag_failure; 1179 off = ((off - hlen) & ~7) + hlen; 1180 newlen = (~PAGE_MASK) & mtu; 1181 if ((newlen + sizeof (struct ip)) > mtu) { 1182 /* we failed, go back the default */ 1183 smart_frag_failure: 1184 newlen = len; 1185 off = hlen + len; 1186 } 1187 len = newlen; 1188 1189 } else { 1190 off = hlen + len; 1191 } 1192 1193 firstlen = off - hlen; 1194 mnext = &m0->m_nextpkt; /* pointer to next packet */ 1195 1196 /* 1197 * Loop through length of segment after first fragment, 1198 * make new header and copy data of each part and link onto chain. 1199 * Here, m0 is the original packet, m is the fragment being created. 1200 * The fragments are linked off the m_nextpkt of the original 1201 * packet, which after processing serves as the first fragment. 
1202 */ 1203 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 1204 struct ip *mhip; /* ip header on the fragment */ 1205 struct mbuf *m; 1206 int mhlen = sizeof (struct ip); 1207 1208 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1209 if (m == 0) { 1210 error = ENOBUFS; 1211 ipstat.ips_odropped++; 1212 goto done; 1213 } 1214 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1215 /* 1216 * In the first mbuf, leave room for the link header, then 1217 * copy the original IP header including options. The payload 1218 * goes into an additional mbuf chain returned by m_copy(). 1219 */ 1220 m->m_data += max_linkhdr; 1221 mhip = mtod(m, struct ip *); 1222 *mhip = *ip; 1223 if (hlen > sizeof (struct ip)) { 1224 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1225 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 1226 } 1227 m->m_len = mhlen; 1228 /* XXX do we need to add ip->ip_off below ? */ 1229 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1230 if (off + len >= ip->ip_len) { /* last fragment */ 1231 len = ip->ip_len - off; 1232 m->m_flags |= M_LASTFRAG; 1233 } else 1234 mhip->ip_off |= IP_MF; 1235 mhip->ip_len = htons((u_short)(len + mhlen)); 1236 m->m_next = m_copy(m0, off, len); 1237 if (m->m_next == 0) { /* copy failed */ 1238 m_free(m); 1239 error = ENOBUFS; /* ??? */ 1240 ipstat.ips_odropped++; 1241 goto done; 1242 } 1243 m->m_pkthdr.len = mhlen + len; 1244 m->m_pkthdr.rcvif = (struct ifnet *)0; 1245 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1246 mhip->ip_off = htons(mhip->ip_off); 1247 mhip->ip_sum = 0; 1248 if (sw_csum & CSUM_DELAY_IP) 1249 mhip->ip_sum = in_cksum(m, mhlen); 1250 *mnext = m; 1251 mnext = &m->m_nextpkt; 1252 } 1253 ipstat.ips_ofragments += nfrags; 1254 1255 /* set first marker for fragment chain */ 1256 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1257 m0->m_pkthdr.csum_data = nfrags; 1258 1259 /* 1260 * Update first fragment by trimming what's been copied out 1261 * and updating header. 
1262 */ 1263 m_adj(m0, hlen + firstlen - ip->ip_len); 1264 m0->m_pkthdr.len = hlen + firstlen; 1265 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 1266 ip->ip_off |= IP_MF; 1267 ip->ip_off = htons(ip->ip_off); 1268 ip->ip_sum = 0; 1269 if (sw_csum & CSUM_DELAY_IP) 1270 ip->ip_sum = in_cksum(m0, hlen); 1271 1272 done: 1273 *m_frag = m0; 1274 return error; 1275 } 1276 1277 void 1278 in_delayed_cksum(struct mbuf *m) 1279 { 1280 struct ip *ip; 1281 u_short csum, offset; 1282 1283 ip = mtod(m, struct ip *); 1284 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1285 csum = in_cksum_skip(m, ip->ip_len, offset); 1286 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1287 csum = 0xffff; 1288 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1289 1290 if (offset + sizeof(u_short) > m->m_len) { 1291 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1292 m->m_len, offset, ip->ip_p); 1293 /* 1294 * XXX 1295 * this shouldn't happen, but if it does, the 1296 * correct behavior may be to insert the checksum 1297 * in the existing chain instead of rearranging it. 1298 */ 1299 m = m_pullup(m, offset + sizeof(u_short)); 1300 } 1301 *(u_short *)(m->m_data + offset) = csum; 1302 } 1303 1304 /* 1305 * Insert IP options into preformed packet. 1306 * Adjust IP destination as required for IP source routing, 1307 * as indicated by a non-zero in_addr at the start of the options. 1308 * 1309 * XXX This routine assumes that the packet has no options in place. 
1310 */ 1311 static struct mbuf * 1312 ip_insertoptions(m, opt, phlen) 1313 struct mbuf *m; 1314 struct mbuf *opt; 1315 int *phlen; 1316 { 1317 struct ipoption *p = mtod(opt, struct ipoption *); 1318 struct mbuf *n; 1319 struct ip *ip = mtod(m, struct ip *); 1320 unsigned optlen; 1321 1322 optlen = opt->m_len - sizeof(p->ipopt_dst); 1323 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { 1324 *phlen = 0; 1325 return (m); /* XXX should fail */ 1326 } 1327 if (p->ipopt_dst.s_addr) 1328 ip->ip_dst = p->ipopt_dst; 1329 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1330 MGETHDR(n, MB_DONTWAIT, MT_HEADER); 1331 if (n == 0) { 1332 *phlen = 0; 1333 return (m); 1334 } 1335 n->m_pkthdr.rcvif = (struct ifnet *)0; 1336 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1337 m->m_len -= sizeof(struct ip); 1338 m->m_data += sizeof(struct ip); 1339 n->m_next = m; 1340 m = n; 1341 m->m_len = optlen + sizeof(struct ip); 1342 m->m_data += max_linkhdr; 1343 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1344 } else { 1345 m->m_data -= optlen; 1346 m->m_len += optlen; 1347 m->m_pkthdr.len += optlen; 1348 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1349 } 1350 ip = mtod(m, struct ip *); 1351 bcopy(p->ipopt_list, ip + 1, optlen); 1352 *phlen = sizeof(struct ip) + optlen; 1353 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1354 ip->ip_len += optlen; 1355 return (m); 1356 } 1357 1358 /* 1359 * Copy options from ip to jp, 1360 * omitting those not copied during fragmentation. 1361 */ 1362 int 1363 ip_optcopy(ip, jp) 1364 struct ip *ip, *jp; 1365 { 1366 u_char *cp, *dp; 1367 int opt, optlen, cnt; 1368 1369 cp = (u_char *)(ip + 1); 1370 dp = (u_char *)(jp + 1); 1371 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1372 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1373 opt = cp[0]; 1374 if (opt == IPOPT_EOL) 1375 break; 1376 if (opt == IPOPT_NOP) { 1377 /* Preserve for IP mcast tunnel's LSRR alignment. 
*/ 1378 *dp++ = IPOPT_NOP; 1379 optlen = 1; 1380 continue; 1381 } 1382 1383 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1384 ("ip_optcopy: malformed ipv4 option")); 1385 optlen = cp[IPOPT_OLEN]; 1386 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1387 ("ip_optcopy: malformed ipv4 option")); 1388 1389 /* bogus lengths should have been caught by ip_dooptions */ 1390 if (optlen > cnt) 1391 optlen = cnt; 1392 if (IPOPT_COPIED(opt)) { 1393 bcopy(cp, dp, optlen); 1394 dp += optlen; 1395 } 1396 } 1397 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1398 *dp++ = IPOPT_EOL; 1399 return (optlen); 1400 } 1401 1402 /* 1403 * IP socket option processing. 1404 */ 1405 int 1406 ip_ctloutput(so, sopt) 1407 struct socket *so; 1408 struct sockopt *sopt; 1409 { 1410 struct inpcb *inp = sotoinpcb(so); 1411 int error, optval; 1412 1413 error = optval = 0; 1414 if (sopt->sopt_level != IPPROTO_IP) { 1415 return (EINVAL); 1416 } 1417 1418 switch (sopt->sopt_dir) { 1419 case SOPT_SET: 1420 switch (sopt->sopt_name) { 1421 case IP_OPTIONS: 1422 #ifdef notyet 1423 case IP_RETOPTS: 1424 #endif 1425 { 1426 struct mbuf *m; 1427 if (sopt->sopt_valsize > MLEN) { 1428 error = EMSGSIZE; 1429 break; 1430 } 1431 MGET(m, sopt->sopt_td ? 
MB_WAIT : MB_DONTWAIT, MT_HEADER); 1432 if (m == 0) { 1433 error = ENOBUFS; 1434 break; 1435 } 1436 m->m_len = sopt->sopt_valsize; 1437 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1438 m->m_len); 1439 1440 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1441 m)); 1442 } 1443 1444 case IP_TOS: 1445 case IP_TTL: 1446 case IP_RECVOPTS: 1447 case IP_RECVRETOPTS: 1448 case IP_RECVDSTADDR: 1449 case IP_RECVIF: 1450 case IP_FAITH: 1451 error = sooptcopyin(sopt, &optval, sizeof optval, 1452 sizeof optval); 1453 if (error) 1454 break; 1455 1456 switch (sopt->sopt_name) { 1457 case IP_TOS: 1458 inp->inp_ip_tos = optval; 1459 break; 1460 1461 case IP_TTL: 1462 inp->inp_ip_ttl = optval; 1463 break; 1464 #define OPTSET(bit) \ 1465 if (optval) \ 1466 inp->inp_flags |= bit; \ 1467 else \ 1468 inp->inp_flags &= ~bit; 1469 1470 case IP_RECVOPTS: 1471 OPTSET(INP_RECVOPTS); 1472 break; 1473 1474 case IP_RECVRETOPTS: 1475 OPTSET(INP_RECVRETOPTS); 1476 break; 1477 1478 case IP_RECVDSTADDR: 1479 OPTSET(INP_RECVDSTADDR); 1480 break; 1481 1482 case IP_RECVIF: 1483 OPTSET(INP_RECVIF); 1484 break; 1485 1486 case IP_FAITH: 1487 OPTSET(INP_FAITH); 1488 break; 1489 } 1490 break; 1491 #undef OPTSET 1492 1493 case IP_MULTICAST_IF: 1494 case IP_MULTICAST_VIF: 1495 case IP_MULTICAST_TTL: 1496 case IP_MULTICAST_LOOP: 1497 case IP_ADD_MEMBERSHIP: 1498 case IP_DROP_MEMBERSHIP: 1499 error = ip_setmoptions(sopt, &inp->inp_moptions); 1500 break; 1501 1502 case IP_PORTRANGE: 1503 error = sooptcopyin(sopt, &optval, sizeof optval, 1504 sizeof optval); 1505 if (error) 1506 break; 1507 1508 switch (optval) { 1509 case IP_PORTRANGE_DEFAULT: 1510 inp->inp_flags &= ~(INP_LOWPORT); 1511 inp->inp_flags &= ~(INP_HIGHPORT); 1512 break; 1513 1514 case IP_PORTRANGE_HIGH: 1515 inp->inp_flags &= ~(INP_LOWPORT); 1516 inp->inp_flags |= INP_HIGHPORT; 1517 break; 1518 1519 case IP_PORTRANGE_LOW: 1520 inp->inp_flags &= ~(INP_HIGHPORT); 1521 inp->inp_flags |= INP_LOWPORT; 1522 break; 1523 1524 default: 1525 
error = EINVAL; 1526 break; 1527 } 1528 break; 1529 1530 #if defined(IPSEC) || defined(FAST_IPSEC) 1531 case IP_IPSEC_POLICY: 1532 { 1533 caddr_t req; 1534 size_t len = 0; 1535 int priv; 1536 struct mbuf *m; 1537 int optname; 1538 1539 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1540 break; 1541 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1542 break; 1543 priv = (sopt->sopt_td != NULL && 1544 suser(sopt->sopt_td) != 0) ? 0 : 1; 1545 req = mtod(m, caddr_t); 1546 len = m->m_len; 1547 optname = sopt->sopt_name; 1548 error = ipsec4_set_policy(inp, optname, req, len, priv); 1549 m_freem(m); 1550 break; 1551 } 1552 #endif /*IPSEC*/ 1553 1554 default: 1555 error = ENOPROTOOPT; 1556 break; 1557 } 1558 break; 1559 1560 case SOPT_GET: 1561 switch (sopt->sopt_name) { 1562 case IP_OPTIONS: 1563 case IP_RETOPTS: 1564 if (inp->inp_options) 1565 error = sooptcopyout(sopt, 1566 mtod(inp->inp_options, 1567 char *), 1568 inp->inp_options->m_len); 1569 else 1570 sopt->sopt_valsize = 0; 1571 break; 1572 1573 case IP_TOS: 1574 case IP_TTL: 1575 case IP_RECVOPTS: 1576 case IP_RECVRETOPTS: 1577 case IP_RECVDSTADDR: 1578 case IP_RECVIF: 1579 case IP_PORTRANGE: 1580 case IP_FAITH: 1581 switch (sopt->sopt_name) { 1582 1583 case IP_TOS: 1584 optval = inp->inp_ip_tos; 1585 break; 1586 1587 case IP_TTL: 1588 optval = inp->inp_ip_ttl; 1589 break; 1590 1591 #define OPTBIT(bit) (inp->inp_flags & bit ? 
1 : 0) 1592 1593 case IP_RECVOPTS: 1594 optval = OPTBIT(INP_RECVOPTS); 1595 break; 1596 1597 case IP_RECVRETOPTS: 1598 optval = OPTBIT(INP_RECVRETOPTS); 1599 break; 1600 1601 case IP_RECVDSTADDR: 1602 optval = OPTBIT(INP_RECVDSTADDR); 1603 break; 1604 1605 case IP_RECVIF: 1606 optval = OPTBIT(INP_RECVIF); 1607 break; 1608 1609 case IP_PORTRANGE: 1610 if (inp->inp_flags & INP_HIGHPORT) 1611 optval = IP_PORTRANGE_HIGH; 1612 else if (inp->inp_flags & INP_LOWPORT) 1613 optval = IP_PORTRANGE_LOW; 1614 else 1615 optval = 0; 1616 break; 1617 1618 case IP_FAITH: 1619 optval = OPTBIT(INP_FAITH); 1620 break; 1621 } 1622 error = sooptcopyout(sopt, &optval, sizeof optval); 1623 break; 1624 1625 case IP_MULTICAST_IF: 1626 case IP_MULTICAST_VIF: 1627 case IP_MULTICAST_TTL: 1628 case IP_MULTICAST_LOOP: 1629 case IP_ADD_MEMBERSHIP: 1630 case IP_DROP_MEMBERSHIP: 1631 error = ip_getmoptions(sopt, inp->inp_moptions); 1632 break; 1633 1634 #if defined(IPSEC) || defined(FAST_IPSEC) 1635 case IP_IPSEC_POLICY: 1636 { 1637 struct mbuf *m = NULL; 1638 caddr_t req = NULL; 1639 size_t len = 0; 1640 1641 if (m != 0) { 1642 req = mtod(m, caddr_t); 1643 len = m->m_len; 1644 } 1645 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1646 if (error == 0) 1647 error = soopt_mcopyout(sopt, m); /* XXX */ 1648 if (error == 0) 1649 m_freem(m); 1650 break; 1651 } 1652 #endif /*IPSEC*/ 1653 1654 default: 1655 error = ENOPROTOOPT; 1656 break; 1657 } 1658 break; 1659 } 1660 return (error); 1661 } 1662 1663 /* 1664 * Set up IP options in pcb for insertion in output packets. 1665 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1666 * with destination address if source routed. 
1667 */ 1668 static int 1669 ip_pcbopts(optname, pcbopt, m) 1670 int optname; 1671 struct mbuf **pcbopt; 1672 struct mbuf *m; 1673 { 1674 int cnt, optlen; 1675 u_char *cp; 1676 u_char opt; 1677 1678 /* turn off any old options */ 1679 if (*pcbopt) 1680 (void)m_free(*pcbopt); 1681 *pcbopt = 0; 1682 if (m == (struct mbuf *)0 || m->m_len == 0) { 1683 /* 1684 * Only turning off any previous options. 1685 */ 1686 if (m) 1687 (void)m_free(m); 1688 return (0); 1689 } 1690 1691 if (m->m_len % sizeof(int32_t)) 1692 goto bad; 1693 /* 1694 * IP first-hop destination address will be stored before 1695 * actual options; move other options back 1696 * and clear it when none present. 1697 */ 1698 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1699 goto bad; 1700 cnt = m->m_len; 1701 m->m_len += sizeof(struct in_addr); 1702 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1703 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1704 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1705 1706 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1707 opt = cp[IPOPT_OPTVAL]; 1708 if (opt == IPOPT_EOL) 1709 break; 1710 if (opt == IPOPT_NOP) 1711 optlen = 1; 1712 else { 1713 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1714 goto bad; 1715 optlen = cp[IPOPT_OLEN]; 1716 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1717 goto bad; 1718 } 1719 switch (opt) { 1720 1721 default: 1722 break; 1723 1724 case IPOPT_LSRR: 1725 case IPOPT_SSRR: 1726 /* 1727 * user process specifies route as: 1728 * ->A->B->C->D 1729 * D must be our final destination (but we can't 1730 * check that since we may not have connected yet). 1731 * A is first hop destination, which doesn't appear in 1732 * actual IP option, but is stored before the options. 
1733 */ 1734 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1735 goto bad; 1736 m->m_len -= sizeof(struct in_addr); 1737 cnt -= sizeof(struct in_addr); 1738 optlen -= sizeof(struct in_addr); 1739 cp[IPOPT_OLEN] = optlen; 1740 /* 1741 * Move first hop before start of options. 1742 */ 1743 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1744 sizeof(struct in_addr)); 1745 /* 1746 * Then copy rest of options back 1747 * to close up the deleted entry. 1748 */ 1749 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1750 sizeof(struct in_addr)), 1751 (caddr_t)&cp[IPOPT_OFFSET+1], 1752 (unsigned)cnt + sizeof(struct in_addr)); 1753 break; 1754 } 1755 } 1756 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1757 goto bad; 1758 *pcbopt = m; 1759 return (0); 1760 1761 bad: 1762 (void)m_free(m); 1763 return (EINVAL); 1764 } 1765 1766 /* 1767 * XXX 1768 * The whole multicast option thing needs to be re-thought. 1769 * Several of these options are equally applicable to non-multicast 1770 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1771 * standard option (IP_TTL). 1772 */ 1773 1774 /* 1775 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1776 */ 1777 static struct ifnet * 1778 ip_multicast_if(a, ifindexp) 1779 struct in_addr *a; 1780 int *ifindexp; 1781 { 1782 int ifindex; 1783 struct ifnet *ifp; 1784 1785 if (ifindexp) 1786 *ifindexp = 0; 1787 if (ntohl(a->s_addr) >> 24 == 0) { 1788 ifindex = ntohl(a->s_addr) & 0xffffff; 1789 if (ifindex < 0 || if_index < ifindex) 1790 return NULL; 1791 ifp = ifindex2ifnet[ifindex]; 1792 if (ifindexp) 1793 *ifindexp = ifindex; 1794 } else { 1795 INADDR_TO_IFP(*a, ifp); 1796 } 1797 return ifp; 1798 } 1799 1800 /* 1801 * Set the IP multicast options in response to user setsockopt(). 
 *
 * sopt names the option and carries its value; *imop is the pcb's
 * multicast option block, which is allocated here on first use and
 * released again (with *imop reset to NULL) whenever every field is back
 * at its default value on exit.  Returns 0 or an errno value.
 */
static int
ip_setmoptions(sopt, imop)
	struct sockopt *sopt;
	struct ip_moptions **imop;
{
	int error = 0;
	int i;
	struct in_addr addr;
	struct ip_mreq mreq;
	struct ifnet *ifp;
	struct ip_moptions *imo = *imop;
	struct route ro;
	struct sockaddr_in *dst;
	int ifindex;
	int s;

	if (imo == NULL) {
		/*
		 * No multicast option buffer attached to the pcb;
		 * allocate one and initialize to default values.
		 */
		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
		    M_WAITOK);

		if (imo == NULL)
			return (ENOBUFS);
		*imop = imo;
		imo->imo_multicast_ifp = NULL;
		imo->imo_multicast_addr.s_addr = INADDR_ANY;
		imo->imo_multicast_vif = -1;
		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
		imo->imo_num_memberships = 0;
	}

	switch (sopt->sopt_name) {
	/* store an index number for the vif you wanna use in the send */
	case IP_MULTICAST_VIF:
		/* legal_vif_num is a function pointer; unset means no support. */
		if (legal_vif_num == 0) {
			error = EOPNOTSUPP;
			break;
		}
		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
		if (error)
			break;
		/* -1 is accepted as well; it is the "no vif" default value. */
		if (!legal_vif_num(i) && (i != -1)) {
			error = EINVAL;
			break;
		}
		imo->imo_multicast_vif = i;
		break;

	case IP_MULTICAST_IF:
		/*
		 * Select the interface for outgoing multicast packets.
		 */
		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
		if (error)
			break;
		/*
		 * INADDR_ANY is used to remove a previous selection.
		 * When no interface is selected, a default one is
		 * chosen every time a multicast packet is sent.
		 */
		if (addr.s_addr == INADDR_ANY) {
			imo->imo_multicast_ifp = NULL;
			break;
		}
		/*
		 * The selected interface is identified by its local
		 * IP address.  Find the interface and confirm that
		 * it supports multicasting.
		 */
		s = splimp();
		ifp = ip_multicast_if(&addr, &ifindex);
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			splx(s);
			error = EADDRNOTAVAIL;
			break;
		}
		imo->imo_multicast_ifp = ifp;
		/*
		 * Remember the user's value only when it was given as an
		 * interface index, so that it can be handed back unchanged.
		 */
		if (ifindex)
			imo->imo_multicast_addr = addr;
		else
			imo->imo_multicast_addr.s_addr = INADDR_ANY;
		splx(s);
		break;

	case IP_MULTICAST_TTL:
		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 * The original multicast API required a char argument,
		 * which is inconsistent with the rest of the socket API.
		 * We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == 1) {
			u_char ttl;
			error = sooptcopyin(sopt, &ttl, 1, 1);
			if (error)
				break;
			imo->imo_multicast_ttl = ttl;
		} else {
			u_int ttl;
			error = sooptcopyin(sopt, &ttl, sizeof ttl,
					    sizeof ttl);
			if (error)
				break;
			if (ttl > 255)
				error = EINVAL;
			else
				imo->imo_multicast_ttl = ttl;
		}
		break;

	case IP_MULTICAST_LOOP:
		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.  The original multicast API required a
		 * char argument, which is inconsistent with the rest
		 * of the socket API.  We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == 1) {
			u_char loop;
			error = sooptcopyin(sopt, &loop, 1, 1);
			if (error)
				break;
			imo->imo_multicast_loop = !!loop;
		} else {
			u_int loop;
			error = sooptcopyin(sopt, &loop, sizeof loop,
					    sizeof loop);
			if (error)
				break;
			imo->imo_multicast_loop = !!loop;
		}
		break;

	case IP_ADD_MEMBERSHIP:
		/*
		 * Add a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
		if (error)
			break;

		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
			error = EINVAL;
			break;
		}
		s = splimp();
		/*
		 * If no interface address was provided, use the interface of
		 * the route to the given multicast address.
		 */
		if (mreq.imr_interface.s_addr == INADDR_ANY) {
			bzero((caddr_t)&ro, sizeof(ro));
			dst = (struct sockaddr_in *)&ro.ro_dst;
			dst->sin_len = sizeof(*dst);
			dst->sin_family = AF_INET;
			dst->sin_addr = mreq.imr_multiaddr;
			rtalloc(&ro);
			if (ro.ro_rt == NULL) {
				error = EADDRNOTAVAIL;
				splx(s);
				break;
			}
			ifp = ro.ro_rt->rt_ifp;
			rtfree(ro.ro_rt);
		}
		else {
			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
		}

		/*
		 * See if we found an interface, and confirm that it
		 * supports multicast.
		 */
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			splx(s);
			break;
		}
		/*
		 * See if the membership already exists or if all the
		 * membership slots are full.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if (imo->imo_membership[i]->inm_ifp == ifp &&
			    imo->imo_membership[i]->inm_addr.s_addr
						== mreq.imr_multiaddr.s_addr)
				break;
		}
		if (i < imo->imo_num_memberships) {
			error = EADDRINUSE;
			splx(s);
			break;
		}
		/* Here i == imo_num_memberships, i.e. the next free slot. */
		if (i == IP_MAX_MEMBERSHIPS) {
			error = ETOOMANYREFS;
			splx(s);
			break;
		}
		/*
		 * Everything looks good; add a new record to the multicast
		 * address list for the given interface.
		 */
		if ((imo->imo_membership[i] =
		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
			error = ENOBUFS;
			splx(s);
			break;
		}
		++imo->imo_num_memberships;
		splx(s);
		break;

	case IP_DROP_MEMBERSHIP:
		/*
		 * Drop a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
		if (error)
			break;

		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
			error = EINVAL;
			break;
		}

		s = splimp();
		/*
		 * If an interface address was specified, get a pointer
		 * to its ifnet structure.
		 */
		if (mreq.imr_interface.s_addr == INADDR_ANY)
			ifp = NULL;
		else {
			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
			if (ifp == NULL) {
				error = EADDRNOTAVAIL;
				splx(s);
				break;
			}
		}
		/*
		 * Find the membership in the membership array.
		 * A NULL ifp matches the group on any interface.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if ((ifp == NULL ||
			     imo->imo_membership[i]->inm_ifp == ifp) &&
			     imo->imo_membership[i]->inm_addr.s_addr ==
			     mreq.imr_multiaddr.s_addr)
				break;
		}
		if (i == imo->imo_num_memberships) {
			error = EADDRNOTAVAIL;
			splx(s);
			break;
		}
		/*
		 * Give up the multicast address record to which the
		 * membership points.
		 */
		in_delmulti(imo->imo_membership[i]);
		/*
		 * Remove the gap in the membership array.
		 */
		for (++i; i < imo->imo_num_memberships; ++i)
			imo->imo_membership[i-1] = imo->imo_membership[i];
		--imo->imo_num_memberships;
		splx(s);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/*
	 * If all options have default values, no need to keep the mbuf.
	 */
	if (imo->imo_multicast_ifp == NULL &&
	    imo->imo_multicast_vif == -1 &&
	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
	    imo->imo_num_memberships == 0) {
		free(*imop, M_IPMOPTS);
		*imop = NULL;
	}

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
2100 */ 2101 static int 2102 ip_getmoptions(sopt, imo) 2103 struct sockopt *sopt; 2104 struct ip_moptions *imo; 2105 { 2106 struct in_addr addr; 2107 struct in_ifaddr *ia; 2108 int error, optval; 2109 u_char coptval; 2110 2111 error = 0; 2112 switch (sopt->sopt_name) { 2113 case IP_MULTICAST_VIF: 2114 if (imo != NULL) 2115 optval = imo->imo_multicast_vif; 2116 else 2117 optval = -1; 2118 error = sooptcopyout(sopt, &optval, sizeof optval); 2119 break; 2120 2121 case IP_MULTICAST_IF: 2122 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2123 addr.s_addr = INADDR_ANY; 2124 else if (imo->imo_multicast_addr.s_addr) { 2125 /* return the value user has set */ 2126 addr = imo->imo_multicast_addr; 2127 } else { 2128 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2129 addr.s_addr = (ia == NULL) ? INADDR_ANY 2130 : IA_SIN(ia)->sin_addr.s_addr; 2131 } 2132 error = sooptcopyout(sopt, &addr, sizeof addr); 2133 break; 2134 2135 case IP_MULTICAST_TTL: 2136 if (imo == 0) 2137 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2138 else 2139 optval = coptval = imo->imo_multicast_ttl; 2140 if (sopt->sopt_valsize == 1) 2141 error = sooptcopyout(sopt, &coptval, 1); 2142 else 2143 error = sooptcopyout(sopt, &optval, sizeof optval); 2144 break; 2145 2146 case IP_MULTICAST_LOOP: 2147 if (imo == 0) 2148 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2149 else 2150 optval = coptval = imo->imo_multicast_loop; 2151 if (sopt->sopt_valsize == 1) 2152 error = sooptcopyout(sopt, &coptval, 1); 2153 else 2154 error = sooptcopyout(sopt, &optval, sizeof optval); 2155 break; 2156 2157 default: 2158 error = ENOPROTOOPT; 2159 break; 2160 } 2161 return (error); 2162 } 2163 2164 /* 2165 * Discard the IP multicast options. 
2166 */ 2167 void 2168 ip_freemoptions(imo) 2169 struct ip_moptions *imo; 2170 { 2171 int i; 2172 2173 if (imo != NULL) { 2174 for (i = 0; i < imo->imo_num_memberships; ++i) 2175 in_delmulti(imo->imo_membership[i]); 2176 free(imo, M_IPMOPTS); 2177 } 2178 } 2179 2180 /* 2181 * Routine called from ip_output() to loop back a copy of an IP multicast 2182 * packet to the input queue of a specified interface. Note that this 2183 * calls the output routine of the loopback "driver", but with an interface 2184 * pointer that might NOT be a loopback interface -- evil, but easier than 2185 * replicating that code here. 2186 */ 2187 static void 2188 ip_mloopback(ifp, m, dst, hlen) 2189 struct ifnet *ifp; 2190 struct mbuf *m; 2191 struct sockaddr_in *dst; 2192 int hlen; 2193 { 2194 struct ip *ip; 2195 struct mbuf *copym; 2196 2197 copym = m_copy(m, 0, M_COPYALL); 2198 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2199 copym = m_pullup(copym, hlen); 2200 if (copym != NULL) { 2201 /* 2202 * We don't bother to fragment if the IP length is greater 2203 * than the interface's MTU. Can this possibly matter? 2204 */ 2205 ip = mtod(copym, struct ip *); 2206 ip->ip_len = htons(ip->ip_len); 2207 ip->ip_off = htons(ip->ip_off); 2208 ip->ip_sum = 0; 2209 if (ip->ip_vhl == IP_VHL_BORING) { 2210 ip->ip_sum = in_cksum_hdr(ip); 2211 } else { 2212 ip->ip_sum = in_cksum(copym, hlen); 2213 } 2214 /* 2215 * NB: 2216 * It's not clear whether there are any lingering 2217 * reentrancy problems in other areas which might 2218 * be exposed by using ip_input directly (in 2219 * particular, everything which modifies the packet 2220 * in-place). Yet another option is using the 2221 * protosw directly to deliver the looped back 2222 * packet. For the moment, we'll err on the side 2223 * of safety by using if_simloop(). 
2224 */ 2225 #if 1 /* XXX */ 2226 if (dst->sin_family != AF_INET) { 2227 printf("ip_mloopback: bad address family %d\n", 2228 dst->sin_family); 2229 dst->sin_family = AF_INET; 2230 } 2231 #endif 2232 2233 #ifdef notdef 2234 copym->m_pkthdr.rcvif = ifp; 2235 ip_input(copym); 2236 #else 2237 /* if the checksum hasn't been computed, mark it as valid */ 2238 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2239 copym->m_pkthdr.csum_flags |= 2240 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2241 copym->m_pkthdr.csum_data = 0xffff; 2242 } 2243 if_simloop(ifp, copym, dst->sin_family, 0); 2244 #endif 2245 } 2246 } 2247