1 /* $OpenBSD: ip_mroute.c,v 1.90 2016/03/07 18:44:00 naddy Exp $ */ 2 /* $NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $ */ 3 4 /* 5 * Copyright (c) 1989 Stephen Deering 6 * Copyright (c) 1992, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Stephen Deering of Stanford University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 37 */ 38 39 /* 40 * IP multicast forwarding procedures 41 * 42 * Written by David Waitzman, BBN Labs, August 1988. 43 * Modified by Steve Deering, Stanford, February 1989. 44 * Modified by Mark J. Steiglitz, Stanford, May, 1991 45 * Modified by Van Jacobson, LBL, January 1993 46 * Modified by Ajit Thyagarajan, PARC, August 1993 47 * Modified by Bill Fenner, PARC, April 1994 48 * Modified by Charles M. Hannum, NetBSD, May 1995. 49 * Modified by Ahmed Helmy, SGI, June 1996 50 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 51 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 52 * Modified by Hitoshi Asaeda, WIDE, August 2000 53 * Modified by Pavlin Radoslavov, ICSI, October 2002 54 * 55 * MROUTING Revision: 1.2 56 * and PIM-SMv2 and PIM-DM support, advanced API support, 57 * bandwidth metering and signaling 58 */ 59 60 #ifdef PIM 61 #define _PIM_VT 1 62 #endif 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/mbuf.h> 67 #include <sys/socket.h> 68 #include <sys/socketvar.h> 69 #include <sys/protosw.h> 70 #include <sys/kernel.h> 71 #include <sys/ioctl.h> 72 #include <sys/syslog.h> 73 #include <sys/sysctl.h> 74 #include <sys/timeout.h> 75 76 #include <crypto/siphash.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_var.h> 85 #include <netinet/in_pcb.h> 86 #include <netinet/igmp.h> 87 #include <netinet/igmp_var.h> 88 #include <netinet/ip_mroute.h> 89 90 #ifdef PIM 91 #include <netinet/pim.h> 92 #include <netinet/pim_var.h> 93 #endif 94 95 #define M_PULLUP(m, len) \ 96 do { \ 97 if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len))) \ 98 (m) = m_pullup((m), (len)); \ 99 } while (/*CONSTCOND*/ 0) 100 101 /* 102 * Globals. All but ip_mrouter and ip_mrtproto could be static, 103 * except for netstat or debugging purposes. 104 */ 105 struct socket *ip_mrouter = NULL; 106 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 107 108 #define NO_RTE_FOUND 0x1 109 #define RTE_FOUND 0x2 110 111 u_int32_t _mfchash(struct in_addr, struct in_addr); 112 113 #define MFCHASH(a, g) _mfchash((a), (g)) 114 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl; 115 u_long mfchash; 116 SIPHASH_KEY mfchashkey; 117 118 u_char nexpire[MFCTBLSIZ]; 119 struct vif viftable[MAXVIFS]; 120 struct mrtstat mrtstat; 121 122 #define VIFI_INVALID ((vifi_t) -1) 123 124 #define EXPIRE_TIMEOUT 250 /* 4x / second */ 125 #define UPCALL_EXPIRE 6 /* number of timeouts */ 126 struct timeout expire_upcalls_ch; 127 128 int get_sg_cnt(struct sioc_sg_req *); 129 int get_vif_cnt(struct sioc_vif_req *); 130 int get_vif_ctl(struct vifctl *); 131 int ip_mrouter_init(struct socket *, struct mbuf *); 132 int get_version(struct mbuf *); 133 int add_vif(struct mbuf *); 134 int del_vif(struct mbuf *); 135 void update_mfc_params(struct mfc *, struct mfcctl2 *); 136 void init_mfc_params(struct mfc *, struct mfcctl2 *); 137 void expire_mfc(struct mfc *); 138 int add_mfc(struct mbuf *); 139 int del_mfc(struct mbuf *); 140 int set_api_config(struct mbuf *); /* chose API capabilities */ 141 int get_api_support(struct mbuf *); 142 int get_api_config(struct mbuf *); 143 int socket_send(struct socket *, struct mbuf *, 144 struct sockaddr_in *); 145 void expire_upcalls(void *); 146 int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *); 147 void phyint_send(struct ip *, struct vif *, struct mbuf *); 148 void send_packet(struct vif *, struct mbuf *); 149 150 #ifdef PIM 151 int pim_register_send(struct ip *, struct vif *, 152 struct mbuf *, struct mfc *); 153 int pim_register_send_rp(struct ip *, struct vif *, 154 struct mbuf *, struct mfc *); 155 int pim_register_send_upcall(struct ip *, struct vif *, 156 struct mbuf *, struct mfc *); 157 struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); 158 int set_assert(struct mbuf *); 159 int get_assert(struct mbuf *); 160 161 struct pimstat pimstat; 162 163 /* 164 * Note: the PIM Register encapsulation adds the following in front of a 165 * data packet: 166 * 167 * struct pim_encap_hdr { 168 * struct ip ip; 169 * struct pim_encap_pimhdr pim; 170 * } 171 * 172 */ 173 struct pim_encap_pimhdr { 174 struct pim pim; 175 uint32_t flags; 176 }; 177 #define PIM_ENCAP_TTL 64 178 179 static struct ip pim_encap_iphdr = { 180 #if BYTE_ORDER == LITTLE_ENDIAN 181 sizeof(struct ip) >> 2, 182 IPVERSION, 183 #else 184 IPVERSION, 185 sizeof(struct ip) >> 2, 186 #endif 187 0, /* tos */ 188 sizeof(struct ip), /* total length */ 189 0, /* id */ 190 0, /* frag offset */ 191 PIM_ENCAP_TTL, 192 IPPROTO_PIM, 193 0, /* checksum */ 194 }; 195 196 static struct pim_encap_pimhdr pim_encap_pimhdr = { 197 { 198 PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 199 0, /* reserved */ 200 0, /* checksum */ 201 }, 202 0 /* flags */ 203 }; 204 205 static struct ifnet multicast_register_if; 206 static vifi_t reg_vif_num = VIFI_INVALID; 207 208 /* 209 * whether or not special PIM assert processing is enabled. 210 */ 211 static int pim_assert; 212 #endif /* PIM */ 213 214 static vifi_t numvifs = 0; 215 216 /* 217 * Rate limit for assert notification messages, in usec 218 */ 219 #define ASSERT_MSG_TIME 3000000 220 221 /* 222 * Kernel multicast routing API capabilities and setup. 223 * If more API capabilities are added to the kernel, they should be 224 * recorded in `mrt_api_support'. 225 */ 226 static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 227 MRT_MFC_FLAGS_BORDER_VIF | 228 MRT_MFC_RP); 229 static u_int32_t mrt_api_config = 0; 230 231 /* 232 * Find a route for a given origin IP address and Multicast group address 233 * Type of service parameter to be added in the future!!! 234 * Statistics are updated by the caller if needed 235 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 236 */ 237 static struct mfc * 238 mfc_find(struct in_addr *o, struct in_addr *g) 239 { 240 struct mfc *rt; 241 u_int32_t hash; 242 243 hash = MFCHASH(*o, *g); 244 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 245 if (in_hosteq(rt->mfc_origin, *o) && 246 in_hosteq(rt->mfc_mcastgrp, *g) && 247 (rt->mfc_stall == NULL)) 248 break; 249 } 250 251 return (rt); 252 } 253 254 /* 255 * Macros to compute elapsed time efficiently 256 * Borrowed from Van Jacobson's scheduling code 257 */ 258 #define TV_DELTA(a, b, delta) do { \ 259 int xxs; \ 260 delta = (a).tv_usec - (b).tv_usec; \ 261 xxs = (a).tv_sec - (b).tv_sec; \ 262 switch (xxs) { \ 263 case 2: \ 264 delta += 1000000; \ 265 /* FALLTHROUGH */ \ 266 case 1: \ 267 delta += 1000000; \ 268 /* FALLTHROUGH */ \ 269 case 0: \ 270 break; \ 271 default: \ 272 delta += (1000000 * xxs); \ 273 break; \ 274 } \ 275 } while (/*CONSTCOND*/ 0) 276 277 /* 278 * Handle MRT setsockopt commands to modify the multicast routing tables. 279 */ 280 int 281 ip_mrouter_set(struct socket *so, int optname, struct mbuf **mp) 282 { 283 int error; 284 285 if (optname != MRT_INIT && so != ip_mrouter) 286 error = ENOPROTOOPT; 287 else 288 switch (optname) { 289 case MRT_INIT: 290 error = ip_mrouter_init(so, *mp); 291 break; 292 case MRT_DONE: 293 error = ip_mrouter_done(); 294 break; 295 case MRT_ADD_VIF: 296 error = add_vif(*mp); 297 break; 298 case MRT_DEL_VIF: 299 error = del_vif(*mp); 300 break; 301 case MRT_ADD_MFC: 302 error = add_mfc(*mp); 303 break; 304 case MRT_DEL_MFC: 305 error = del_mfc(*mp); 306 break; 307 #ifdef PIM 308 case MRT_ASSERT: 309 error = set_assert(*mp); 310 break; 311 #endif 312 case MRT_API_CONFIG: 313 error = set_api_config(*mp); 314 break; 315 default: 316 error = ENOPROTOOPT; 317 break; 318 } 319 320 if (*mp) 321 m_free(*mp); 322 return (error); 323 } 324 325 /* 326 * Handle MRT getsockopt commands 327 */ 328 int 329 ip_mrouter_get(struct socket *so, int optname, struct mbuf **mp) 330 { 331 int error; 332 333 if (so != ip_mrouter) 334 error = ENOPROTOOPT; 335 else { 336 *mp = m_get(M_WAIT, MT_SOOPTS); 337 338 switch (optname) { 339 case MRT_VERSION: 340 error = get_version(*mp); 341 break; 342 #ifdef PIM 343 case MRT_ASSERT: 344 error = get_assert(*mp); 345 break; 346 #endif 347 case MRT_API_SUPPORT: 348 error = get_api_support(*mp); 349 break; 350 case MRT_API_CONFIG: 351 error = get_api_config(*mp); 352 break; 353 default: 354 error = ENOPROTOOPT; 355 break; 356 } 357 358 if (error) 359 m_free(*mp); 360 } 361 362 return (error); 363 } 364 365 /* 366 * Handle ioctl commands to obtain information from the cache 367 */ 368 int 369 mrt_ioctl(struct socket *so, u_long cmd, caddr_t data) 370 { 371 int error; 372 373 if (so != ip_mrouter) 374 error = EINVAL; 375 else 376 switch (cmd) { 377 case SIOCGETVIFCNT: 378 error = get_vif_cnt((struct sioc_vif_req *)data); 379 break; 380 case SIOCGETSGCNT: 381 error = get_sg_cnt((struct sioc_sg_req *)data); 382 break; 383 default: 384 error = ENOTTY; 385 break; 386 } 387 388 return (error); 389 } 390 391 /* 392 * returns the packet, byte, rpf-failure count for the source group provided 393 */ 394 int 395 get_sg_cnt(struct sioc_sg_req *req) 396 { 397 int s; 398 struct mfc *rt; 399 400 s = splsoftnet(); 401 rt = mfc_find(&req->src, &req->grp); 402 if (rt == NULL) { 403 splx(s); 404 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 405 return (EADDRNOTAVAIL); 406 } 407 req->pktcnt = rt->mfc_pkt_cnt; 408 req->bytecnt = rt->mfc_byte_cnt; 409 req->wrong_if = rt->mfc_wrong_if; 410 splx(s); 411 412 return (0); 413 } 414 415 /* 416 * returns the input and output packet and byte counts on the vif provided 417 */ 418 int 419 get_vif_cnt(struct sioc_vif_req *req) 420 { 421 vifi_t vifi = req->vifi; 422 423 if (vifi >= numvifs) 424 return (EINVAL); 425 426 req->icount = viftable[vifi].v_pkt_in; 427 req->ocount = viftable[vifi].v_pkt_out; 428 req->ibytes = viftable[vifi].v_bytes_in; 429 req->obytes = viftable[vifi].v_bytes_out; 430 431 return (0); 432 } 433 434 int 435 mrt_sysctl_vif(void *oldp, size_t *oldlenp) 436 { 437 caddr_t where = oldp; 438 size_t needed, given; 439 struct vif *vifp; 440 vifi_t vifi; 441 struct vifinfo vinfo; 442 443 given = *oldlenp; 444 needed = 0; 445 for (vifi = 0; vifi < numvifs; vifi++) { 446 vifp = &viftable[vifi]; 447 if (in_nullhost(vifp->v_lcl_addr)) 448 continue; 449 450 vinfo.v_vifi = vifi; 451 vinfo.v_flags = vifp->v_flags; 452 vinfo.v_threshold = vifp->v_threshold; 453 vinfo.v_lcl_addr = vifp->v_lcl_addr; 454 vinfo.v_rmt_addr = vifp->v_rmt_addr; 455 vinfo.v_pkt_in = vifp->v_pkt_in; 456 vinfo.v_pkt_out = vifp->v_pkt_out; 457 vinfo.v_bytes_in = vifp->v_bytes_in; 458 vinfo.v_bytes_out = vifp->v_bytes_out; 459 460 needed += sizeof(vinfo); 461 if (where && needed <= given) { 462 int error; 463 464 error = copyout(&vinfo, where, sizeof(vinfo)); 465 if (error) 466 return (error); 467 where += sizeof(vinfo); 468 } 469 } 470 if (where) { 471 *oldlenp = needed; 472 if (given < needed) 473 return (ENOMEM); 474 } else 475 *oldlenp = (11 * needed) / 10; 476 477 return (0); 478 } 479 480 int 481 mrt_sysctl_mfc(void *oldp, size_t *oldlenp) 482 { 483 caddr_t where = oldp; 484 size_t needed, given; 485 u_long i; 486 struct mfc *m; 487 struct mfcinfo minfo; 488 489 given = *oldlenp; 490 needed = 0; 491 for (i = 0; mfchashtbl && i < MFCTBLSIZ; ++i) { 492 LIST_FOREACH(m, &mfchashtbl[i], mfc_hash) { 493 minfo.mfc_origin = m->mfc_origin; 494 minfo.mfc_mcastgrp = m->mfc_mcastgrp; 495 minfo.mfc_parent = m->mfc_parent; 496 minfo.mfc_pkt_cnt = m->mfc_pkt_cnt; 497 minfo.mfc_byte_cnt = m->mfc_byte_cnt; 498 memcpy(minfo.mfc_ttls, m->mfc_ttls, MAXVIFS); 499 500 needed += sizeof(minfo); 501 if (where && needed <= given) { 502 int error; 503 504 error = copyout(&minfo, where, sizeof(minfo)); 505 if (error) 506 return (error); 507 where += sizeof(minfo); 508 } 509 } 510 } 511 if (where) { 512 *oldlenp = needed; 513 if (given < needed) 514 return (ENOMEM); 515 } else 516 *oldlenp = (11 * needed) / 10; 517 518 return (0); 519 } 520 521 /* 522 * Enable multicast routing 523 */ 524 int 525 ip_mrouter_init(struct socket *so, struct mbuf *m) 526 { 527 int *v; 528 529 if (so->so_type != SOCK_RAW || 530 so->so_proto->pr_protocol != IPPROTO_IGMP) 531 return (EOPNOTSUPP); 532 533 if (m == NULL || m->m_len < sizeof(int)) 534 return (EINVAL); 535 536 v = mtod(m, int *); 537 if (*v != 1) 538 return (EINVAL); 539 540 if (ip_mrouter != NULL) 541 return (EADDRINUSE); 542 543 ip_mrouter = so; 544 545 mfchashtbl = hashinit(MFCTBLSIZ, M_MRTABLE, M_WAITOK, &mfchash); 546 arc4random_buf(&mfchashkey, sizeof(mfchashkey)); 547 memset(nexpire, 0, sizeof(nexpire)); 548 549 #ifdef PIM 550 pim_assert = 0; 551 #endif 552 553 timeout_set(&expire_upcalls_ch, expire_upcalls, NULL); 554 timeout_add_msec(&expire_upcalls_ch, EXPIRE_TIMEOUT); 555 556 return (0); 557 } 558 559 u_int32_t 560 _mfchash(struct in_addr o, struct in_addr g) 561 { 562 SIPHASH_CTX ctx; 563 564 SipHash24_Init(&ctx, &mfchashkey); 565 SipHash24_Update(&ctx, &o.s_addr, sizeof(o.s_addr)); 566 SipHash24_Update(&ctx, &g.s_addr, sizeof(g.s_addr)); 567 568 return (SipHash24_End(&ctx) & mfchash); 569 } 570 571 /* 572 * Disable multicast routing 573 */ 574 int 575 ip_mrouter_done(void) 576 { 577 vifi_t vifi; 578 struct vif *vifp; 579 int i; 580 int s; 581 582 s = splsoftnet(); 583 584 /* Clear out all the vifs currently in use. */ 585 for (vifi = 0; vifi < numvifs; vifi++) { 586 vifp = &viftable[vifi]; 587 if (!in_nullhost(vifp->v_lcl_addr)) 588 reset_vif(vifp); 589 } 590 591 numvifs = 0; 592 mrt_api_config = 0; 593 594 #ifdef PIM 595 pim_assert = 0; 596 #endif 597 598 timeout_del(&expire_upcalls_ch); 599 600 /* 601 * Free all multicast forwarding cache entries. 602 */ 603 for (i = 0; i < MFCTBLSIZ; i++) { 604 struct mfc *rt, *nrt; 605 606 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { 607 nrt = LIST_NEXT(rt, mfc_hash); 608 609 expire_mfc(rt); 610 } 611 } 612 613 memset(nexpire, 0, sizeof(nexpire)); 614 free(mfchashtbl, M_MRTABLE, 0); 615 mfchashtbl = NULL; 616 617 ip_mrouter = NULL; 618 619 splx(s); 620 621 return (0); 622 } 623 624 void 625 ip_mrouter_detach(struct ifnet *ifp) 626 { 627 int vifi, i; 628 struct vif *vifp; 629 struct mfc *rt; 630 struct rtdetq *rte; 631 632 /* XXX not sure about side effect to userland routing daemon */ 633 for (vifi = 0; vifi < numvifs; vifi++) { 634 vifp = &viftable[vifi]; 635 if (vifp->v_ifp == ifp) 636 reset_vif(vifp); 637 } 638 for (i = 0; i < MFCTBLSIZ; i++) { 639 if (nexpire[i] == 0) 640 continue; 641 LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) { 642 for (rte = rt->mfc_stall; rte; rte = rte->next) { 643 if (rte->ifp == ifp) 644 rte->ifp = NULL; 645 } 646 } 647 } 648 } 649 650 int 651 get_version(struct mbuf *m) 652 { 653 int *v = mtod(m, int *); 654 655 *v = 0x0305; /* XXX !!!! */ 656 m->m_len = sizeof(int); 657 return (0); 658 } 659 660 #ifdef PIM 661 /* 662 * Set PIM assert processing global 663 */ 664 int 665 set_assert(struct mbuf *m) 666 { 667 int *i; 668 669 if (m == NULL || m->m_len < sizeof(int)) 670 return (EINVAL); 671 672 i = mtod(m, int *); 673 pim_assert = !!*i; 674 return (0); 675 } 676 677 /* 678 * Get PIM assert processing global 679 */ 680 int 681 get_assert(struct mbuf *m) 682 { 683 int *i = mtod(m, int *); 684 685 *i = pim_assert; 686 m->m_len = sizeof(int); 687 return (0); 688 } 689 #endif 690 691 /* 692 * Configure API capabilities 693 */ 694 int 695 set_api_config(struct mbuf *m) 696 { 697 int i; 698 u_int32_t *apival; 699 700 if (m == NULL || m->m_len < sizeof(u_int32_t)) 701 return (EINVAL); 702 703 apival = mtod(m, u_int32_t *); 704 705 /* 706 * We can set the API capabilities only if it is the first operation 707 * after MRT_INIT. I.e.: 708 * - there are no vifs installed 709 * - pim_assert is not enabled 710 * - the MFC table is empty 711 */ 712 if (numvifs > 0) { 713 *apival = 0; 714 return (EPERM); 715 } 716 #ifdef PIM 717 if (pim_assert) { 718 *apival = 0; 719 return (EPERM); 720 } 721 #endif 722 for (i = 0; i < MFCTBLSIZ; i++) { 723 if (LIST_FIRST(&mfchashtbl[i]) != NULL) { 724 *apival = 0; 725 return (EPERM); 726 } 727 } 728 729 mrt_api_config = *apival & mrt_api_support; 730 *apival = mrt_api_config; 731 732 return (0); 733 } 734 735 /* 736 * Get API capabilities 737 */ 738 int 739 get_api_support(struct mbuf *m) 740 { 741 u_int32_t *apival; 742 743 if (m == NULL || m->m_len < sizeof(u_int32_t)) 744 return (EINVAL); 745 746 apival = mtod(m, u_int32_t *); 747 748 *apival = mrt_api_support; 749 750 return (0); 751 } 752 753 /* 754 * Get API configured capabilities 755 */ 756 int 757 get_api_config(struct mbuf *m) 758 { 759 u_int32_t *apival; 760 761 if (m == NULL || m->m_len < sizeof(u_int32_t)) 762 return (EINVAL); 763 764 apival = mtod(m, u_int32_t *); 765 766 *apival = mrt_api_config; 767 768 return (0); 769 } 770 771 static struct sockaddr_in sin = { sizeof(sin), AF_INET }; 772 773 /* 774 * Add a vif to the vif table 775 */ 776 int 777 add_vif(struct mbuf *m) 778 { 779 struct vifctl *vifcp; 780 struct vif *vifp; 781 struct ifaddr *ifa; 782 struct ifnet *ifp; 783 struct ifreq ifr; 784 int error, s; 785 786 if (m == NULL || m->m_len < sizeof(struct vifctl)) 787 return (EINVAL); 788 789 vifcp = mtod(m, struct vifctl *); 790 if (vifcp->vifc_vifi >= MAXVIFS) 791 return (EINVAL); 792 if (in_nullhost(vifcp->vifc_lcl_addr)) 793 return (EADDRNOTAVAIL); 794 795 vifp = &viftable[vifcp->vifc_vifi]; 796 if (!in_nullhost(vifp->v_lcl_addr)) 797 return (EADDRINUSE); 798 799 /* Tunnels are no longer supported use gif(4) instead. */ 800 if (vifcp->vifc_flags & VIFF_TUNNEL) 801 return (EOPNOTSUPP); 802 803 #ifdef PIM 804 if (vifcp->vifc_flags & VIFF_REGISTER) { 805 /* 806 * XXX: Because VIFF_REGISTER does not really need a valid 807 * local interface (e.g. it could be 127.0.0.2), we don't 808 * check its address. 809 */ 810 } else 811 #endif 812 { 813 sin.sin_addr = vifcp->vifc_lcl_addr; 814 ifa = ifa_ifwithaddr(sintosa(&sin), /* XXX */ 0); 815 if (ifa == NULL) 816 return (EADDRNOTAVAIL); 817 } 818 819 #ifdef PIM 820 if (vifcp->vifc_flags & VIFF_REGISTER) { 821 ifp = &multicast_register_if; 822 if (reg_vif_num == VIFI_INVALID) { 823 memset(ifp, 0, sizeof(*ifp)); 824 snprintf(ifp->if_xname, sizeof ifp->if_xname, 825 "register_vif"); 826 ifp->if_flags = IFF_LOOPBACK; 827 memset(&vifp->v_route, 0, sizeof(vifp->v_route)); 828 reg_vif_num = vifcp->vifc_vifi; 829 } 830 } else 831 #endif 832 { 833 /* Use the physical interface associated with the address. */ 834 ifp = ifa->ifa_ifp; 835 836 /* Make sure the interface supports multicast. */ 837 if ((ifp->if_flags & IFF_MULTICAST) == 0) 838 return (EOPNOTSUPP); 839 840 /* Enable promiscuous reception of all IP multicasts. */ 841 memset(&ifr, 0, sizeof(ifr)); 842 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 843 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 844 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 845 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 846 if (error) 847 return (error); 848 } 849 850 s = splsoftnet(); 851 852 vifp->v_flags = vifcp->vifc_flags; 853 vifp->v_threshold = vifcp->vifc_threshold; 854 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 855 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 856 vifp->v_ifp = ifp; 857 /* Initialize per vif pkt counters. */ 858 vifp->v_pkt_in = 0; 859 vifp->v_pkt_out = 0; 860 vifp->v_bytes_in = 0; 861 vifp->v_bytes_out = 0; 862 863 timeout_del(&vifp->v_repq_ch); 864 865 splx(s); 866 867 /* Adjust numvifs up if the vifi is higher than numvifs. */ 868 if (numvifs <= vifcp->vifc_vifi) 869 numvifs = vifcp->vifc_vifi + 1; 870 871 return (0); 872 } 873 874 void 875 reset_vif(struct vif *vifp) 876 { 877 struct ifnet *ifp; 878 struct ifreq ifr; 879 880 #ifdef PIM 881 if (vifp->v_flags & VIFF_REGISTER) { 882 reg_vif_num = VIFI_INVALID; 883 } else 884 #endif 885 { 886 memset(&ifr, 0, sizeof(ifr)); 887 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 888 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 889 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 890 ifp = vifp->v_ifp; 891 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 892 } 893 memset(vifp, 0, sizeof(*vifp)); 894 } 895 896 /* 897 * Delete a vif from the vif table 898 */ 899 int 900 del_vif(struct mbuf *m) 901 { 902 vifi_t *vifip; 903 struct vif *vifp; 904 vifi_t vifi; 905 int s; 906 907 if (m == NULL || m->m_len < sizeof(vifi_t)) 908 return (EINVAL); 909 910 vifip = mtod(m, vifi_t *); 911 if (*vifip >= numvifs) 912 return (EINVAL); 913 914 vifp = &viftable[*vifip]; 915 if (in_nullhost(vifp->v_lcl_addr)) 916 return (EADDRNOTAVAIL); 917 918 s = splsoftnet(); 919 920 reset_vif(vifp); 921 922 /* Adjust numvifs down */ 923 for (vifi = numvifs; vifi > 0; vifi--) 924 if (!in_nullhost(viftable[vifi - 1].v_lcl_addr)) 925 break; 926 numvifs = vifi; 927 928 splx(s); 929 930 return (0); 931 } 932 933 void 934 vif_delete(struct ifnet *ifp) 935 { 936 int i; 937 struct vif *vifp; 938 struct mfc *rt; 939 struct rtdetq *rte; 940 941 for (i = 0; i < numvifs; i++) { 942 vifp = &viftable[i]; 943 if (vifp->v_ifp == ifp) 944 memset(vifp, 0, sizeof(*vifp)); 945 } 946 947 for (i = numvifs; i > 0; i--) 948 if (!in_nullhost(viftable[i - 1].v_lcl_addr)) 949 break; 950 numvifs = i; 951 952 for (i = 0; i < MFCTBLSIZ; i++) { 953 if (nexpire[i] == 0) 954 continue; 955 LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) { 956 for (rte = rt->mfc_stall; rte; rte = rte->next) { 957 if (rte->ifp == ifp) 958 rte->ifp = NULL; 959 } 960 } 961 } 962 } 963 964 /* 965 * update an mfc entry without resetting counters and S,G addresses. 966 */ 967 void 968 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 969 { 970 int i; 971 972 rt->mfc_parent = mfccp->mfcc_parent; 973 for (i = 0; i < numvifs; i++) { 974 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 975 rt->mfc_flags[i] = mfccp->mfcc_flags[i] & mrt_api_config & 976 MRT_MFC_FLAGS_ALL; 977 } 978 /* set the RP address */ 979 if (mrt_api_config & MRT_MFC_RP) 980 rt->mfc_rp = mfccp->mfcc_rp; 981 else 982 rt->mfc_rp = zeroin_addr; 983 } 984 985 /* 986 * fully initialize an mfc entry from the parameter. 987 */ 988 void 989 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 990 { 991 rt->mfc_origin = mfccp->mfcc_origin; 992 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 993 994 update_mfc_params(rt, mfccp); 995 996 /* initialize pkt counters per src-grp */ 997 rt->mfc_pkt_cnt = 0; 998 rt->mfc_byte_cnt = 0; 999 rt->mfc_wrong_if = 0; 1000 timerclear(&rt->mfc_last_assert); 1001 } 1002 1003 void 1004 expire_mfc(struct mfc *rt) 1005 { 1006 struct rtdetq *rte, *nrte; 1007 1008 for (rte = rt->mfc_stall; rte != NULL; rte = nrte) { 1009 nrte = rte->next; 1010 m_freem(rte->m); 1011 free(rte, M_MRTABLE, 0); 1012 } 1013 1014 LIST_REMOVE(rt, mfc_hash); 1015 free(rt, M_MRTABLE, 0); 1016 } 1017 1018 /* 1019 * Add an mfc entry 1020 */ 1021 int 1022 add_mfc(struct mbuf *m) 1023 { 1024 struct mfcctl2 mfcctl2; 1025 struct mfcctl2 *mfccp; 1026 struct mfc *rt; 1027 u_int32_t hash = 0; 1028 struct rtdetq *rte, *nrte; 1029 u_short nstl; 1030 int s; 1031 int mfcctl_size = sizeof(struct mfcctl); 1032 1033 if (mrt_api_config & MRT_API_FLAGS_ALL) 1034 mfcctl_size = sizeof(struct mfcctl2); 1035 1036 if (m == NULL || m->m_len < mfcctl_size) 1037 return (EINVAL); 1038 1039 /* 1040 * select data size depending on API version. 1041 */ 1042 if (mrt_api_config & MRT_API_FLAGS_ALL) { 1043 struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *); 1044 bcopy(mp2, (caddr_t)&mfcctl2, sizeof(*mp2)); 1045 } else { 1046 struct mfcctl *mp = mtod(m, struct mfcctl *); 1047 bcopy(mp, (caddr_t)&mfcctl2, sizeof(*mp)); 1048 memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0, 1049 sizeof(mfcctl2) - sizeof(struct mfcctl)); 1050 } 1051 mfccp = &mfcctl2; 1052 1053 s = splsoftnet(); 1054 rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); 1055 1056 /* If an entry already exists, just update the fields */ 1057 if (rt) { 1058 update_mfc_params(rt, mfccp); 1059 splx(s); 1060 return (0); 1061 } 1062 1063 /* 1064 * Find the entry for which the upcall was made and update 1065 */ 1066 nstl = 0; 1067 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); 1068 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1069 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1070 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && 1071 rt->mfc_stall != NULL) { 1072 if (nstl++) { 1073 log(LOG_ERR, "add_mfc %s o %x g %x " 1074 "p %x dbx %p\n", 1075 "multiple kernel entries", 1076 ntohl(mfccp->mfcc_origin.s_addr), 1077 ntohl(mfccp->mfcc_mcastgrp.s_addr), 1078 mfccp->mfcc_parent, rt->mfc_stall); 1079 } 1080 1081 rte = rt->mfc_stall; 1082 init_mfc_params(rt, mfccp); 1083 rt->mfc_stall = NULL; 1084 1085 rt->mfc_expire = 0; /* Don't clean this guy up */ 1086 nexpire[hash]--; 1087 1088 /* free packets Qed at the end of this entry */ 1089 for (; rte != NULL; rte = nrte) { 1090 nrte = rte->next; 1091 if (rte->ifp) { 1092 ip_mdq(rte->m, rte->ifp, rt); 1093 } 1094 m_freem(rte->m); 1095 free(rte, M_MRTABLE, 0); 1096 } 1097 } 1098 } 1099 1100 /* 1101 * It is possible that an entry is being inserted without an upcall 1102 */ 1103 if (nstl == 0) { 1104 /* 1105 * No mfc; make a new one 1106 */ 1107 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1108 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1109 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { 1110 init_mfc_params(rt, mfccp); 1111 if (rt->mfc_expire) 1112 nexpire[hash]--; 1113 rt->mfc_expire = 0; 1114 break; /* XXX */ 1115 } 1116 } 1117 if (rt == NULL) { /* no upcall, so make a new entry */ 1118 rt = malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1119 if (rt == NULL) { 1120 splx(s); 1121 return (ENOBUFS); 1122 } 1123 1124 init_mfc_params(rt, mfccp); 1125 rt->mfc_expire = 0; 1126 rt->mfc_stall = NULL; 1127 1128 /* insert new entry at head of hash chain */ 1129 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1130 } 1131 } 1132 1133 splx(s); 1134 return (0); 1135 } 1136 1137 /* 1138 * Delete an mfc entry 1139 */ 1140 int 1141 del_mfc(struct mbuf *m) 1142 { 1143 struct mfcctl2 mfcctl2; 1144 struct mfcctl2 *mfccp; 1145 struct mfc *rt; 1146 int s; 1147 int mfcctl_size = sizeof(struct mfcctl); 1148 struct mfcctl *mp = mtod(m, struct mfcctl *); 1149 1150 /* 1151 * XXX: for deleting MFC entries the information in entries 1152 * of size "struct mfcctl" is sufficient. 1153 */ 1154 1155 if (m == NULL || m->m_len < mfcctl_size) 1156 return (EINVAL); 1157 1158 bcopy(mp, (caddr_t)&mfcctl2, sizeof(*mp)); 1159 memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0, 1160 sizeof(mfcctl2) - sizeof(struct mfcctl)); 1161 1162 mfccp = &mfcctl2; 1163 1164 s = splsoftnet(); 1165 1166 rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); 1167 if (rt == NULL) { 1168 splx(s); 1169 return (EADDRNOTAVAIL); 1170 } 1171 1172 LIST_REMOVE(rt, mfc_hash); 1173 free(rt, M_MRTABLE, 0); 1174 1175 splx(s); 1176 return (0); 1177 } 1178 1179 int 1180 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 1181 { 1182 if (s != NULL) { 1183 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, NULL) != 0) { 1184 sorwakeup(s); 1185 return (0); 1186 } 1187 } 1188 m_freem(mm); 1189 return (-1); 1190 } 1191 1192 /* 1193 * IP multicast forwarding function. This function assumes that the packet 1194 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1195 * pointed to by "ifp", and the packet is to be relayed to other networks 1196 * that have members of the packet's destination IP multicast group. 1197 * 1198 * The packet is returned unscathed to the caller, unless it is 1199 * erroneous, in which case a non-zero return value tells the caller to 1200 * discard it. 1201 */ 1202 1203 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 1204 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1205 1206 int 1207 ip_mforward(struct mbuf *m, struct ifnet *ifp) 1208 { 1209 struct ip *ip = mtod(m, struct ip *); 1210 struct mfc *rt; 1211 static int srctun = 0; 1212 struct mbuf *mm; 1213 int s; 1214 vifi_t vifi; 1215 1216 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 1217 ((u_char *)(ip + 1))[1] != IPOPT_LSRR) { 1218 /* 1219 * Packet arrived via a physical interface or 1220 * an encapsulated tunnel or a register_vif. 1221 */ 1222 } else { 1223 /* 1224 * Packet arrived through a source-route tunnel. 1225 * Source-route tunnels are no longer supported. 1226 */ 1227 if ((srctun++ % 1000) == 0) 1228 log(LOG_ERR, "ip_mforward: received source-routed " 1229 "packet from %x\n", ntohl(ip->ip_src.s_addr)); 1230 1231 return (1); 1232 } 1233 1234 /* 1235 * Don't forward a packet with time-to-live of zero or one, 1236 * or a packet destined to a local-only group. 1237 */ 1238 if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr)) 1239 return (0); 1240 1241 /* 1242 * Determine forwarding vifs from the forwarding cache table 1243 */ 1244 s = splsoftnet(); 1245 ++mrtstat.mrts_mfc_lookups; 1246 rt = mfc_find(&ip->ip_src, &ip->ip_dst); 1247 1248 /* Entry exists, so forward if necessary */ 1249 if (rt != NULL) { 1250 splx(s); 1251 return (ip_mdq(m, ifp, rt)); 1252 } else { 1253 /* 1254 * If we don't have a route for packet's origin, 1255 * Make a copy of the packet & send message to routing daemon 1256 */ 1257 1258 struct mbuf *mb0; 1259 struct rtdetq *rte; 1260 u_int32_t hash; 1261 int hlen = ip->ip_hl << 2; 1262 1263 ++mrtstat.mrts_mfc_misses; 1264 1265 mrtstat.mrts_no_route++; 1266 /* 1267 * Allocate mbufs early so that we don't do extra work if we are 1268 * just going to fail anyway. Make sure to pullup the header so 1269 * that other people can't step on it. 1270 */ 1271 rte = malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT); 1272 if (rte == NULL) { 1273 splx(s); 1274 return (ENOBUFS); 1275 } 1276 mb0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1277 M_PULLUP(mb0, hlen); 1278 if (mb0 == NULL) { 1279 free(rte, M_MRTABLE, 0); 1280 splx(s); 1281 return (ENOBUFS); 1282 } 1283 1284 /* is there an upcall waiting for this flow? */ 1285 hash = MFCHASH(ip->ip_src, ip->ip_dst); 1286 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1287 if (in_hosteq(ip->ip_src, rt->mfc_origin) && 1288 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && 1289 rt->mfc_stall != NULL) 1290 break; 1291 } 1292 1293 if (rt == NULL) { 1294 int i; 1295 struct igmpmsg *im; 1296 1297 /* 1298 * Locate the vifi for the incoming interface for 1299 * this packet. 1300 * If none found, drop packet. 1301 */ 1302 for (vifi = 0; vifi < numvifs && 1303 viftable[vifi].v_ifp != ifp; vifi++) 1304 ; 1305 if (vifi >= numvifs) /* vif not found, drop packet */ 1306 goto non_fatal; 1307 1308 /* no upcall, so make a new entry */ 1309 rt = malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1310 if (rt == NULL) 1311 goto fail; 1312 /* 1313 * Make a copy of the header to send to the user level 1314 * process 1315 */ 1316 mm = m_copym(m, 0, hlen, M_NOWAIT); 1317 M_PULLUP(mm, hlen); 1318 if (mm == NULL) 1319 goto fail1; 1320 1321 /* 1322 * Send message to routing daemon to install 1323 * a route into the kernel table 1324 */ 1325 1326 im = mtod(mm, struct igmpmsg *); 1327 im->im_msgtype = IGMPMSG_NOCACHE; 1328 im->im_mbz = 0; 1329 im->im_vif = vifi; 1330 1331 mrtstat.mrts_upcalls++; 1332 1333 sin.sin_addr = ip->ip_src; 1334 if (socket_send(ip_mrouter, mm, &sin) < 0) { 1335 log(LOG_WARNING, "ip_mforward: ip_mrouter " 1336 "socket queue full\n"); 1337 ++mrtstat.mrts_upq_sockfull; 1338 fail1: 1339 free(rt, M_MRTABLE, 0); 1340 fail: 1341 free(rte, M_MRTABLE, 0); 1342 m_freem(mb0); 1343 splx(s); 1344 return (ENOBUFS); 1345 } 1346 1347 /* insert new entry at head of hash chain */ 1348 rt->mfc_origin = ip->ip_src; 1349 rt->mfc_mcastgrp = ip->ip_dst; 1350 rt->mfc_pkt_cnt = 0; 1351 rt->mfc_byte_cnt = 0; 1352 rt->mfc_wrong_if = 0; 1353 rt->mfc_expire = UPCALL_EXPIRE; 1354 nexpire[hash]++; 1355 for (i = 0; i < numvifs; i++) { 1356 rt->mfc_ttls[i] = 0; 1357 rt->mfc_flags[i] = 0; 1358 } 1359 rt->mfc_parent = -1; 1360 1361 /* clear the RP address */ 1362 rt->mfc_rp = zeroin_addr; 1363 1364 /* link into table */ 1365 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1366 /* Add this entry to the end of the queue */ 1367 rt->mfc_stall = rte; 1368 } else { 1369 /* determine if q has overflowed */ 1370 struct rtdetq **p; 1371 int npkts = 0; 1372 1373 /* 1374 * XXX ouch! we need to append to the list, but we 1375 * only have a pointer to the front, so we have to 1376 * scan the entire list every time. 1377 */ 1378 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1379 if (++npkts > MAX_UPQ) { 1380 mrtstat.mrts_upq_ovflw++; 1381 non_fatal: 1382 free(rte, M_MRTABLE, 0); 1383 m_freem(mb0); 1384 splx(s); 1385 return (0); 1386 } 1387 1388 /* Add this entry to the end of the queue */ 1389 *p = rte; 1390 } 1391 1392 rte->next = NULL; 1393 rte->m = mb0; 1394 rte->ifp = ifp; 1395 1396 splx(s); 1397 1398 return (0); 1399 } 1400 } 1401 1402 1403 /*ARGSUSED*/ 1404 void 1405 expire_upcalls(void *v) 1406 { 1407 int i; 1408 int s; 1409 1410 s = splsoftnet(); 1411 1412 for (i = 0; i < MFCTBLSIZ; i++) { 1413 struct mfc *rt, *nrt; 1414 1415 if (nexpire[i] == 0) 1416 continue; 1417 1418 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { 1419 nrt = LIST_NEXT(rt, mfc_hash); 1420 1421 if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) 1422 continue; 1423 nexpire[i]--; 1424 1425 ++mrtstat.mrts_cache_cleanups; 1426 expire_mfc(rt); 1427 } 1428 } 1429 1430 splx(s); 1431 timeout_add_msec(&expire_upcalls_ch, EXPIRE_TIMEOUT); 1432 } 1433 1434 /* 1435 * Packet forwarding routine once entry in the cache is made 1436 */ 1437 int 1438 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt) 1439 { 1440 struct ip *ip = mtod(m, struct ip *); 1441 vifi_t vifi; 1442 struct vif *vifp; 1443 int plen = ntohs(ip->ip_len) - (ip->ip_hl << 2); 1444 1445 /* 1446 * Don't forward if it didn't arrive from the parent vif for its origin. 1447 */ 1448 vifi = rt->mfc_parent; 1449 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1450 /* came in the wrong interface */ 1451 ++mrtstat.mrts_wrong_if; 1452 ++rt->mfc_wrong_if; 1453 #ifdef PIM 1454 /* 1455 * If we are doing PIM assert processing, send a message 1456 * to the routing daemon. 1457 * 1458 * XXX: A PIM-SM router needs the WRONGVIF detection so it 1459 * can complete the SPT switch, regardless of the type 1460 * of interface (broadcast media, GRE tunnel, etc). 1461 */ 1462 if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { 1463 struct timeval now; 1464 u_int32_t delta; 1465 1466 if (ifp == &multicast_register_if) 1467 pimstat.pims_rcv_registers_wrongiif++; 1468 1469 /* Get vifi for the incoming packet */ 1470 for (vifi = 0; 1471 vifi < numvifs && viftable[vifi].v_ifp != ifp; 1472 vifi++) 1473 ; 1474 if (vifi >= numvifs) { 1475 /* The iif is not found: ignore the packet. */ 1476 return (0); 1477 } 1478 1479 if (rt->mfc_flags[vifi] & 1480 MRT_MFC_FLAGS_DISABLE_WRONGVIF) { 1481 /* WRONGVIF disabled: ignore the packet */ 1482 return (0); 1483 } 1484 1485 microtime(&now); 1486 1487 TV_DELTA(rt->mfc_last_assert, now, delta); 1488 1489 if (delta > ASSERT_MSG_TIME) { 1490 struct igmpmsg *im; 1491 int hlen = ip->ip_hl << 2; 1492 struct mbuf *mm = m_copym(m, 0, hlen, M_NOWAIT); 1493 1494 M_PULLUP(mm, hlen); 1495 if (mm == NULL) 1496 return (ENOBUFS); 1497 1498 rt->mfc_last_assert = now; 1499 1500 im = mtod(mm, struct igmpmsg *); 1501 im->im_msgtype = IGMPMSG_WRONGVIF; 1502 im->im_mbz = 0; 1503 im->im_vif = vifi; 1504 1505 mrtstat.mrts_upcalls++; 1506 1507 sin.sin_addr = im->im_src; 1508 if (socket_send(ip_mrouter, mm, &sin) < 0) { 1509 log(LOG_WARNING, "ip_mforward: " 1510 "ip_mrouter socket queue full\n"); 1511 ++mrtstat.mrts_upq_sockfull; 1512 return (ENOBUFS); 1513 } 1514 } 1515 } 1516 #endif 1517 return (0); 1518 } 1519 1520 /* If I sourced this packet, it counts as output, else it was input. */ 1521 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) { 1522 viftable[vifi].v_pkt_out++; 1523 viftable[vifi].v_bytes_out += plen; 1524 } else { 1525 viftable[vifi].v_pkt_in++; 1526 viftable[vifi].v_bytes_in += plen; 1527 } 1528 rt->mfc_pkt_cnt++; 1529 rt->mfc_byte_cnt += plen; 1530 1531 /* 1532 * For each vif, decide if a copy of the packet should be forwarded. 1533 * Forward if: 1534 * - the ttl exceeds the vif's threshold 1535 * - there are group members downstream on interface 1536 */ 1537 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1538 if ((rt->mfc_ttls[vifi] > 0) && 1539 (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1540 vifp->v_pkt_out++; 1541 vifp->v_bytes_out += plen; 1542 #ifdef PIM 1543 if (vifp->v_flags & VIFF_REGISTER) 1544 pim_register_send(ip, vifp, m, rt); 1545 else 1546 #endif 1547 phyint_send(ip, vifp, m); 1548 } 1549 1550 return (0); 1551 } 1552 1553 void 1554 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1555 { 1556 struct mbuf *mb_copy; 1557 int hlen = ip->ip_hl << 2; 1558 1559 /* 1560 * Make a new reference to the packet; make sure that 1561 * the IP header is actually copied, not just referenced, 1562 * so that ip_output() only scribbles on the copy. 1563 */ 1564 mb_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1565 M_PULLUP(mb_copy, hlen); 1566 if (mb_copy == NULL) 1567 return; 1568 1569 send_packet(vifp, mb_copy); 1570 } 1571 1572 void 1573 send_packet(struct vif *vifp, struct mbuf *m) 1574 { 1575 struct ip_moptions imo; 1576 int s; 1577 1578 /* 1579 * if physical interface option, extract the options 1580 * and then send 1581 */ 1582 imo.imo_ifidx = vifp->v_ifp->if_index; 1583 imo.imo_ttl = mtod(m, struct ip *)->ip_ttl - IPTTLDEC; 1584 imo.imo_loop = 1; 1585 1586 s = splsoftnet(); 1587 ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL, 0); 1588 splx(s); 1589 } 1590 1591 #ifdef PIM 1592 /* 1593 * Send the packet up to the user daemon, or eventually do kernel encapsulation 1594 */ 1595 int 1596 pim_register_send(struct ip *ip, struct vif *vifp, 1597 struct mbuf *m, struct mfc *rt) 1598 { 1599 struct mbuf *mb_copy, *mm; 1600 1601 mb_copy = pim_register_prepare(ip, m); 1602 if (mb_copy == NULL) 1603 return (ENOBUFS); 1604 1605 /* 1606 * Send all the fragments. Note that the mbuf for each fragment 1607 * is freed by the sending machinery. 1608 */ 1609 for (mm = mb_copy; mm; mm = mb_copy) { 1610 mb_copy = mm->m_nextpkt; 1611 mm->m_nextpkt = NULL; 1612 mm = m_pullup(mm, sizeof(struct ip)); 1613 if (mm != NULL) { 1614 ip = mtod(mm, struct ip *); 1615 if ((mrt_api_config & MRT_MFC_RP) && 1616 !in_nullhost(rt->mfc_rp)) { 1617 pim_register_send_rp(ip, vifp, mm, rt); 1618 } else { 1619 pim_register_send_upcall(ip, vifp, mm, rt); 1620 } 1621 } 1622 } 1623 1624 return (0); 1625 } 1626 1627 /* 1628 * Return a copy of the data packet that is ready for PIM Register 1629 * encapsulation. 1630 * XXX: Note that in the returned copy the IP header is a valid one. 1631 */ 1632 struct mbuf * 1633 pim_register_prepare(struct ip *ip, struct mbuf *m) 1634 { 1635 struct mbuf *mb_copy = NULL; 1636 int mtu; 1637 1638 in_proto_cksum_out(m, NULL); 1639 1640 /* 1641 * Copy the old packet & pullup its IP header into the 1642 * new mbuf so we can modify it. 1643 */ 1644 mb_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1645 if (mb_copy == NULL) 1646 return (NULL); 1647 mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 1648 if (mb_copy == NULL) 1649 return (NULL); 1650 1651 /* take care of the TTL */ 1652 ip = mtod(mb_copy, struct ip *); 1653 --ip->ip_ttl; 1654 1655 /* Compute the MTU after the PIM Register encapsulation */ 1656 mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 1657 1658 if (ntohs(ip->ip_len) <= mtu) { 1659 /* Turn the IP header into a valid one */ 1660 ip->ip_sum = 0; 1661 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1662 } else { 1663 /* Fragment the packet */ 1664 if (ip_fragment(mb_copy, NULL, mtu) != 0) { 1665 /* XXX: mb_copy was freed by ip_fragment() */ 1666 return (NULL); 1667 } 1668 } 1669 return (mb_copy); 1670 } 1671 1672 /* 1673 * Send an upcall with the data packet to the user-level process. 1674 */ 1675 int 1676 pim_register_send_upcall(struct ip *ip, struct vif *vifp, 1677 struct mbuf *mb_copy, struct mfc *rt) 1678 { 1679 struct mbuf *mb_first; 1680 int len = ntohs(ip->ip_len); 1681 struct igmpmsg *im; 1682 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1683 1684 /* Add a new mbuf with an upcall header */ 1685 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 1686 if (mb_first == NULL) { 1687 m_freem(mb_copy); 1688 return (ENOBUFS); 1689 } 1690 mb_first->m_data += max_linkhdr; 1691 mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 1692 mb_first->m_len = sizeof(struct igmpmsg); 1693 mb_first->m_next = mb_copy; 1694 1695 /* Send message to routing daemon */ 1696 im = mtod(mb_first, struct igmpmsg *); 1697 im->im_msgtype = IGMPMSG_WHOLEPKT; 1698 im->im_mbz = 0; 1699 im->im_vif = vifp - viftable; 1700 im->im_src = ip->ip_src; 1701 im->im_dst = ip->ip_dst; 1702 1703 k_igmpsrc.sin_addr = ip->ip_src; 1704 1705 mrtstat.mrts_upcalls++; 1706 1707 if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) { 1708 ++mrtstat.mrts_upq_sockfull; 1709 return (ENOBUFS); 1710 } 1711 1712 /* Keep statistics */ 1713 pimstat.pims_snd_registers_msgs++; 1714 pimstat.pims_snd_registers_bytes += len; 1715 1716 return (0); 1717 } 1718 1719 /* 1720 * Encapsulate the data packet in PIM Register message and send it to the RP. 1721 */ 1722 int 1723 pim_register_send_rp(struct ip *ip, struct vif *vifp, 1724 struct mbuf *mb_copy, struct mfc *rt) 1725 { 1726 struct mbuf *mb_first; 1727 struct ip *ip_outer; 1728 struct pim_encap_pimhdr *pimhdr; 1729 int len = ntohs(ip->ip_len); 1730 vifi_t vifi = rt->mfc_parent; 1731 1732 if ((vifi >= numvifs) || in_nullhost(viftable[vifi].v_lcl_addr)) { 1733 m_freem(mb_copy); 1734 return (EADDRNOTAVAIL); /* The iif vif is invalid */ 1735 } 1736 1737 /* Add a new mbuf with the encapsulating header */ 1738 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 1739 if (mb_first == NULL) { 1740 m_freem(mb_copy); 1741 return (ENOBUFS); 1742 } 1743 mb_first->m_data += max_linkhdr; 1744 mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 1745 mb_first->m_next = mb_copy; 1746 1747 mb_first->m_pkthdr.len = len + mb_first->m_len; 1748 1749 /* Fill in the encapsulating IP and PIM header */ 1750 ip_outer = mtod(mb_first, struct ip *); 1751 *ip_outer = pim_encap_iphdr; 1752 ip_outer->ip_id = htons(ip_randomid()); 1753 ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + 1754 sizeof(pim_encap_pimhdr)); 1755 ip_outer->ip_src = viftable[vifi].v_lcl_addr; 1756 ip_outer->ip_dst = rt->mfc_rp; 1757 /* 1758 * Copy the inner header TOS to the outer header, and take care of the 1759 * IP_DF bit. 1760 */ 1761 ip_outer->ip_tos = ip->ip_tos; 1762 if (ntohs(ip->ip_off) & IP_DF) 1763 ip_outer->ip_off |= htons(IP_DF); 1764 pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer 1765 + sizeof(pim_encap_iphdr)); 1766 *pimhdr = pim_encap_pimhdr; 1767 /* If the iif crosses a border, set the Border-bit */ 1768 if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config) 1769 pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 1770 1771 mb_first->m_data += sizeof(pim_encap_iphdr); 1772 pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 1773 mb_first->m_data -= sizeof(pim_encap_iphdr); 1774 1775 send_packet(vifp, mb_first); 1776 1777 /* Keep statistics */ 1778 pimstat.pims_snd_registers_msgs++; 1779 pimstat.pims_snd_registers_bytes += len; 1780 1781 return (0); 1782 } 1783 1784 /* 1785 * PIM-SMv2 and PIM-DM messages processing. 1786 * Receives and verifies the PIM control messages, and passes them 1787 * up to the listening socket, using rip_input(). 1788 * The only message with special processing is the PIM_REGISTER message 1789 * (used by PIM-SM): the PIM header is stripped off, and the inner packet 1790 * is passed to if_simloop(). 1791 */ 1792 void 1793 pim_input(struct mbuf *m, ...) 1794 { 1795 struct ip *ip = mtod(m, struct ip *); 1796 struct pim *pim; 1797 int minlen; 1798 int datalen; 1799 int ip_tos; 1800 int iphlen; 1801 va_list ap; 1802 1803 va_start(ap, m); 1804 iphlen = va_arg(ap, int); 1805 va_end(ap); 1806 1807 datalen = ntohs(ip->ip_len) - iphlen; 1808 1809 /* Keep statistics */ 1810 pimstat.pims_rcv_total_msgs++; 1811 pimstat.pims_rcv_total_bytes += datalen; 1812 1813 /* Validate lengths */ 1814 if (datalen < PIM_MINLEN) { 1815 pimstat.pims_rcv_tooshort++; 1816 log(LOG_ERR, "pim_input: packet size too small %d from %lx\n", 1817 datalen, (u_long)ip->ip_src.s_addr); 1818 m_freem(m); 1819 return; 1820 } 1821 1822 /* 1823 * If the packet is at least as big as a REGISTER, go agead 1824 * and grab the PIM REGISTER header size, to avoid another 1825 * possible m_pullup() later. 1826 * 1827 * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 1828 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 1829 */ 1830 minlen = iphlen + (datalen >= PIM_REG_MINLEN ? 1831 PIM_REG_MINLEN : PIM_MINLEN); 1832 /* 1833 * Get the IP and PIM headers in contiguous memory, and 1834 * possibly the PIM REGISTER header. 1835 */ 1836 if ((m->m_flags & M_EXT || m->m_len < minlen) && 1837 (m = m_pullup(m, minlen)) == NULL) { 1838 log(LOG_ERR, "pim_input: m_pullup failure\n"); 1839 return; 1840 } 1841 /* m_pullup() may have given us a new mbuf so reset ip. */ 1842 ip = mtod(m, struct ip *); 1843 ip_tos = ip->ip_tos; 1844 1845 /* adjust mbuf to point to the PIM header */ 1846 m->m_data += iphlen; 1847 m->m_len -= iphlen; 1848 pim = mtod(m, struct pim *); 1849 1850 /* 1851 * Validate checksum. If PIM REGISTER, exclude the data packet. 1852 * 1853 * XXX: some older PIMv2 implementations don't make this distinction, 1854 * so for compatibility reason perform the checksum over part of the 1855 * message, and if error, then over the whole message. 1856 */ 1857 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && 1858 in_cksum(m, PIM_MINLEN) == 0) { 1859 /* do nothing, checksum okay */ 1860 } else if (in_cksum(m, datalen)) { 1861 pimstat.pims_rcv_badsum++; 1862 m_freem(m); 1863 return; 1864 } 1865 1866 /* PIM version check */ 1867 if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { 1868 pimstat.pims_rcv_badversion++; 1869 log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n", 1870 PIM_VT_V(pim->pim_vt), PIM_VERSION); 1871 m_freem(m); 1872 return; 1873 } 1874 1875 /* restore mbuf back to the outer IP */ 1876 m->m_data -= iphlen; 1877 m->m_len += iphlen; 1878 1879 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { 1880 /* 1881 * Since this is a REGISTER, we'll make a copy of the register 1882 * headers ip + pim + u_int32 + encap_ip, to be passed up to the 1883 * routing daemon. 1884 */ 1885 int s; 1886 struct sockaddr_in dst = { sizeof(dst), AF_INET }; 1887 struct mbuf *mcp; 1888 struct ip *encap_ip; 1889 u_int32_t *reghdr; 1890 struct ifnet *vifp; 1891 1892 s = splsoftnet(); 1893 if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) { 1894 splx(s); 1895 m_freem(m); 1896 return; 1897 } 1898 /* XXX need refcnt? */ 1899 vifp = viftable[reg_vif_num].v_ifp; 1900 splx(s); 1901 1902 /* Validate length */ 1903 if (datalen < PIM_REG_MINLEN) { 1904 pimstat.pims_rcv_tooshort++; 1905 pimstat.pims_rcv_badregisters++; 1906 log(LOG_ERR, "pim_input: register packet size " 1907 "too small %d from %lx\n", 1908 datalen, (u_long)ip->ip_src.s_addr); 1909 m_freem(m); 1910 return; 1911 } 1912 1913 reghdr = (u_int32_t *)(pim + 1); 1914 encap_ip = (struct ip *)(reghdr + 1); 1915 1916 /* verify the version number of the inner packet */ 1917 if (encap_ip->ip_v != IPVERSION) { 1918 pimstat.pims_rcv_badregisters++; 1919 m_freem(m); 1920 return; 1921 } 1922 1923 /* verify the inner packet is destined to a mcast group */ 1924 if (!IN_MULTICAST(encap_ip->ip_dst.s_addr)) { 1925 pimstat.pims_rcv_badregisters++; 1926 m_freem(m); 1927 return; 1928 } 1929 1930 /* If a NULL_REGISTER, pass it to the daemon */ 1931 if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 1932 goto pim_input_to_daemon; 1933 1934 /* 1935 * Copy the TOS from the outer IP header to the inner 1936 * IP header. 1937 */ 1938 if (encap_ip->ip_tos != ip_tos) { 1939 /* Outer TOS -> inner TOS */ 1940 encap_ip->ip_tos = ip_tos; 1941 /* Recompute the inner header checksum. Sigh... */ 1942 1943 /* adjust mbuf to point to the inner IP header */ 1944 m->m_data += (iphlen + PIM_MINLEN); 1945 m->m_len -= (iphlen + PIM_MINLEN); 1946 1947 encap_ip->ip_sum = 0; 1948 encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); 1949 1950 /* restore mbuf to point back to the outer IP header */ 1951 m->m_data -= (iphlen + PIM_MINLEN); 1952 m->m_len += (iphlen + PIM_MINLEN); 1953 } 1954 1955 /* 1956 * Decapsulate the inner IP packet and loopback to forward it 1957 * as a normal multicast packet. Also, make a copy of the 1958 * outer_iphdr + pimhdr + reghdr + encap_iphdr 1959 * to pass to the daemon later, so it can take the appropriate 1960 * actions (e.g., send back PIM_REGISTER_STOP). 1961 * XXX: here m->m_data points to the outer IP header. 1962 */ 1963 mcp = m_copym(m, 0, iphlen + PIM_REG_MINLEN, M_NOWAIT); 1964 if (mcp == NULL) { 1965 log(LOG_ERR, "pim_input: pim register: could not " 1966 "copy register head\n"); 1967 m_freem(m); 1968 return; 1969 } 1970 1971 /* Keep statistics */ 1972 /* XXX: registers_bytes include only the encap. mcast pkt */ 1973 pimstat.pims_rcv_registers_msgs++; 1974 pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len); 1975 1976 /* forward the inner ip packet; point m_data at the inner ip. */ 1977 m_adj(m, iphlen + PIM_MINLEN); 1978 1979 /* NB: vifp was collected above; can it change on us? */ 1980 if_input_local(vifp, m, dst.sin_family); 1981 1982 /* prepare the register head to send to the mrouting daemon */ 1983 m = mcp; 1984 } 1985 1986 pim_input_to_daemon: 1987 /* 1988 * Pass the PIM message up to the daemon; if it is a Register message, 1989 * pass the 'head' only up to the daemon. This includes the 1990 * outer IP header, PIM header, PIM-Register header and the 1991 * inner IP header. 1992 * XXX: the outer IP header pkt size of a Register is not adjust to 1993 * reflect the fact that the inner multicast data is truncated. 1994 */ 1995 rip_input(m); 1996 1997 return; 1998 } 1999 2000 /* 2001 * Sysctl for pim variables. 2002 */ 2003 int 2004 pim_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, 2005 void *newp, size_t newlen) 2006 { 2007 /* All sysctl names at this level are terminal. */ 2008 if (namelen != 1) 2009 return (ENOTDIR); 2010 2011 switch (name[0]) { 2012 case PIMCTL_STATS: 2013 if (newp != NULL) 2014 return (EPERM); 2015 return (sysctl_struct(oldp, oldlenp, newp, newlen, 2016 &pimstat, sizeof(pimstat))); 2017 2018 default: 2019 return (ENOPROTOOPT); 2020 } 2021 /* NOTREACHED */ 2022 } 2023 2024 2025 #endif /* PIM */ 2026