1 /* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * Modified by Bill Fenner, PARC, April 1995 10 * Modified by Ahmed Helmy, SGI, June 1996 11 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 12 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 13 * Modified by Hitoshi Asaeda, WIDE, August 2000 14 * Modified by Pavlin Radoslavov, ICSI, October 2002 15 * 16 * MROUTING Revision: 3.5 17 * and PIM-SMv2 and PIM-DM support, advanced API support, 18 * bandwidth metering and signaling 19 * 20 * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.10 2003/08/24 21:37:34 hsu Exp $ 21 * $DragonFly: src/sys/net/ip_mroute/ip_mroute.c,v 1.9 2004/03/08 07:43:44 hsu Exp $ 22 */ 23 24 #include "opt_mrouting.h" 25 #include "opt_random_ip_id.h" 26 27 #ifdef PIM 28 #define _PIM_VT 1 29 #endif 30 31 #include <sys/param.h> 32 #include <sys/kernel.h> 33 #include <sys/malloc.h> 34 #include <sys/mbuf.h> 35 #include <sys/protosw.h> 36 #include <sys/socket.h> 37 #include <sys/socketvar.h> 38 #include <sys/sockio.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/systm.h> 42 #include <sys/time.h> 43 #include <sys/in_cksum.h> 44 #include <net/if.h> 45 #include <net/netisr.h> 46 #include <net/route.h> 47 #include <netinet/in.h> 48 #include <netinet/igmp.h> 49 #include <netinet/in_systm.h> 50 #include <netinet/in_var.h> 51 #include <netinet/ip.h> 52 #include "ip_mroute.h" 53 #include <netinet/ip_var.h> 54 #ifdef PIM 55 #include <netinet/pim.h> 56 #include <netinet/pim_var.h> 57 #endif 58 #include <netinet/udp.h> 59 60 /* 61 * Control debugging code for rsvp and multicast routing code. 62 * Can only set them with the debugger. 63 */ 64 static u_int rsvpdebug; /* non-zero enables debugging */ 65 66 static u_int mrtdebug; /* any set of the flags below */ 67 68 #define DEBUG_MFC 0x02 69 #define DEBUG_FORWARD 0x04 70 #define DEBUG_EXPIRE 0x08 71 #define DEBUG_XMIT 0x10 72 #define DEBUG_PIM 0x20 73 74 #define VIFI_INVALID ((vifi_t) -1) 75 76 #define M_HASCL(m) ((m)->m_flags & M_EXT) 77 78 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); 79 80 static struct mrtstat mrtstat; 81 SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, 82 &mrtstat, mrtstat, 83 "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)"); 84 85 static struct mfc *mfctable[MFCTBLSIZ]; 86 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, 87 &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", 88 "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); 89 90 static struct vif viftable[MAXVIFS]; 91 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, 92 &viftable, sizeof(viftable), "S,vif[MAXVIFS]", 93 "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); 94 95 static u_char nexpire[MFCTBLSIZ]; 96 97 static struct callout_handle expire_upcalls_ch; 98 99 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 100 #define UPCALL_EXPIRE 6 /* number of timeouts */ 101 102 /* 103 * Define the token bucket filter structures 104 * tbftable -> each vif has one of these for storing info 105 */ 106 107 static struct tbf tbftable[MAXVIFS]; 108 #define TBF_REPROCESS (hz / 100) /* 100x / second */ 109 110 /* 111 * 'Interfaces' associated with decapsulator (so we can tell 112 * packets that went through it from ones that get reflected 113 * by a broken gateway). These interfaces are never linked into 114 * the system ifnet list & no routes point to them. I.e., packets 115 * can't be sent this way. They only exist as a placeholder for 116 * multicast source verification. 117 */ 118 static struct ifnet multicast_decap_if[MAXVIFS]; 119 120 #define ENCAP_TTL 64 121 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 122 123 /* prototype IP hdr for encapsulated packets */ 124 static struct ip multicast_encap_iphdr = { 125 #if BYTE_ORDER == LITTLE_ENDIAN 126 sizeof(struct ip) >> 2, IPVERSION, 127 #else 128 IPVERSION, sizeof(struct ip) >> 2, 129 #endif 130 0, /* tos */ 131 sizeof(struct ip), /* total length */ 132 0, /* id */ 133 0, /* frag offset */ 134 ENCAP_TTL, ENCAP_PROTO, 135 0, /* checksum */ 136 }; 137 138 /* 139 * Bandwidth meter variables and constants 140 */ 141 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); 142 /* 143 * Pending timeouts are stored in a hash table, the key being the 144 * expiration time. Periodically, the entries are analysed and processed. 145 */ 146 #define BW_METER_BUCKETS 1024 147 static struct bw_meter *bw_meter_timers[BW_METER_BUCKETS]; 148 static struct callout_handle bw_meter_ch; 149 #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ 150 151 /* 152 * Pending upcalls are stored in a vector which is flushed when 153 * full, or periodically 154 */ 155 static struct bw_upcall bw_upcalls[BW_UPCALLS_MAX]; 156 static u_int bw_upcalls_n; /* # of pending upcalls */ 157 static struct callout_handle bw_upcalls_ch; 158 #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ 159 160 #ifdef PIM 161 static struct pimstat pimstat; 162 SYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD, 163 &pimstat, pimstat, 164 "PIM Statistics (struct pimstat, netinet/pim_var.h)"); 165 166 /* 167 * Note: the PIM Register encapsulation adds the following in front of a 168 * data packet: 169 * 170 * struct pim_encap_hdr { 171 * struct ip ip; 172 * struct pim_encap_pimhdr pim; 173 * } 174 * 175 */ 176 177 struct pim_encap_pimhdr { 178 struct pim pim; 179 uint32_t flags; 180 }; 181 182 static struct ip pim_encap_iphdr = { 183 #if BYTE_ORDER == LITTLE_ENDIAN 184 sizeof(struct ip) >> 2, 185 IPVERSION, 186 #else 187 IPVERSION, 188 sizeof(struct ip) >> 2, 189 #endif 190 0, /* tos */ 191 sizeof(struct ip), /* total length */ 192 0, /* id */ 193 0, /* frag offset */ 194 ENCAP_TTL, 195 IPPROTO_PIM, 196 0, /* checksum */ 197 }; 198 199 static struct pim_encap_pimhdr pim_encap_pimhdr = { 200 { 201 PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 202 0, /* reserved */ 203 0, /* checksum */ 204 }, 205 0 /* flags */ 206 }; 207 208 static struct ifnet multicast_register_if; 209 static vifi_t reg_vif_num = VIFI_INVALID; 210 #endif /* PIM */ 211 212 /* 213 * Private variables. 214 */ 215 static vifi_t numvifs; 216 static int have_encap_tunnel; 217 218 /* 219 * one-back cache used by ipip_input to locate a tunnel's vif 220 * given a datagram's src ip address. 221 */ 222 static u_long last_encap_src; 223 static struct vif *last_encap_vif; 224 225 static u_long X_ip_mcast_src(int vifi); 226 static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, 227 struct mbuf *m, struct ip_moptions *imo); 228 static int X_ip_mrouter_done(void); 229 static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); 230 static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); 231 static int X_legal_vif_num(int vif); 232 static int X_mrt_ioctl(int cmd, caddr_t data); 233 234 static int get_sg_cnt(struct sioc_sg_req *); 235 static int get_vif_cnt(struct sioc_vif_req *); 236 static int ip_mrouter_init(struct socket *, int); 237 static int add_vif(struct vifctl *); 238 static int del_vif(vifi_t); 239 static int add_mfc(struct mfcctl2 *); 240 static int del_mfc(struct mfcctl2 *); 241 static int set_api_config(uint32_t *); /* chose API capabilities */ 242 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); 243 static int set_assert(int); 244 static void expire_upcalls(void *); 245 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); 246 static void phyint_send(struct ip *, struct vif *, struct mbuf *); 247 static void encap_send(struct ip *, struct vif *, struct mbuf *); 248 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); 249 static void tbf_queue(struct vif *, struct mbuf *); 250 static void tbf_process_q(struct vif *); 251 static void tbf_reprocess_q(void *); 252 static int tbf_dq_sel(struct vif *, struct ip *); 253 static void tbf_send_packet(struct vif *, struct mbuf *); 254 static void tbf_update_tokens(struct vif *); 255 static int priority(struct vif *, struct ip *); 256 257 /* 258 * Bandwidth monitoring 259 */ 260 static void free_bw_list(struct bw_meter *list); 261 static int add_bw_upcall(struct bw_upcall *); 262 static int del_bw_upcall(struct bw_upcall *); 263 static void bw_meter_receive_packet(struct bw_meter *x, int plen, 264 struct timeval *nowp); 265 static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp); 266 static void bw_upcalls_send(void); 267 static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp); 268 static void unschedule_bw_meter(struct bw_meter *x); 269 static void bw_meter_process(void); 270 static void expire_bw_upcalls_send(void *); 271 static void expire_bw_meter_process(void *); 272 273 #ifdef PIM 274 static int pim_register_send(struct ip *, struct vif *, 275 struct mbuf *, struct mfc *); 276 static int pim_register_send_rp(struct ip *, struct vif *, 277 struct mbuf *, struct mfc *); 278 static int pim_register_send_upcall(struct ip *, struct vif *, 279 struct mbuf *, struct mfc *); 280 static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); 281 #endif 282 283 /* 284 * whether or not special PIM assert processing is enabled. 285 */ 286 static int pim_assert; 287 /* 288 * Rate limit for assert notification messages, in usec 289 */ 290 #define ASSERT_MSG_TIME 3000000 291 292 /* 293 * Kernel multicast routing API capabilities and setup. 294 * If more API capabilities are added to the kernel, they should be 295 * recorded in `mrt_api_support'. 296 */ 297 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 298 MRT_MFC_FLAGS_BORDER_VIF | 299 MRT_MFC_RP | 300 MRT_MFC_BW_UPCALL); 301 static uint32_t mrt_api_config = 0; 302 303 /* 304 * Hash function for a source, group entry 305 */ 306 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 307 ((g) >> 20) ^ ((g) >> 10) ^ (g)) 308 309 /* 310 * Find a route for a given origin IP address and Multicast group address 311 * Type of service parameter to be added in the future!!! 312 * Statistics are updated by the caller if needed 313 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 314 */ 315 static struct mfc * 316 mfc_find(in_addr_t o, in_addr_t g) 317 { 318 struct mfc *rt; 319 320 for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) 321 if ((rt->mfc_origin.s_addr == o) && 322 (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) 323 break; 324 return rt; 325 } 326 327 /* 328 * Macros to compute elapsed time efficiently 329 * Borrowed from Van Jacobson's scheduling code 330 */ 331 #define TV_DELTA(a, b, delta) { \ 332 int xxs; \ 333 delta = (a).tv_usec - (b).tv_usec; \ 334 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 335 switch (xxs) { \ 336 case 2: \ 337 delta += 1000000; \ 338 /* FALLTHROUGH */ \ 339 case 1: \ 340 delta += 1000000; \ 341 break; \ 342 default: \ 343 delta += (1000000 * xxs); \ 344 } \ 345 } \ 346 } 347 348 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 349 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 350 351 /* 352 * Handle MRT setsockopt commands to modify the multicast routing tables. 353 */ 354 static int 355 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) 356 { 357 int error, optval; 358 vifi_t vifi; 359 struct vifctl vifc; 360 struct mfcctl2 mfc; 361 struct bw_upcall bw_upcall; 362 uint32_t i; 363 364 if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) 365 return EPERM; 366 367 error = 0; 368 switch (sopt->sopt_name) { 369 case MRT_INIT: 370 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 371 if (error) 372 break; 373 error = ip_mrouter_init(so, optval); 374 break; 375 376 case MRT_DONE: 377 error = ip_mrouter_done(); 378 break; 379 380 case MRT_ADD_VIF: 381 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 382 if (error) 383 break; 384 error = add_vif(&vifc); 385 break; 386 387 case MRT_DEL_VIF: 388 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 389 if (error) 390 break; 391 error = del_vif(vifi); 392 break; 393 394 case MRT_ADD_MFC: 395 case MRT_DEL_MFC: 396 /* 397 * select data size depending on API version. 398 */ 399 if (sopt->sopt_name == MRT_ADD_MFC && 400 mrt_api_config & MRT_API_FLAGS_ALL) { 401 error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), 402 sizeof(struct mfcctl2)); 403 } else { 404 error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), 405 sizeof(struct mfcctl)); 406 bzero((caddr_t)&mfc + sizeof(struct mfcctl), 407 sizeof(mfc) - sizeof(struct mfcctl)); 408 } 409 if (error) 410 break; 411 if (sopt->sopt_name == MRT_ADD_MFC) 412 error = add_mfc(&mfc); 413 else 414 error = del_mfc(&mfc); 415 break; 416 417 case MRT_ASSERT: 418 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 419 if (error) 420 break; 421 set_assert(optval); 422 break; 423 424 case MRT_API_CONFIG: 425 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 426 if (!error) 427 error = set_api_config(&i); 428 if (!error) 429 error = sooptcopyout(sopt, &i, sizeof i); 430 break; 431 432 case MRT_ADD_BW_UPCALL: 433 case MRT_DEL_BW_UPCALL: 434 error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, 435 sizeof bw_upcall); 436 if (error) 437 break; 438 if (sopt->sopt_name == MRT_ADD_BW_UPCALL) 439 error = add_bw_upcall(&bw_upcall); 440 else 441 error = del_bw_upcall(&bw_upcall); 442 break; 443 444 default: 445 error = EOPNOTSUPP; 446 break; 447 } 448 return error; 449 } 450 451 /* 452 * Handle MRT getsockopt commands 453 */ 454 static int 455 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) 456 { 457 int error; 458 static int version = 0x0305; /* !!! why is this here? XXX */ 459 460 switch (sopt->sopt_name) { 461 case MRT_VERSION: 462 error = sooptcopyout(sopt, &version, sizeof version); 463 break; 464 465 case MRT_ASSERT: 466 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); 467 break; 468 469 case MRT_API_SUPPORT: 470 error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); 471 break; 472 473 case MRT_API_CONFIG: 474 error = sooptcopyout(sopt, &mrt_api_config, sizeof mrt_api_config); 475 break; 476 477 default: 478 error = EOPNOTSUPP; 479 break; 480 } 481 return error; 482 } 483 484 /* 485 * Handle ioctl commands to obtain information from the cache 486 */ 487 static int 488 X_mrt_ioctl(int cmd, caddr_t data) 489 { 490 int error = 0; 491 492 switch (cmd) { 493 case SIOCGETVIFCNT: 494 error = get_vif_cnt((struct sioc_vif_req *)data); 495 break; 496 497 case SIOCGETSGCNT: 498 error = get_sg_cnt((struct sioc_sg_req *)data); 499 break; 500 501 default: 502 error = EINVAL; 503 break; 504 } 505 return error; 506 } 507 508 /* 509 * returns the packet, byte, rpf-failure count for the source group provided 510 */ 511 static int 512 get_sg_cnt(struct sioc_sg_req *req) 513 { 514 int s; 515 struct mfc *rt; 516 517 s = splnet(); 518 rt = mfc_find(req->src.s_addr, req->grp.s_addr); 519 splx(s); 520 if (rt == NULL) { 521 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 522 return EADDRNOTAVAIL; 523 } 524 req->pktcnt = rt->mfc_pkt_cnt; 525 req->bytecnt = rt->mfc_byte_cnt; 526 req->wrong_if = rt->mfc_wrong_if; 527 return 0; 528 } 529 530 /* 531 * returns the input and output packet and byte counts on the vif provided 532 */ 533 static int 534 get_vif_cnt(struct sioc_vif_req *req) 535 { 536 vifi_t vifi = req->vifi; 537 538 if (vifi >= numvifs) 539 return EINVAL; 540 541 req->icount = viftable[vifi].v_pkt_in; 542 req->ocount = viftable[vifi].v_pkt_out; 543 req->ibytes = viftable[vifi].v_bytes_in; 544 req->obytes = viftable[vifi].v_bytes_out; 545 546 return 0; 547 } 548 549 /* 550 * Enable multicast routing 551 */ 552 static int 553 ip_mrouter_init(struct socket *so, int version) 554 { 555 if (mrtdebug) 556 log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 557 so->so_type, so->so_proto->pr_protocol); 558 559 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) 560 return EOPNOTSUPP; 561 562 if (version != 1) 563 return ENOPROTOOPT; 564 565 if (ip_mrouter != NULL) 566 return EADDRINUSE; 567 568 ip_mrouter = so; 569 570 bzero((caddr_t)mfctable, sizeof(mfctable)); 571 bzero((caddr_t)nexpire, sizeof(nexpire)); 572 573 pim_assert = 0; 574 575 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 576 577 bw_upcalls_n = 0; 578 bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers)); 579 bw_upcalls_ch = timeout(expire_bw_upcalls_send, NULL, BW_UPCALLS_PERIOD); 580 bw_meter_ch = timeout(expire_bw_meter_process, NULL, BW_METER_PERIOD); 581 582 mrt_api_config = 0; 583 584 if (mrtdebug) 585 log(LOG_DEBUG, "ip_mrouter_init\n"); 586 587 return 0; 588 } 589 590 /* 591 * Disable multicast routing 592 */ 593 static int 594 X_ip_mrouter_done(void) 595 { 596 vifi_t vifi; 597 int i; 598 struct ifnet *ifp; 599 struct ifreq ifr; 600 struct mfc *rt; 601 struct rtdetq *rte; 602 int s; 603 604 s = splnet(); 605 606 /* 607 * For each phyint in use, disable promiscuous reception of all IP 608 * multicasts. 609 */ 610 for (vifi = 0; vifi < numvifs; vifi++) { 611 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 612 !(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 613 struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr); 614 615 so->sin_len = sizeof(struct sockaddr_in); 616 so->sin_family = AF_INET; 617 so->sin_addr.s_addr = INADDR_ANY; 618 ifp = viftable[vifi].v_ifp; 619 if_allmulti(ifp, 0); 620 } 621 } 622 bzero((caddr_t)tbftable, sizeof(tbftable)); 623 bzero((caddr_t)viftable, sizeof(viftable)); 624 numvifs = 0; 625 pim_assert = 0; 626 627 untimeout(expire_upcalls, NULL, expire_upcalls_ch); 628 629 mrt_api_config = 0; 630 bw_upcalls_n = 0; 631 untimeout(expire_bw_upcalls_send, NULL, bw_upcalls_ch); 632 untimeout(expire_bw_meter_process, NULL, bw_meter_ch); 633 634 /* 635 * Free all multicast forwarding cache entries. 636 */ 637 for (i = 0; i < MFCTBLSIZ; i++) { 638 for (rt = mfctable[i]; rt != NULL; ) { 639 struct mfc *nr = rt->mfc_next; 640 641 for (rte = rt->mfc_stall; rte != NULL; ) { 642 struct rtdetq *n = rte->next; 643 644 m_freem(rte->m); 645 free(rte, M_MRTABLE); 646 rte = n; 647 } 648 free_bw_list(rt->mfc_bw_meter); 649 free(rt, M_MRTABLE); 650 rt = nr; 651 } 652 } 653 654 bzero((caddr_t)mfctable, sizeof(mfctable)); 655 656 bzero(bw_meter_timers, sizeof(bw_meter_timers)); 657 658 /* 659 * Reset de-encapsulation cache 660 */ 661 last_encap_src = INADDR_ANY; 662 last_encap_vif = NULL; 663 #ifdef PIM 664 reg_vif_num = VIFI_INVALID; 665 #endif 666 have_encap_tunnel = 0; 667 668 ip_mrouter = NULL; 669 670 splx(s); 671 672 if (mrtdebug) 673 log(LOG_DEBUG, "ip_mrouter_done\n"); 674 675 return 0; 676 } 677 678 /* 679 * Set PIM assert processing global 680 */ 681 static int 682 set_assert(int i) 683 { 684 if ((i != 1) && (i != 0)) 685 return EINVAL; 686 687 pim_assert = i; 688 689 return 0; 690 } 691 692 /* 693 * Configure API capabilities 694 */ 695 int 696 set_api_config(uint32_t *apival) 697 { 698 int i; 699 700 /* 701 * We can set the API capabilities only if it is the first operation 702 * after MRT_INIT. I.e.: 703 * - there are no vifs installed 704 * - pim_assert is not enabled 705 * - the MFC table is empty 706 */ 707 if (numvifs > 0) { 708 *apival = 0; 709 return EPERM; 710 } 711 if (pim_assert) { 712 *apival = 0; 713 return EPERM; 714 } 715 for (i = 0; i < MFCTBLSIZ; i++) { 716 if (mfctable[i] != NULL) { 717 *apival = 0; 718 return EPERM; 719 } 720 } 721 722 mrt_api_config = *apival & mrt_api_support; 723 *apival = mrt_api_config; 724 725 return 0; 726 } 727 728 /* 729 * Add a vif to the vif table 730 */ 731 static int 732 add_vif(struct vifctl *vifcp) 733 { 734 struct vif *vifp = viftable + vifcp->vifc_vifi; 735 struct sockaddr_in sin = {sizeof sin, AF_INET}; 736 struct ifaddr *ifa; 737 struct ifnet *ifp; 738 int error, s; 739 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 740 741 if (vifcp->vifc_vifi >= MAXVIFS) 742 return EINVAL; 743 if (vifp->v_lcl_addr.s_addr != INADDR_ANY) 744 return EADDRINUSE; 745 if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) 746 return EADDRNOTAVAIL; 747 748 /* Find the interface with an address in AF_INET family */ 749 #ifdef PIM 750 if (vifcp->vifc_flags & VIFF_REGISTER) { 751 /* 752 * XXX: Because VIFF_REGISTER does not really need a valid 753 * local interface (e.g. it could be 127.0.0.2), we don't 754 * check its address. 755 */ 756 ifp = NULL; 757 } else 758 #endif 759 { 760 sin.sin_addr = vifcp->vifc_lcl_addr; 761 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 762 if (ifa == NULL) 763 return EADDRNOTAVAIL; 764 ifp = ifa->ifa_ifp; 765 } 766 767 if (vifcp->vifc_flags & VIFF_TUNNEL) { 768 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 769 /* 770 * An encapsulating tunnel is wanted. Tell ipip_input() to 771 * start paying attention to encapsulated packets. 772 */ 773 if (have_encap_tunnel == 0) { 774 have_encap_tunnel = 1; 775 for (s = 0; s < MAXVIFS; ++s) { 776 if_initname(&multicast_decap_if[s], "mdecap", s); 777 } 778 } 779 /* 780 * Set interface to fake encapsulator interface 781 */ 782 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 783 /* 784 * Prepare cached route entry 785 */ 786 bzero(&vifp->v_route, sizeof(vifp->v_route)); 787 } else { 788 log(LOG_ERR, "source routed tunnels not supported\n"); 789 return EOPNOTSUPP; 790 } 791 #ifdef PIM 792 } else if (vifcp->vifc_flags & VIFF_REGISTER) { 793 ifp = &multicast_register_if; 794 if (mrtdebug) 795 log(LOG_DEBUG, "Adding a register vif, ifp: %p\n", 796 (void *)&multicast_register_if); 797 if (reg_vif_num == VIFI_INVALID) { 798 if_initname(&multicast_register_if, "register_vif", 0); 799 multicast_register_if.if_flags = IFF_LOOPBACK; 800 bzero(&vifp->v_route, sizeof(vifp->v_route)); 801 reg_vif_num = vifcp->vifc_vifi; 802 } 803 #endif 804 } else { /* Make sure the interface supports multicast */ 805 if ((ifp->if_flags & IFF_MULTICAST) == 0) 806 return EOPNOTSUPP; 807 808 /* Enable promiscuous reception of all IP multicasts from the if */ 809 s = splnet(); 810 error = if_allmulti(ifp, 1); 811 splx(s); 812 if (error) 813 return error; 814 } 815 816 s = splnet(); 817 /* define parameters for the tbf structure */ 818 vifp->v_tbf = v_tbf; 819 GET_TIME(vifp->v_tbf->tbf_last_pkt_t); 820 vifp->v_tbf->tbf_n_tok = 0; 821 vifp->v_tbf->tbf_q_len = 0; 822 vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 823 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 824 825 vifp->v_flags = vifcp->vifc_flags; 826 vifp->v_threshold = vifcp->vifc_threshold; 827 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 828 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 829 vifp->v_ifp = ifp; 830 /* scaling up here allows division by 1024 in critical code */ 831 vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; 832 vifp->v_rsvp_on = 0; 833 vifp->v_rsvpd = NULL; 834 /* initialize per vif pkt counters */ 835 vifp->v_pkt_in = 0; 836 vifp->v_pkt_out = 0; 837 vifp->v_bytes_in = 0; 838 vifp->v_bytes_out = 0; 839 splx(s); 840 841 /* Adjust numvifs up if the vifi is higher than numvifs */ 842 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 843 844 if (mrtdebug) 845 log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", 846 vifcp->vifc_vifi, 847 (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), 848 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 849 (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), 850 vifcp->vifc_threshold, 851 vifcp->vifc_rate_limit); 852 853 return 0; 854 } 855 856 /* 857 * Delete a vif from the vif table 858 */ 859 static int 860 del_vif(vifi_t vifi) 861 { 862 struct vif *vifp; 863 int s; 864 865 if (vifi >= numvifs) 866 return EINVAL; 867 vifp = &viftable[vifi]; 868 if (vifp->v_lcl_addr.s_addr == INADDR_ANY) 869 return EADDRNOTAVAIL; 870 871 s = splnet(); 872 873 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) 874 if_allmulti(vifp->v_ifp, 0); 875 876 if (vifp == last_encap_vif) { 877 last_encap_vif = NULL; 878 last_encap_src = INADDR_ANY; 879 } 880 881 /* 882 * Free packets queued at the interface 883 */ 884 while (vifp->v_tbf->tbf_q) { 885 struct mbuf *m = vifp->v_tbf->tbf_q; 886 887 vifp->v_tbf->tbf_q = m->m_act; 888 m_freem(m); 889 } 890 891 #ifdef PIM 892 if (vifp->v_flags & VIFF_REGISTER) 893 reg_vif_num = VIFI_INVALID; 894 #endif 895 896 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 897 bzero((caddr_t)vifp, sizeof (*vifp)); 898 899 if (mrtdebug) 900 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); 901 902 /* Adjust numvifs down */ 903 for (vifi = numvifs; vifi > 0; vifi--) 904 if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY) 905 break; 906 numvifs = vifi; 907 908 splx(s); 909 910 return 0; 911 } 912 913 /* 914 * update an mfc entry without resetting counters and S,G addresses. 915 */ 916 static void 917 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 918 { 919 int i; 920 921 rt->mfc_parent = mfccp->mfcc_parent; 922 for (i = 0; i < numvifs; i++) { 923 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 924 rt->mfc_flags[i] = mfccp->mfcc_flags[i] & mrt_api_config & 925 MRT_MFC_FLAGS_ALL; 926 } 927 /* set the RP address */ 928 if (mrt_api_config & MRT_MFC_RP) 929 rt->mfc_rp = mfccp->mfcc_rp; 930 else 931 rt->mfc_rp.s_addr = INADDR_ANY; 932 } 933 934 /* 935 * fully initialize an mfc entry from the parameter. 936 */ 937 static void 938 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 939 { 940 rt->mfc_origin = mfccp->mfcc_origin; 941 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 942 943 update_mfc_params(rt, mfccp); 944 945 /* initialize pkt counters per src-grp */ 946 rt->mfc_pkt_cnt = 0; 947 rt->mfc_byte_cnt = 0; 948 rt->mfc_wrong_if = 0; 949 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 950 } 951 952 953 /* 954 * Add an mfc entry 955 */ 956 static int 957 add_mfc(struct mfcctl2 *mfccp) 958 { 959 struct mfc *rt; 960 u_long hash; 961 struct rtdetq *rte; 962 u_short nstl; 963 int s; 964 965 rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 966 967 /* If an entry already exists, just update the fields */ 968 if (rt) { 969 if (mrtdebug & DEBUG_MFC) 970 log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", 971 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 972 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 973 mfccp->mfcc_parent); 974 975 s = splnet(); 976 update_mfc_params(rt, mfccp); 977 splx(s); 978 return 0; 979 } 980 981 /* 982 * Find the entry for which the upcall was made and update 983 */ 984 s = splnet(); 985 hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 986 for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { 987 988 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 989 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 990 (rt->mfc_stall != NULL)) { 991 992 if (nstl++) 993 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", 994 "multiple kernel entries", 995 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 996 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 997 mfccp->mfcc_parent, (void *)rt->mfc_stall); 998 999 if (mrtdebug & DEBUG_MFC) 1000 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", 1001 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1002 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1003 mfccp->mfcc_parent, (void *)rt->mfc_stall); 1004 1005 init_mfc_params(rt, mfccp); 1006 1007 rt->mfc_expire = 0; /* Don't clean this guy up */ 1008 nexpire[hash]--; 1009 1010 /* free packets Qed at the end of this entry */ 1011 for (rte = rt->mfc_stall; rte != NULL; ) { 1012 struct rtdetq *n = rte->next; 1013 1014 ip_mdq(rte->m, rte->ifp, rt, -1); 1015 m_freem(rte->m); 1016 free(rte, M_MRTABLE); 1017 rte = n; 1018 } 1019 rt->mfc_stall = NULL; 1020 } 1021 } 1022 1023 /* 1024 * It is possible that an entry is being inserted without an upcall 1025 */ 1026 if (nstl == 0) { 1027 if (mrtdebug & DEBUG_MFC) 1028 log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", 1029 hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1030 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1031 mfccp->mfcc_parent); 1032 1033 for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { 1034 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1035 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 1036 init_mfc_params(rt, mfccp); 1037 if (rt->mfc_expire) 1038 nexpire[hash]--; 1039 rt->mfc_expire = 0; 1040 break; /* XXX */ 1041 } 1042 } 1043 if (rt == NULL) { /* no upcall, so make a new entry */ 1044 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1045 if (rt == NULL) { 1046 splx(s); 1047 return ENOBUFS; 1048 } 1049 1050 init_mfc_params(rt, mfccp); 1051 rt->mfc_expire = 0; 1052 rt->mfc_stall = NULL; 1053 1054 rt->mfc_bw_meter = NULL; 1055 /* insert new entry at head of hash chain */ 1056 rt->mfc_next = mfctable[hash]; 1057 mfctable[hash] = rt; 1058 } 1059 } 1060 splx(s); 1061 return 0; 1062 } 1063 1064 /* 1065 * Delete an mfc entry 1066 */ 1067 static int 1068 del_mfc(struct mfcctl2 *mfccp) 1069 { 1070 struct in_addr origin; 1071 struct in_addr mcastgrp; 1072 struct mfc *rt; 1073 struct mfc **nptr; 1074 u_long hash; 1075 int s; 1076 struct bw_meter *list; 1077 1078 origin = mfccp->mfcc_origin; 1079 mcastgrp = mfccp->mfcc_mcastgrp; 1080 1081 if (mrtdebug & DEBUG_MFC) 1082 log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", 1083 (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); 1084 1085 s = splnet(); 1086 1087 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1088 for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next) 1089 if (origin.s_addr == rt->mfc_origin.s_addr && 1090 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1091 rt->mfc_stall == NULL) 1092 break; 1093 if (rt == NULL) { 1094 splx(s); 1095 return EADDRNOTAVAIL; 1096 } 1097 1098 *nptr = rt->mfc_next; 1099 1100 /* 1101 * free the bw_meter entries 1102 */ 1103 list = rt->mfc_bw_meter; 1104 rt->mfc_bw_meter = NULL; 1105 1106 free(rt, M_MRTABLE); 1107 1108 splx(s); 1109 1110 free_bw_list(list); 1111 1112 return 0; 1113 } 1114 1115 /* 1116 * Send a message to mrouted on the multicast routing socket 1117 */ 1118 static int 1119 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 1120 { 1121 if (s) { 1122 if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { 1123 sorwakeup(s); 1124 return 0; 1125 } 1126 } 1127 m_freem(mm); 1128 return -1; 1129 } 1130 1131 /* 1132 * IP multicast forwarding function. This function assumes that the packet 1133 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1134 * pointed to by "ifp", and the packet is to be relayed to other networks 1135 * that have members of the packet's destination IP multicast group. 1136 * 1137 * The packet is returned unscathed to the caller, unless it is 1138 * erroneous, in which case a non-zero return value tells the caller to 1139 * discard it. 1140 */ 1141 1142 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1143 1144 static int 1145 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, 1146 struct ip_moptions *imo) 1147 { 1148 struct mfc *rt; 1149 int s; 1150 vifi_t vifi; 1151 1152 if (mrtdebug & DEBUG_FORWARD) 1153 log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", 1154 (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), 1155 (void *)ifp); 1156 1157 if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 1158 ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 1159 /* 1160 * Packet arrived via a physical interface or 1161 * an encapsulated tunnel or a register_vif. 1162 */ 1163 } else { 1164 /* 1165 * Packet arrived through a source-route tunnel. 1166 * Source-route tunnels are no longer supported. 1167 */ 1168 static int last_log; 1169 if (last_log != time_second) { 1170 last_log = time_second; 1171 log(LOG_ERR, 1172 "ip_mforward: received source-routed packet from %lx\n", 1173 (u_long)ntohl(ip->ip_src.s_addr)); 1174 } 1175 return 1; 1176 } 1177 1178 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1179 if (ip->ip_ttl < 255) 1180 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1181 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1182 struct vif *vifp = viftable + vifi; 1183 1184 printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s)\n", 1185 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), 1186 vifi, 1187 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1188 vifp->v_ifp->if_xname); 1189 } 1190 return ip_mdq(m, ifp, NULL, vifi); 1191 } 1192 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1193 printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", 1194 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); 1195 if (!imo) 1196 printf("In fact, no options were specified at all\n"); 1197 } 1198 1199 /* 1200 * Don't forward a packet with time-to-live of zero or one, 1201 * or a packet destined to a local-only group. 1202 */ 1203 if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1204 return 0; 1205 1206 /* 1207 * Determine forwarding vifs from the forwarding cache table 1208 */ 1209 s = splnet(); 1210 ++mrtstat.mrts_mfc_lookups; 1211 rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1212 1213 /* Entry exists, so forward if necessary */ 1214 if (rt != NULL) { 1215 splx(s); 1216 return ip_mdq(m, ifp, rt, -1); 1217 } else { 1218 /* 1219 * If we don't have a route for packet's origin, 1220 * Make a copy of the packet & send message to routing daemon 1221 */ 1222 1223 struct mbuf *mb0; 1224 struct rtdetq *rte; 1225 u_long hash; 1226 int hlen = ip->ip_hl << 2; 1227 1228 ++mrtstat.mrts_mfc_misses; 1229 1230 mrtstat.mrts_no_route++; 1231 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1232 log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", 1233 (u_long)ntohl(ip->ip_src.s_addr), 1234 (u_long)ntohl(ip->ip_dst.s_addr)); 1235 1236 /* 1237 * Allocate mbufs early so that we don't do extra work if we are 1238 * just going to fail anyway. Make sure to pullup the header so 1239 * that other people can't step on it. 1240 */ 1241 rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); 1242 if (rte == NULL) { 1243 splx(s); 1244 return ENOBUFS; 1245 } 1246 mb0 = m_copypacket(m, M_DONTWAIT); 1247 if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 1248 mb0 = m_pullup(mb0, hlen); 1249 if (mb0 == NULL) { 1250 free(rte, M_MRTABLE); 1251 splx(s); 1252 return ENOBUFS; 1253 } 1254 1255 /* is there an upcall waiting for this flow ? */ 1256 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1257 for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { 1258 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1259 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1260 (rt->mfc_stall != NULL)) 1261 break; 1262 } 1263 1264 if (rt == NULL) { 1265 int i; 1266 struct igmpmsg *im; 1267 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1268 struct mbuf *mm; 1269 1270 /* 1271 * Locate the vifi for the incoming interface for this packet. 1272 * If none found, drop packet. 1273 */ 1274 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1275 ; 1276 if (vifi >= numvifs) /* vif not found, drop packet */ 1277 goto non_fatal; 1278 1279 /* no upcall, so make a new entry */ 1280 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1281 if (rt == NULL) 1282 goto fail; 1283 /* Make a copy of the header to send to the user level process */ 1284 mm = m_copy(mb0, 0, hlen); 1285 if (mm == NULL) 1286 goto fail1; 1287 1288 /* 1289 * Send message to routing daemon to install 1290 * a route into the kernel table 1291 */ 1292 1293 im = mtod(mm, struct igmpmsg *); 1294 im->im_msgtype = IGMPMSG_NOCACHE; 1295 im->im_mbz = 0; 1296 im->im_vif = vifi; 1297 1298 mrtstat.mrts_upcalls++; 1299 1300 k_igmpsrc.sin_addr = ip->ip_src; 1301 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1302 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1303 ++mrtstat.mrts_upq_sockfull; 1304 fail1: 1305 free(rt, M_MRTABLE); 1306 fail: 1307 free(rte, M_MRTABLE); 1308 m_freem(mb0); 1309 splx(s); 1310 return ENOBUFS; 1311 } 1312 1313 /* insert new entry at head of hash chain */ 1314 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1315 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1316 rt->mfc_expire = UPCALL_EXPIRE; 1317 nexpire[hash]++; 1318 for (i = 0; i < numvifs; i++) { 1319 rt->mfc_ttls[i] = 0; 1320 rt->mfc_flags[i] = 0; 1321 } 1322 rt->mfc_parent = -1; 1323 1324 rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */ 1325 1326 rt->mfc_bw_meter = NULL; 1327 1328 /* link into table */ 1329 rt->mfc_next = mfctable[hash]; 1330 mfctable[hash] = rt; 1331 rt->mfc_stall = rte; 1332 1333 } else { 1334 /* determine if q has overflowed */ 1335 int npkts = 0; 1336 struct rtdetq **p; 1337 1338 /* 1339 * XXX ouch! we need to append to the list, but we 1340 * only have a pointer to the front, so we have to 1341 * scan the entire list every time. 1342 */ 1343 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1344 npkts++; 1345 1346 if (npkts > MAX_UPQ) { 1347 mrtstat.mrts_upq_ovflw++; 1348 non_fatal: 1349 free(rte, M_MRTABLE); 1350 m_freem(mb0); 1351 splx(s); 1352 return 0; 1353 } 1354 1355 /* Add this entry to the end of the queue */ 1356 *p = rte; 1357 } 1358 1359 rte->m = mb0; 1360 rte->ifp = ifp; 1361 rte->next = NULL; 1362 1363 splx(s); 1364 1365 return 0; 1366 } 1367 } 1368 1369 /* 1370 * Clean up the cache entry if upcall is not serviced 1371 */ 1372 static void 1373 expire_upcalls(void *unused) 1374 { 1375 struct rtdetq *rte; 1376 struct mfc *mfc, **nptr; 1377 int i; 1378 int s; 1379 1380 s = splnet(); 1381 for (i = 0; i < MFCTBLSIZ; i++) { 1382 if (nexpire[i] == 0) 1383 continue; 1384 nptr = &mfctable[i]; 1385 for (mfc = *nptr; mfc != NULL; mfc = *nptr) { 1386 /* 1387 * Skip real cache entries 1388 * Make sure it wasn't marked to not expire (shouldn't happen) 1389 * If it expires now 1390 */ 1391 if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 && 1392 --mfc->mfc_expire == 0) { 1393 if (mrtdebug & DEBUG_EXPIRE) 1394 log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", 1395 (u_long)ntohl(mfc->mfc_origin.s_addr), 1396 (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); 1397 /* 1398 * drop all the packets 1399 * free the mbuf with the pkt, if, timing info 1400 */ 1401 for (rte = mfc->mfc_stall; rte; ) { 1402 struct rtdetq *n = rte->next; 1403 1404 m_freem(rte->m); 1405 free(rte, M_MRTABLE); 1406 rte = n; 1407 } 1408 ++mrtstat.mrts_cache_cleanups; 1409 nexpire[i]--; 1410 1411 /* 1412 * free the bw_meter entries 1413 */ 1414 while (mfc->mfc_bw_meter != NULL) { 1415 struct bw_meter *x = mfc->mfc_bw_meter; 1416 1417 mfc->mfc_bw_meter = x->bm_mfc_next; 1418 free(x, M_BWMETER); 1419 } 1420 1421 *nptr = mfc->mfc_next; 1422 free(mfc, M_MRTABLE); 1423 } else { 1424 nptr = &mfc->mfc_next; 1425 } 1426 } 1427 } 1428 splx(s); 1429 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 1430 } 1431 1432 /* 1433 * Packet forwarding routine once entry in the cache is made 1434 */ 1435 static int 1436 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) 1437 { 1438 struct ip *ip = mtod(m, struct ip *); 1439 vifi_t vifi; 1440 int plen = ip->ip_len; 1441 1442 /* 1443 * Macro to send packet on vif. Since RSVP packets don't get counted on 1444 * input, they shouldn't get counted on output, so statistics keeping is 1445 * separate. 1446 */ 1447 #define MC_SEND(ip,vifp,m) { \ 1448 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1449 encap_send((ip), (vifp), (m)); \ 1450 else \ 1451 phyint_send((ip), (vifp), (m)); \ 1452 } 1453 1454 /* 1455 * If xmt_vif is not -1, send on only the requested vif. 1456 * 1457 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1458 */ 1459 if (xmt_vif < numvifs) { 1460 #ifdef PIM 1461 if (viftable[xmt_vif].v_flags & VIFF_REGISTER) 1462 pim_register_send(ip, viftable + xmt_vif, m, rt); 1463 else 1464 #endif 1465 MC_SEND(ip, viftable + xmt_vif, m); 1466 return 1; 1467 } 1468 1469 /* 1470 * Don't forward if it didn't arrive from the parent vif for its origin. 1471 */ 1472 vifi = rt->mfc_parent; 1473 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1474 /* came in the wrong interface */ 1475 if (mrtdebug & DEBUG_FORWARD) 1476 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1477 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); 1478 ++mrtstat.mrts_wrong_if; 1479 ++rt->mfc_wrong_if; 1480 /* 1481 * If we are doing PIM assert processing, send a message 1482 * to the routing daemon. 1483 * 1484 * XXX: A PIM-SM router needs the WRONGVIF detection so it 1485 * can complete the SPT switch, regardless of the type 1486 * of the iif (broadcast media, GRE tunnel, etc). 1487 */ 1488 if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { 1489 struct timeval now; 1490 u_long delta; 1491 1492 #ifdef PIM 1493 if (ifp == &multicast_register_if) 1494 pimstat.pims_rcv_registers_wrongiif++; 1495 #endif 1496 1497 /* Get vifi for the incoming packet */ 1498 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1499 ; 1500 if (vifi >= numvifs) 1501 return 0; /* The iif is not found: ignore the packet. */ 1502 1503 if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) 1504 return 0; /* WRONGVIF disabled: ignore the packet */ 1505 1506 GET_TIME(now); 1507 1508 TV_DELTA(rt->mfc_last_assert, now, delta); 1509 1510 if (delta > ASSERT_MSG_TIME) { 1511 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1512 struct igmpmsg *im; 1513 int hlen = ip->ip_hl << 2; 1514 struct mbuf *mm = m_copy(m, 0, hlen); 1515 1516 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1517 mm = m_pullup(mm, hlen); 1518 if (mm == NULL) 1519 return ENOBUFS; 1520 1521 rt->mfc_last_assert = now; 1522 1523 im = mtod(mm, struct igmpmsg *); 1524 im->im_msgtype = IGMPMSG_WRONGVIF; 1525 im->im_mbz = 0; 1526 im->im_vif = vifi; 1527 1528 mrtstat.mrts_upcalls++; 1529 1530 k_igmpsrc.sin_addr = im->im_src; 1531 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1532 log(LOG_WARNING, 1533 "ip_mforward: ip_mrouter socket queue full\n"); 1534 ++mrtstat.mrts_upq_sockfull; 1535 return ENOBUFS; 1536 } 1537 } 1538 } 1539 return 0; 1540 } 1541 1542 /* If I sourced this packet, it counts as output, else it was input. */ 1543 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1544 viftable[vifi].v_pkt_out++; 1545 viftable[vifi].v_bytes_out += plen; 1546 } else { 1547 viftable[vifi].v_pkt_in++; 1548 viftable[vifi].v_bytes_in += plen; 1549 } 1550 rt->mfc_pkt_cnt++; 1551 rt->mfc_byte_cnt += plen; 1552 1553 /* 1554 * For each vif, decide if a copy of the packet should be forwarded. 1555 * Forward if: 1556 * - the ttl exceeds the vif's threshold 1557 * - there are group members downstream on interface 1558 */ 1559 for (vifi = 0; vifi < numvifs; vifi++) 1560 if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1561 viftable[vifi].v_pkt_out++; 1562 viftable[vifi].v_bytes_out += plen; 1563 #ifdef PIM 1564 if (viftable[vifi].v_flags & VIFF_REGISTER) 1565 pim_register_send(ip, viftable + vifi, m, rt); 1566 else 1567 #endif 1568 MC_SEND(ip, viftable+vifi, m); 1569 } 1570 1571 /* 1572 * Perform upcall-related bw measuring. 1573 */ 1574 if (rt->mfc_bw_meter != NULL) { 1575 struct bw_meter *x; 1576 struct timeval now; 1577 1578 GET_TIME(now); 1579 for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) 1580 bw_meter_receive_packet(x, plen, &now); 1581 } 1582 1583 return 0; 1584 } 1585 1586 /* 1587 * check if a vif number is legal/ok. This is used by ip_output. 1588 */ 1589 static int 1590 X_legal_vif_num(int vif) 1591 { 1592 return (vif >= 0 && vif < numvifs); 1593 } 1594 1595 /* 1596 * Return the local address used by this vif 1597 */ 1598 static u_long 1599 X_ip_mcast_src(int vifi) 1600 { 1601 if (vifi >= 0 && vifi < numvifs) 1602 return viftable[vifi].v_lcl_addr.s_addr; 1603 else 1604 return INADDR_ANY; 1605 } 1606 1607 static void 1608 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1609 { 1610 struct mbuf *mb_copy; 1611 int hlen = ip->ip_hl << 2; 1612 1613 /* 1614 * Make a new reference to the packet; make sure that 1615 * the IP header is actually copied, not just referenced, 1616 * so that ip_output() only scribbles on the copy. 1617 */ 1618 mb_copy = m_copypacket(m, M_DONTWAIT); 1619 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1620 mb_copy = m_pullup(mb_copy, hlen); 1621 if (mb_copy == NULL) 1622 return; 1623 1624 if (vifp->v_rate_limit == 0) 1625 tbf_send_packet(vifp, mb_copy); 1626 else 1627 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1628 } 1629 1630 static void 1631 encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1632 { 1633 struct mbuf *mb_copy; 1634 struct ip *ip_copy; 1635 int i, len = ip->ip_len; 1636 1637 /* Take care of delayed checksums */ 1638 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1639 in_delayed_cksum(m); 1640 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1641 } 1642 1643 /* 1644 * copy the old packet & pullup its IP header into the 1645 * new mbuf so we can modify it. Try to fill the new 1646 * mbuf since if we don't the ethernet driver will. 1647 */ 1648 MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); 1649 if (mb_copy == NULL) 1650 return; 1651 mb_copy->m_data += max_linkhdr; 1652 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1653 1654 if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) { 1655 m_freem(mb_copy); 1656 return; 1657 } 1658 i = MHLEN - M_LEADINGSPACE(mb_copy); 1659 if (i > len) 1660 i = len; 1661 mb_copy = m_pullup(mb_copy, i); 1662 if (mb_copy == NULL) 1663 return; 1664 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1665 1666 /* 1667 * fill in the encapsulating IP header. 1668 */ 1669 ip_copy = mtod(mb_copy, struct ip *); 1670 *ip_copy = multicast_encap_iphdr; 1671 #ifdef RANDOM_IP_ID 1672 ip_copy->ip_id = ip_randomid(); 1673 #else 1674 ip_copy->ip_id = htons(ip_id++); 1675 #endif 1676 ip_copy->ip_len += len; 1677 ip_copy->ip_src = vifp->v_lcl_addr; 1678 ip_copy->ip_dst = vifp->v_rmt_addr; 1679 1680 /* 1681 * turn the encapsulated IP header back into a valid one. 1682 */ 1683 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1684 --ip->ip_ttl; 1685 ip->ip_len = htons(ip->ip_len); 1686 ip->ip_off = htons(ip->ip_off); 1687 ip->ip_sum = 0; 1688 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1689 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1690 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1691 1692 if (vifp->v_rate_limit == 0) 1693 tbf_send_packet(vifp, mb_copy); 1694 else 1695 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1696 } 1697 1698 /* 1699 * De-encapsulate a packet and feed it back through ip input (this 1700 * routine is called whenever IP gets a packet with proto type 1701 * ENCAP_PROTO and a local destination address). 1702 * 1703 * This is similar to mroute_encapcheck() + mroute_encap_input() in -current. 1704 */ 1705 static void 1706 X_ipip_input(struct mbuf *m, int off, int proto) 1707 { 1708 struct ip *ip = mtod(m, struct ip *); 1709 int hlen = ip->ip_hl << 2; 1710 1711 if (!have_encap_tunnel) { 1712 rip_input(m, off, proto); 1713 return; 1714 } 1715 /* 1716 * dump the packet if it's not to a multicast destination or if 1717 * we don't have an encapsulating tunnel with the source. 1718 * Note: This code assumes that the remote site IP address 1719 * uniquely identifies the tunnel (i.e., that this site has 1720 * at most one tunnel with the remote site). 1721 */ 1722 if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr))) { 1723 ++mrtstat.mrts_bad_tunnel; 1724 m_freem(m); 1725 return; 1726 } 1727 if (ip->ip_src.s_addr != last_encap_src) { 1728 struct vif *vifp = viftable; 1729 struct vif *vife = vifp + numvifs; 1730 1731 last_encap_src = ip->ip_src.s_addr; 1732 last_encap_vif = NULL; 1733 for ( ; vifp < vife; ++vifp) 1734 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1735 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1736 == VIFF_TUNNEL) 1737 last_encap_vif = vifp; 1738 break; 1739 } 1740 } 1741 if (last_encap_vif == NULL) { 1742 last_encap_src = INADDR_ANY; 1743 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1744 m_freem(m); 1745 if (mrtdebug) 1746 log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", 1747 (u_long)ntohl(ip->ip_src.s_addr)); 1748 return; 1749 } 1750 1751 if (hlen > sizeof(struct ip)) 1752 ip_stripoptions(m); 1753 m->m_data += sizeof(struct ip); 1754 m->m_len -= sizeof(struct ip); 1755 m->m_pkthdr.len -= sizeof(struct ip); 1756 m->m_pkthdr.rcvif = last_encap_vif->v_ifp; 1757 1758 netisr_queue(NETISR_IP, m); 1759 } 1760 1761 /* 1762 * Token bucket filter module 1763 */ 1764 1765 static void 1766 tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len) 1767 { 1768 struct tbf *t = vifp->v_tbf; 1769 1770 if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */ 1771 mrtstat.mrts_pkt2large++; 1772 m_freem(m); 1773 return; 1774 } 1775 1776 tbf_update_tokens(vifp); 1777 1778 if (t->tbf_q_len == 0) { /* queue empty... */ 1779 if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */ 1780 t->tbf_n_tok -= p_len; 1781 tbf_send_packet(vifp, m); 1782 } else { /* no, queue packet and try later */ 1783 tbf_queue(vifp, m); 1784 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1785 } 1786 } else if (t->tbf_q_len < t->tbf_max_q_len) { 1787 /* finite queue length, so queue pkts and process queue */ 1788 tbf_queue(vifp, m); 1789 tbf_process_q(vifp); 1790 } else { 1791 /* queue full, try to dq and queue and process */ 1792 if (!tbf_dq_sel(vifp, ip)) { 1793 mrtstat.mrts_q_overflow++; 1794 m_freem(m); 1795 } else { 1796 tbf_queue(vifp, m); 1797 tbf_process_q(vifp); 1798 } 1799 } 1800 } 1801 1802 /* 1803 * adds a packet to the queue at the interface 1804 */ 1805 static void 1806 tbf_queue(struct vif *vifp, struct mbuf *m) 1807 { 1808 int s = splnet(); 1809 struct tbf *t = vifp->v_tbf; 1810 1811 if (t->tbf_t == NULL) /* Queue was empty */ 1812 t->tbf_q = m; 1813 else /* Insert at tail */ 1814 t->tbf_t->m_act = m; 1815 1816 t->tbf_t = m; /* Set new tail pointer */ 1817 1818 #ifdef DIAGNOSTIC 1819 /* Make sure we didn't get fed a bogus mbuf */ 1820 if (m->m_act) 1821 panic("tbf_queue: m_act"); 1822 #endif 1823 m->m_act = NULL; 1824 1825 t->tbf_q_len++; 1826 1827 splx(s); 1828 } 1829 1830 /* 1831 * processes the queue at the interface 1832 */ 1833 static void 1834 tbf_process_q(struct vif *vifp) 1835 { 1836 int s = splnet(); 1837 struct tbf *t = vifp->v_tbf; 1838 1839 /* loop through the queue at the interface and send as many packets 1840 * as possible 1841 */ 1842 while (t->tbf_q_len > 0) { 1843 struct mbuf *m = t->tbf_q; 1844 int len = mtod(m, struct ip *)->ip_len; 1845 1846 /* determine if the packet can be sent */ 1847 if (len > t->tbf_n_tok) /* not enough tokens, we are done */ 1848 break; 1849 /* ok, reduce no of tokens, dequeue and send the packet. */ 1850 t->tbf_n_tok -= len; 1851 1852 t->tbf_q = m->m_act; 1853 if (--t->tbf_q_len == 0) 1854 t->tbf_t = NULL; 1855 1856 m->m_act = NULL; 1857 tbf_send_packet(vifp, m); 1858 } 1859 splx(s); 1860 } 1861 1862 static void 1863 tbf_reprocess_q(void *xvifp) 1864 { 1865 struct vif *vifp = xvifp; 1866 1867 if (ip_mrouter == NULL) 1868 return; 1869 tbf_update_tokens(vifp); 1870 tbf_process_q(vifp); 1871 if (vifp->v_tbf->tbf_q_len) 1872 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1873 } 1874 1875 /* function that will selectively discard a member of the queue 1876 * based on the precedence value and the priority 1877 */ 1878 static int 1879 tbf_dq_sel(struct vif *vifp, struct ip *ip) 1880 { 1881 int s = splnet(); 1882 u_int p; 1883 struct mbuf *m, *last; 1884 struct mbuf **np; 1885 struct tbf *t = vifp->v_tbf; 1886 1887 p = priority(vifp, ip); 1888 1889 np = &t->tbf_q; 1890 last = NULL; 1891 while ((m = *np) != NULL) { 1892 if (p > priority(vifp, mtod(m, struct ip *))) { 1893 *np = m->m_act; 1894 /* If we're removing the last packet, fix the tail pointer */ 1895 if (m == t->tbf_t) 1896 t->tbf_t = last; 1897 m_freem(m); 1898 /* It's impossible for the queue to be empty, but check anyways. */ 1899 if (--t->tbf_q_len == 0) 1900 t->tbf_t = NULL; 1901 splx(s); 1902 mrtstat.mrts_drop_sel++; 1903 return 1; 1904 } 1905 np = &m->m_act; 1906 last = m; 1907 } 1908 splx(s); 1909 return 0; 1910 } 1911 1912 static void 1913 tbf_send_packet(struct vif *vifp, struct mbuf *m) 1914 { 1915 int s = splnet(); 1916 1917 if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */ 1918 ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); 1919 else { 1920 struct ip_moptions imo; 1921 int error; 1922 static struct route ro; /* XXX check this */ 1923 1924 imo.imo_multicast_ifp = vifp->v_ifp; 1925 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1926 imo.imo_multicast_loop = 1; 1927 imo.imo_multicast_vif = -1; 1928 1929 /* 1930 * Re-entrancy should not be a problem here, because 1931 * the packets that we send out and are looped back at us 1932 * should get rejected because they appear to come from 1933 * the loopback interface, thus preventing looping. 1934 */ 1935 error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL); 1936 1937 if (mrtdebug & DEBUG_XMIT) 1938 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 1939 (int)(vifp - viftable), error); 1940 } 1941 splx(s); 1942 } 1943 1944 /* determine the current time and then 1945 * the elapsed time (between the last time and time now) 1946 * in milliseconds & update the no. of tokens in the bucket 1947 */ 1948 static void 1949 tbf_update_tokens(struct vif *vifp) 1950 { 1951 struct timeval tp; 1952 u_long tm; 1953 int s = splnet(); 1954 struct tbf *t = vifp->v_tbf; 1955 1956 GET_TIME(tp); 1957 1958 TV_DELTA(tp, t->tbf_last_pkt_t, tm); 1959 1960 /* 1961 * This formula is actually 1962 * "time in seconds" * "bytes/second". 1963 * 1964 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1965 * 1966 * The (1000/1024) was introduced in add_vif to optimize 1967 * this divide into a shift. 1968 */ 1969 t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; 1970 t->tbf_last_pkt_t = tp; 1971 1972 if (t->tbf_n_tok > MAX_BKT_SIZE) 1973 t->tbf_n_tok = MAX_BKT_SIZE; 1974 1975 splx(s); 1976 } 1977 1978 static int 1979 priority(struct vif *vifp, struct ip *ip) 1980 { 1981 int prio = 50; /* the lowest priority -- default case */ 1982 1983 /* temporary hack; may add general packet classifier some day */ 1984 1985 /* 1986 * The UDP port space is divided up into four priority ranges: 1987 * [0, 16384) : unclassified - lowest priority 1988 * [16384, 32768) : audio - highest priority 1989 * [32768, 49152) : whiteboard - medium priority 1990 * [49152, 65536) : video - low priority 1991 * 1992 * Everything else gets lowest priority. 1993 */ 1994 if (ip->ip_p == IPPROTO_UDP) { 1995 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1996 switch (ntohs(udp->uh_dport) & 0xc000) { 1997 case 0x4000: 1998 prio = 70; 1999 break; 2000 case 0x8000: 2001 prio = 60; 2002 break; 2003 case 0xc000: 2004 prio = 55; 2005 break; 2006 } 2007 } 2008 return prio; 2009 } 2010 2011 /* 2012 * End of token bucket filter modifications 2013 */ 2014 2015 static int 2016 X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) 2017 { 2018 int error, vifi, s; 2019 2020 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2021 return EOPNOTSUPP; 2022 2023 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 2024 if (error) 2025 return error; 2026 2027 s = splnet(); 2028 2029 if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ 2030 splx(s); 2031 return EADDRNOTAVAIL; 2032 } 2033 2034 if (sopt->sopt_name == IP_RSVP_VIF_ON) { 2035 /* Check if socket is available. */ 2036 if (viftable[vifi].v_rsvpd != NULL) { 2037 splx(s); 2038 return EADDRINUSE; 2039 } 2040 2041 viftable[vifi].v_rsvpd = so; 2042 /* This may seem silly, but we need to be sure we don't over-increment 2043 * the RSVP counter, in case something slips up. 2044 */ 2045 if (!viftable[vifi].v_rsvp_on) { 2046 viftable[vifi].v_rsvp_on = 1; 2047 rsvp_on++; 2048 } 2049 } else { /* must be VIF_OFF */ 2050 /* 2051 * XXX as an additional consistency check, one could make sure 2052 * that viftable[vifi].v_rsvpd == so, otherwise passing so as 2053 * first parameter is pretty useless. 2054 */ 2055 viftable[vifi].v_rsvpd = NULL; 2056 /* 2057 * This may seem silly, but we need to be sure we don't over-decrement 2058 * the RSVP counter, in case something slips up. 2059 */ 2060 if (viftable[vifi].v_rsvp_on) { 2061 viftable[vifi].v_rsvp_on = 0; 2062 rsvp_on--; 2063 } 2064 } 2065 splx(s); 2066 return 0; 2067 } 2068 2069 static void 2070 X_ip_rsvp_force_done(struct socket *so) 2071 { 2072 int vifi; 2073 int s; 2074 2075 /* Don't bother if it is not the right type of socket. */ 2076 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2077 return; 2078 2079 s = splnet(); 2080 2081 /* The socket may be attached to more than one vif...this 2082 * is perfectly legal. 2083 */ 2084 for (vifi = 0; vifi < numvifs; vifi++) { 2085 if (viftable[vifi].v_rsvpd == so) { 2086 viftable[vifi].v_rsvpd = NULL; 2087 /* This may seem silly, but we need to be sure we don't 2088 * over-decrement the RSVP counter, in case something slips up. 2089 */ 2090 if (viftable[vifi].v_rsvp_on) { 2091 viftable[vifi].v_rsvp_on = 0; 2092 rsvp_on--; 2093 } 2094 } 2095 } 2096 2097 splx(s); 2098 } 2099 2100 static void 2101 X_rsvp_input(struct mbuf *m, int off, int proto) 2102 { 2103 int vifi; 2104 struct ip *ip = mtod(m, struct ip *); 2105 struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; 2106 int s; 2107 struct ifnet *ifp; 2108 2109 if (rsvpdebug) 2110 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2111 2112 /* Can still get packets with rsvp_on = 0 if there is a local member 2113 * of the group to which the RSVP packet is addressed. But in this 2114 * case we want to throw the packet away. 2115 */ 2116 if (!rsvp_on) { 2117 m_freem(m); 2118 return; 2119 } 2120 2121 s = splnet(); 2122 2123 if (rsvpdebug) 2124 printf("rsvp_input: check vifs\n"); 2125 2126 #ifdef DIAGNOSTIC 2127 if (!(m->m_flags & M_PKTHDR)) 2128 panic("rsvp_input no hdr"); 2129 #endif 2130 2131 ifp = m->m_pkthdr.rcvif; 2132 /* Find which vif the packet arrived on. */ 2133 for (vifi = 0; vifi < numvifs; vifi++) 2134 if (viftable[vifi].v_ifp == ifp) 2135 break; 2136 2137 if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { 2138 /* 2139 * If the old-style non-vif-associated socket is set, 2140 * then use it. Otherwise, drop packet since there 2141 * is no specific socket for this vif. 2142 */ 2143 if (ip_rsvpd != NULL) { 2144 if (rsvpdebug) 2145 printf("rsvp_input: Sending packet up old-style socket\n"); 2146 rip_input(m, off, proto); /* xxx */ 2147 } else { 2148 if (rsvpdebug && vifi == numvifs) 2149 printf("rsvp_input: Can't find vif for packet.\n"); 2150 else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) 2151 printf("rsvp_input: No socket defined for vif %d\n",vifi); 2152 m_freem(m); 2153 } 2154 splx(s); 2155 return; 2156 } 2157 rsvp_src.sin_addr = ip->ip_src; 2158 2159 if (rsvpdebug && m) 2160 printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", 2161 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 2162 2163 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { 2164 if (rsvpdebug) 2165 printf("rsvp_input: Failed to append to socket\n"); 2166 } else { 2167 if (rsvpdebug) 2168 printf("rsvp_input: send packet up\n"); 2169 } 2170 2171 splx(s); 2172 } 2173 2174 /* 2175 * Code for bandwidth monitors 2176 */ 2177 2178 /* 2179 * Define common interface for timeval-related methods 2180 */ 2181 #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) 2182 #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) 2183 #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) 2184 2185 static uint32_t 2186 compute_bw_meter_flags(struct bw_upcall *req) 2187 { 2188 uint32_t flags = 0; 2189 2190 if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) 2191 flags |= BW_METER_UNIT_PACKETS; 2192 if (req->bu_flags & BW_UPCALL_UNIT_BYTES) 2193 flags |= BW_METER_UNIT_BYTES; 2194 if (req->bu_flags & BW_UPCALL_GEQ) 2195 flags |= BW_METER_GEQ; 2196 if (req->bu_flags & BW_UPCALL_LEQ) 2197 flags |= BW_METER_LEQ; 2198 2199 return flags; 2200 } 2201 2202 /* 2203 * Add a bw_meter entry 2204 */ 2205 static int 2206 add_bw_upcall(struct bw_upcall *req) 2207 { 2208 struct mfc *mfc; 2209 struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, 2210 BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; 2211 struct timeval now; 2212 struct bw_meter *x; 2213 uint32_t flags; 2214 int s; 2215 2216 if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2217 return EOPNOTSUPP; 2218 2219 /* Test if the flags are valid */ 2220 if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) 2221 return EINVAL; 2222 if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) 2223 return EINVAL; 2224 if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2225 == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2226 return EINVAL; 2227 2228 /* Test if the threshold time interval is valid */ 2229 if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) 2230 return EINVAL; 2231 2232 flags = compute_bw_meter_flags(req); 2233 2234 /* 2235 * Find if we have already same bw_meter entry 2236 */ 2237 s = splnet(); 2238 mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2239 if (mfc == NULL) { 2240 splx(s); 2241 return EADDRNOTAVAIL; 2242 } 2243 for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { 2244 if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2245 &req->bu_threshold.b_time, ==)) && 2246 (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2247 (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2248 (x->bm_flags & BW_METER_USER_FLAGS) == flags) { 2249 splx(s); 2250 return 0; /* XXX Already installed */ 2251 } 2252 } 2253 splx(s); 2254 2255 /* Allocate the new bw_meter entry */ 2256 x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); 2257 if (x == NULL) 2258 return ENOBUFS; 2259 2260 /* Set the new bw_meter entry */ 2261 x->bm_threshold.b_time = req->bu_threshold.b_time; 2262 GET_TIME(now); 2263 x->bm_start_time = now; 2264 x->bm_threshold.b_packets = req->bu_threshold.b_packets; 2265 x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; 2266 x->bm_measured.b_packets = 0; 2267 x->bm_measured.b_bytes = 0; 2268 x->bm_flags = flags; 2269 x->bm_time_next = NULL; 2270 x->bm_time_hash = BW_METER_BUCKETS; 2271 2272 /* Add the new bw_meter entry to the front of entries for this MFC */ 2273 s = splnet(); 2274 x->bm_mfc = mfc; 2275 x->bm_mfc_next = mfc->mfc_bw_meter; 2276 mfc->mfc_bw_meter = x; 2277 schedule_bw_meter(x, &now); 2278 splx(s); 2279 2280 return 0; 2281 } 2282 2283 static void 2284 free_bw_list(struct bw_meter *list) 2285 { 2286 while (list != NULL) { 2287 struct bw_meter *x = list; 2288 2289 list = list->bm_mfc_next; 2290 unschedule_bw_meter(x); 2291 free(x, M_BWMETER); 2292 } 2293 } 2294 2295 /* 2296 * Delete one or multiple bw_meter entries 2297 */ 2298 static int 2299 del_bw_upcall(struct bw_upcall *req) 2300 { 2301 struct mfc *mfc; 2302 struct bw_meter *x; 2303 int s; 2304 2305 if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2306 return EOPNOTSUPP; 2307 2308 s = splnet(); 2309 /* Find the corresponding MFC entry */ 2310 mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2311 if (mfc == NULL) { 2312 splx(s); 2313 return EADDRNOTAVAIL; 2314 } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { 2315 /* 2316 * Delete all bw_meter entries for this mfc 2317 */ 2318 struct bw_meter *list; 2319 2320 list = mfc->mfc_bw_meter; 2321 mfc->mfc_bw_meter = NULL; 2322 splx(s); 2323 free_bw_list(list); 2324 return 0; 2325 } else { /* Delete a single bw_meter entry */ 2326 struct bw_meter *prev; 2327 uint32_t flags = 0; 2328 2329 flags = compute_bw_meter_flags(req); 2330 2331 /* Find the bw_meter entry to delete */ 2332 for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; 2333 x = x->bm_mfc_next) { 2334 if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2335 &req->bu_threshold.b_time, ==)) && 2336 (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2337 (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2338 (x->bm_flags & BW_METER_USER_FLAGS) == flags) 2339 break; 2340 } 2341 if (x != NULL) { /* Delete entry from the list for this MFC */ 2342 if (prev != NULL) 2343 prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ 2344 else 2345 x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ 2346 splx(s); 2347 2348 unschedule_bw_meter(x); 2349 /* Free the bw_meter entry */ 2350 free(x, M_BWMETER); 2351 return 0; 2352 } else { 2353 splx(s); 2354 return EINVAL; 2355 } 2356 } 2357 /* NOTREACHED */ 2358 } 2359 2360 /* 2361 * Perform bandwidth measurement processing that may result in an upcall 2362 */ 2363 static void 2364 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) 2365 { 2366 struct timeval delta; 2367 int s; 2368 2369 s = splnet(); 2370 delta = *nowp; 2371 BW_TIMEVALDECR(&delta, &x->bm_start_time); 2372 2373 if (x->bm_flags & BW_METER_GEQ) { 2374 /* 2375 * Processing for ">=" type of bw_meter entry 2376 */ 2377 if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2378 /* Reset the bw_meter entry */ 2379 x->bm_start_time = *nowp; 2380 x->bm_measured.b_packets = 0; 2381 x->bm_measured.b_bytes = 0; 2382 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2383 } 2384 2385 /* Record that a packet is received */ 2386 x->bm_measured.b_packets++; 2387 x->bm_measured.b_bytes += plen; 2388 2389 /* 2390 * Test if we should deliver an upcall 2391 */ 2392 if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { 2393 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2394 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || 2395 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2396 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { 2397 /* Prepare an upcall for delivery */ 2398 bw_meter_prepare_upcall(x, nowp); 2399 x->bm_flags |= BW_METER_UPCALL_DELIVERED; 2400 } 2401 } 2402 } else if (x->bm_flags & BW_METER_LEQ) { 2403 /* 2404 * Processing for "<=" type of bw_meter entry 2405 */ 2406 if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2407 /* 2408 * We are behind time with the multicast forwarding table 2409 * scanning for "<=" type of bw_meter entries, so test now 2410 * if we should deliver an upcall. 2411 */ 2412 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2413 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2414 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2415 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2416 /* Prepare an upcall for delivery */ 2417 bw_meter_prepare_upcall(x, nowp); 2418 } 2419 /* Reschedule the bw_meter entry */ 2420 unschedule_bw_meter(x); 2421 schedule_bw_meter(x, nowp); 2422 } 2423 2424 /* Record that a packet is received */ 2425 x->bm_measured.b_packets++; 2426 x->bm_measured.b_bytes += plen; 2427 2428 /* 2429 * Test if we should restart the measuring interval 2430 */ 2431 if ((x->bm_flags & BW_METER_UNIT_PACKETS && 2432 x->bm_measured.b_packets <= x->bm_threshold.b_packets) || 2433 (x->bm_flags & BW_METER_UNIT_BYTES && 2434 x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { 2435 /* Don't restart the measuring interval */ 2436 } else { 2437 /* Do restart the measuring interval */ 2438 /* 2439 * XXX: note that we don't unschedule and schedule, because this 2440 * might be too much overhead per packet. Instead, when we process 2441 * all entries for a given timer hash bin, we check whether it is 2442 * really a timeout. If not, we reschedule at that time. 2443 */ 2444 x->bm_start_time = *nowp; 2445 x->bm_measured.b_packets = 0; 2446 x->bm_measured.b_bytes = 0; 2447 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2448 } 2449 } 2450 splx(s); 2451 } 2452 2453 /* 2454 * Prepare a bandwidth-related upcall 2455 */ 2456 static void 2457 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) 2458 { 2459 struct timeval delta; 2460 struct bw_upcall *u; 2461 int s; 2462 2463 s = splnet(); 2464 2465 /* 2466 * Compute the measured time interval 2467 */ 2468 delta = *nowp; 2469 BW_TIMEVALDECR(&delta, &x->bm_start_time); 2470 2471 /* 2472 * If there are too many pending upcalls, deliver them now 2473 */ 2474 if (bw_upcalls_n >= BW_UPCALLS_MAX) 2475 bw_upcalls_send(); 2476 2477 /* 2478 * Set the bw_upcall entry 2479 */ 2480 u = &bw_upcalls[bw_upcalls_n++]; 2481 u->bu_src = x->bm_mfc->mfc_origin; 2482 u->bu_dst = x->bm_mfc->mfc_mcastgrp; 2483 u->bu_threshold.b_time = x->bm_threshold.b_time; 2484 u->bu_threshold.b_packets = x->bm_threshold.b_packets; 2485 u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; 2486 u->bu_measured.b_time = delta; 2487 u->bu_measured.b_packets = x->bm_measured.b_packets; 2488 u->bu_measured.b_bytes = x->bm_measured.b_bytes; 2489 u->bu_flags = 0; 2490 if (x->bm_flags & BW_METER_UNIT_PACKETS) 2491 u->bu_flags |= BW_UPCALL_UNIT_PACKETS; 2492 if (x->bm_flags & BW_METER_UNIT_BYTES) 2493 u->bu_flags |= BW_UPCALL_UNIT_BYTES; 2494 if (x->bm_flags & BW_METER_GEQ) 2495 u->bu_flags |= BW_UPCALL_GEQ; 2496 if (x->bm_flags & BW_METER_LEQ) 2497 u->bu_flags |= BW_UPCALL_LEQ; 2498 2499 splx(s); 2500 } 2501 2502 /* 2503 * Send the pending bandwidth-related upcalls 2504 */ 2505 static void 2506 bw_upcalls_send(void) 2507 { 2508 struct mbuf *m; 2509 int len = bw_upcalls_n * sizeof(bw_upcalls[0]); 2510 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2511 static struct igmpmsg igmpmsg = { 0, /* unused1 */ 2512 0, /* unused2 */ 2513 IGMPMSG_BW_UPCALL,/* im_msgtype */ 2514 0, /* im_mbz */ 2515 0, /* im_vif */ 2516 0, /* unused3 */ 2517 { 0 }, /* im_src */ 2518 { 0 } }; /* im_dst */ 2519 2520 if (bw_upcalls_n == 0) 2521 return; /* No pending upcalls */ 2522 2523 bw_upcalls_n = 0; 2524 2525 /* 2526 * Allocate a new mbuf, initialize it with the header and 2527 * the payload for the pending calls. 2528 */ 2529 MGETHDR(m, M_DONTWAIT, MT_HEADER); 2530 if (m == NULL) { 2531 log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); 2532 return; 2533 } 2534 2535 m->m_len = m->m_pkthdr.len = 0; 2536 m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); 2537 m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&bw_upcalls[0]); 2538 2539 /* 2540 * Send the upcalls 2541 * XXX do we need to set the address in k_igmpsrc ? 2542 */ 2543 mrtstat.mrts_upcalls++; 2544 if (socket_send(ip_mrouter, m, &k_igmpsrc) < 0) { 2545 log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); 2546 ++mrtstat.mrts_upq_sockfull; 2547 } 2548 } 2549 2550 /* 2551 * Compute the timeout hash value for the bw_meter entries 2552 */ 2553 #define BW_METER_TIMEHASH(bw_meter, hash) \ 2554 do { \ 2555 struct timeval next_timeval = (bw_meter)->bm_start_time; \ 2556 \ 2557 BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ 2558 (hash) = next_timeval.tv_sec; \ 2559 if (next_timeval.tv_usec) \ 2560 (hash)++; /* XXX: make sure we don't timeout early */ \ 2561 (hash) %= BW_METER_BUCKETS; \ 2562 } while (0) 2563 2564 /* 2565 * Schedule a timer to process periodically bw_meter entry of type "<=" 2566 * by linking the entry in the proper hash bucket. 2567 */ 2568 static void 2569 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) 2570 { 2571 int time_hash, s; 2572 2573 if (!(x->bm_flags & BW_METER_LEQ)) 2574 return; /* XXX: we schedule timers only for "<=" entries */ 2575 2576 /* 2577 * Reset the bw_meter entry 2578 */ 2579 s = splnet(); 2580 x->bm_start_time = *nowp; 2581 x->bm_measured.b_packets = 0; 2582 x->bm_measured.b_bytes = 0; 2583 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2584 splx(s); 2585 2586 /* 2587 * Compute the timeout hash value and insert the entry 2588 */ 2589 BW_METER_TIMEHASH(x, time_hash); 2590 x->bm_time_next = bw_meter_timers[time_hash]; 2591 bw_meter_timers[time_hash] = x; 2592 x->bm_time_hash = time_hash; 2593 } 2594 2595 /* 2596 * Unschedule the periodic timer that processes bw_meter entry of type "<=" 2597 * by removing the entry from the proper hash bucket. 2598 */ 2599 static void 2600 unschedule_bw_meter(struct bw_meter *x) 2601 { 2602 int time_hash; 2603 struct bw_meter *prev, *tmp; 2604 2605 if (!(x->bm_flags & BW_METER_LEQ)) 2606 return; /* XXX: we schedule timers only for "<=" entries */ 2607 2608 /* 2609 * Compute the timeout hash value and delete the entry 2610 */ 2611 time_hash = x->bm_time_hash; 2612 if (time_hash >= BW_METER_BUCKETS) 2613 return; /* Entry was not scheduled */ 2614 2615 for (prev = NULL, tmp = bw_meter_timers[time_hash]; 2616 tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) 2617 if (tmp == x) 2618 break; 2619 2620 if (tmp == NULL) 2621 panic("unschedule_bw_meter: bw_meter entry not found"); 2622 2623 if (prev != NULL) 2624 prev->bm_time_next = x->bm_time_next; 2625 else 2626 bw_meter_timers[time_hash] = x->bm_time_next; 2627 2628 x->bm_time_next = NULL; 2629 x->bm_time_hash = BW_METER_BUCKETS; 2630 } 2631 2632 2633 /* 2634 * Process all "<=" type of bw_meter that should be processed now, 2635 * and for each entry prepare an upcall if necessary. Each processed 2636 * entry is rescheduled again for the (periodic) processing. 2637 * 2638 * This is run periodically (once per second normally). On each round, 2639 * all the potentially matching entries are in the hash slot that we are 2640 * looking at. 2641 */ 2642 static void 2643 bw_meter_process() 2644 { 2645 static uint32_t last_tv_sec; /* last time we processed this */ 2646 2647 uint32_t loops; 2648 int i, s; 2649 struct timeval now, process_endtime; 2650 2651 GET_TIME(now); 2652 if (last_tv_sec == now.tv_sec) 2653 return; /* nothing to do */ 2654 2655 s = splnet(); 2656 loops = now.tv_sec - last_tv_sec; 2657 last_tv_sec = now.tv_sec; 2658 if (loops > BW_METER_BUCKETS) 2659 loops = BW_METER_BUCKETS; 2660 2661 /* 2662 * Process all bins of bw_meter entries from the one after the last 2663 * processed to the current one. On entry, i points to the last bucket 2664 * visited, so we need to increment i at the beginning of the loop. 2665 */ 2666 for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { 2667 struct bw_meter *x, *tmp_list; 2668 2669 if (++i >= BW_METER_BUCKETS) 2670 i = 0; 2671 2672 /* Disconnect the list of bw_meter entries from the bin */ 2673 tmp_list = bw_meter_timers[i]; 2674 bw_meter_timers[i] = NULL; 2675 2676 /* Process the list of bw_meter entries */ 2677 while (tmp_list != NULL) { 2678 x = tmp_list; 2679 tmp_list = tmp_list->bm_time_next; 2680 2681 /* Test if the time interval is over */ 2682 process_endtime = x->bm_start_time; 2683 BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); 2684 if (BW_TIMEVALCMP(&process_endtime, &now, >)) { 2685 /* Not yet: reschedule, but don't reset */ 2686 int time_hash; 2687 2688 BW_METER_TIMEHASH(x, time_hash); 2689 if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { 2690 /* 2691 * XXX: somehow the bin processing is a bit ahead of time. 2692 * Put the entry in the next bin. 2693 */ 2694 if (++time_hash >= BW_METER_BUCKETS) 2695 time_hash = 0; 2696 } 2697 x->bm_time_next = bw_meter_timers[time_hash]; 2698 bw_meter_timers[time_hash] = x; 2699 x->bm_time_hash = time_hash; 2700 2701 continue; 2702 } 2703 2704 /* 2705 * Test if we should deliver an upcall 2706 */ 2707 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2708 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2709 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2710 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2711 /* Prepare an upcall for delivery */ 2712 bw_meter_prepare_upcall(x, &now); 2713 } 2714 2715 /* 2716 * Reschedule for next processing 2717 */ 2718 schedule_bw_meter(x, &now); 2719 } 2720 } 2721 splx(s); 2722 2723 /* Send all upcalls that are pending delivery */ 2724 bw_upcalls_send(); 2725 } 2726 2727 /* 2728 * A periodic function for sending all upcalls that are pending delivery 2729 */ 2730 static void 2731 expire_bw_upcalls_send(void *unused) 2732 { 2733 bw_upcalls_send(); 2734 2735 bw_upcalls_ch = timeout(expire_bw_upcalls_send, NULL, BW_UPCALLS_PERIOD); 2736 } 2737 2738 /* 2739 * A periodic function for periodic scanning of the multicast forwarding 2740 * table for processing all "<=" bw_meter entries. 2741 */ 2742 static void 2743 expire_bw_meter_process(void *unused) 2744 { 2745 if (mrt_api_config & MRT_MFC_BW_UPCALL) 2746 bw_meter_process(); 2747 2748 bw_meter_ch = timeout(expire_bw_meter_process, NULL, BW_METER_PERIOD); 2749 } 2750 2751 /* 2752 * End of bandwidth monitoring code 2753 */ 2754 2755 #ifdef PIM 2756 /* 2757 * Send the packet up to the user daemon, or eventually do kernel encapsulation 2758 * 2759 */ 2760 static int 2761 pim_register_send(struct ip *ip, struct vif *vifp, 2762 struct mbuf *m, struct mfc *rt) 2763 { 2764 struct mbuf *mb_copy, *mm; 2765 2766 if (mrtdebug & DEBUG_PIM) 2767 log(LOG_DEBUG, "pim_register_send: "); 2768 2769 mb_copy = pim_register_prepare(ip, m); 2770 if (mb_copy == NULL) 2771 return ENOBUFS; 2772 2773 /* 2774 * Send all the fragments. Note that the mbuf for each fragment 2775 * is freed by the sending machinery. 2776 */ 2777 for (mm = mb_copy; mm; mm = mb_copy) { 2778 mb_copy = mm->m_nextpkt; 2779 mm->m_nextpkt = 0; 2780 mm = m_pullup(mm, sizeof(struct ip)); 2781 if (mm != NULL) { 2782 ip = mtod(mm, struct ip *); 2783 if ((mrt_api_config & MRT_MFC_RP) && 2784 (rt->mfc_rp.s_addr != INADDR_ANY)) { 2785 pim_register_send_rp(ip, vifp, mm, rt); 2786 } else { 2787 pim_register_send_upcall(ip, vifp, mm, rt); 2788 } 2789 } 2790 } 2791 2792 return 0; 2793 } 2794 2795 /* 2796 * Return a copy of the data packet that is ready for PIM Register 2797 * encapsulation. 2798 * XXX: Note that in the returned copy the IP header is a valid one. 2799 */ 2800 static struct mbuf * 2801 pim_register_prepare(struct ip *ip, struct mbuf *m) 2802 { 2803 struct mbuf *mb_copy = NULL; 2804 int mtu; 2805 2806 /* Take care of delayed checksums */ 2807 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2808 in_delayed_cksum(m); 2809 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2810 } 2811 2812 /* 2813 * Copy the old packet & pullup its IP header into the 2814 * new mbuf so we can modify it. 2815 */ 2816 mb_copy = m_copypacket(m, M_DONTWAIT); 2817 if (mb_copy == NULL) 2818 return NULL; 2819 mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 2820 if (mb_copy == NULL) 2821 return NULL; 2822 2823 /* take care of the TTL */ 2824 ip = mtod(mb_copy, struct ip *); 2825 --ip->ip_ttl; 2826 2827 /* Compute the MTU after the PIM Register encapsulation */ 2828 mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 2829 2830 if (ip->ip_len <= mtu) { 2831 /* Turn the IP header into a valid one */ 2832 ip->ip_len = htons(ip->ip_len); 2833 ip->ip_off = htons(ip->ip_off); 2834 ip->ip_sum = 0; 2835 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 2836 } else { 2837 /* Fragment the packet */ 2838 if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) { 2839 m_freem(mb_copy); 2840 return NULL; 2841 } 2842 } 2843 return mb_copy; 2844 } 2845 2846 /* 2847 * Send an upcall with the data packet to the user-level process. 2848 */ 2849 static int 2850 pim_register_send_upcall(struct ip *ip, struct vif *vifp, 2851 struct mbuf *mb_copy, struct mfc *rt) 2852 { 2853 struct mbuf *mb_first; 2854 int len = ntohs(ip->ip_len); 2855 struct igmpmsg *im; 2856 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2857 2858 /* 2859 * Add a new mbuf with an upcall header 2860 */ 2861 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2862 if (mb_first == NULL) { 2863 m_freem(mb_copy); 2864 return ENOBUFS; 2865 } 2866 mb_first->m_data += max_linkhdr; 2867 mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 2868 mb_first->m_len = sizeof(struct igmpmsg); 2869 mb_first->m_next = mb_copy; 2870 2871 /* Send message to routing daemon */ 2872 im = mtod(mb_first, struct igmpmsg *); 2873 im->im_msgtype = IGMPMSG_WHOLEPKT; 2874 im->im_mbz = 0; 2875 im->im_vif = vifp - viftable; 2876 im->im_src = ip->ip_src; 2877 im->im_dst = ip->ip_dst; 2878 2879 k_igmpsrc.sin_addr = ip->ip_src; 2880 2881 mrtstat.mrts_upcalls++; 2882 2883 if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) { 2884 if (mrtdebug & DEBUG_PIM) 2885 log(LOG_WARNING, 2886 "mcast: pim_register_send_upcall: ip_mrouter socket queue full"); 2887 ++mrtstat.mrts_upq_sockfull; 2888 return ENOBUFS; 2889 } 2890 2891 /* Keep statistics */ 2892 pimstat.pims_snd_registers_msgs++; 2893 pimstat.pims_snd_registers_bytes += len; 2894 2895 return 0; 2896 } 2897 2898 /* 2899 * Encapsulate the data packet in PIM Register message and send it to the RP. 2900 */ 2901 static int 2902 pim_register_send_rp(struct ip *ip, struct vif *vifp, 2903 struct mbuf *mb_copy, struct mfc *rt) 2904 { 2905 struct mbuf *mb_first; 2906 struct ip *ip_outer; 2907 struct pim_encap_pimhdr *pimhdr; 2908 int len = ntohs(ip->ip_len); 2909 vifi_t vifi = rt->mfc_parent; 2910 2911 if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) { 2912 m_freem(mb_copy); 2913 return EADDRNOTAVAIL; /* The iif vif is invalid */ 2914 } 2915 2916 /* 2917 * Add a new mbuf with the encapsulating header 2918 */ 2919 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2920 if (mb_first == NULL) { 2921 m_freem(mb_copy); 2922 return ENOBUFS; 2923 } 2924 mb_first->m_data += max_linkhdr; 2925 mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2926 mb_first->m_next = mb_copy; 2927 2928 mb_first->m_pkthdr.len = len + mb_first->m_len; 2929 2930 /* 2931 * Fill in the encapsulating IP and PIM header 2932 */ 2933 ip_outer = mtod(mb_first, struct ip *); 2934 *ip_outer = pim_encap_iphdr; 2935 #ifdef RANDOM_IP_ID 2936 ip_outer->ip_id = ip_randomid(); 2937 #else 2938 ip_outer->ip_id = htons(ip_id++); 2939 #endif 2940 ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2941 ip_outer->ip_src = viftable[vifi].v_lcl_addr; 2942 ip_outer->ip_dst = rt->mfc_rp; 2943 /* 2944 * Copy the inner header TOS to the outer header, and take care of the 2945 * IP_DF bit. 2946 */ 2947 ip_outer->ip_tos = ip->ip_tos; 2948 if (ntohs(ip->ip_off) & IP_DF) 2949 ip_outer->ip_off |= IP_DF; 2950 pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer 2951 + sizeof(pim_encap_iphdr)); 2952 *pimhdr = pim_encap_pimhdr; 2953 /* If the iif crosses a border, set the Border-bit */ 2954 if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config) 2955 pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 2956 2957 mb_first->m_data += sizeof(pim_encap_iphdr); 2958 pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 2959 mb_first->m_data -= sizeof(pim_encap_iphdr); 2960 2961 if (vifp->v_rate_limit == 0) 2962 tbf_send_packet(vifp, mb_first); 2963 else 2964 tbf_control(vifp, mb_first, ip, ip_outer->ip_len); 2965 2966 /* Keep statistics */ 2967 pimstat.pims_snd_registers_msgs++; 2968 pimstat.pims_snd_registers_bytes += len; 2969 2970 return 0; 2971 } 2972 2973 /* 2974 * PIM-SMv2 and PIM-DM messages processing. 2975 * Receives and verifies the PIM control messages, and passes them 2976 * up to the listening socket, using rip_input(). 2977 * The only message with special processing is the PIM_REGISTER message 2978 * (used by PIM-SM): the PIM header is stripped off, and the inner packet 2979 * is passed to if_simloop(). 2980 */ 2981 void 2982 pim_input(struct mbuf *m, int off, int proto) 2983 { 2984 struct ip *ip = mtod(m, struct ip *); 2985 struct pim *pim; 2986 int minlen; 2987 int datalen = ip->ip_len; 2988 int ip_tos; 2989 int iphlen = off; 2990 2991 /* Keep statistics */ 2992 pimstat.pims_rcv_total_msgs++; 2993 pimstat.pims_rcv_total_bytes += datalen; 2994 2995 /* 2996 * Validate lengths 2997 */ 2998 if (datalen < PIM_MINLEN) { 2999 pimstat.pims_rcv_tooshort++; 3000 log(LOG_ERR, "pim_input: packet size too small %d from %lx\n", 3001 datalen, (u_long)ip->ip_src.s_addr); 3002 m_freem(m); 3003 return; 3004 } 3005 3006 /* 3007 * If the packet is at least as big as a REGISTER, go agead 3008 * and grab the PIM REGISTER header size, to avoid another 3009 * possible m_pullup() later. 3010 * 3011 * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 3012 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 3013 */ 3014 minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); 3015 /* 3016 * Get the IP and PIM headers in contiguous memory, and 3017 * possibly the PIM REGISTER header. 3018 */ 3019 if ((m->m_flags & M_EXT || m->m_len < minlen) && 3020 (m = m_pullup(m, minlen)) == 0) { 3021 log(LOG_ERR, "pim_input: m_pullup failure\n"); 3022 return; 3023 } 3024 /* m_pullup() may have given us a new mbuf so reset ip. */ 3025 ip = mtod(m, struct ip *); 3026 ip_tos = ip->ip_tos; 3027 3028 /* adjust mbuf to point to the PIM header */ 3029 m->m_data += iphlen; 3030 m->m_len -= iphlen; 3031 pim = mtod(m, struct pim *); 3032 3033 /* 3034 * Validate checksum. If PIM REGISTER, exclude the data packet. 3035 * 3036 * XXX: some older PIMv2 implementations don't make this distinction, 3037 * so for compatibility reason perform the checksum over part of the 3038 * message, and if error, then over the whole message. 3039 */ 3040 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { 3041 /* do nothing, checksum okay */ 3042 } else if (in_cksum(m, datalen)) { 3043 pimstat.pims_rcv_badsum++; 3044 if (mrtdebug & DEBUG_PIM) 3045 log(LOG_DEBUG, "pim_input: invalid checksum"); 3046 m_freem(m); 3047 return; 3048 } 3049 3050 /* PIM version check */ 3051 if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { 3052 pimstat.pims_rcv_badversion++; 3053 log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n", 3054 PIM_VT_V(pim->pim_vt), PIM_VERSION); 3055 m_freem(m); 3056 return; 3057 } 3058 3059 /* restore mbuf back to the outer IP */ 3060 m->m_data -= iphlen; 3061 m->m_len += iphlen; 3062 3063 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { 3064 /* 3065 * Since this is a REGISTER, we'll make a copy of the register 3066 * headers ip + pim + u_int32 + encap_ip, to be passed up to the 3067 * routing daemon. 3068 */ 3069 struct sockaddr_in dst = { sizeof(dst), AF_INET }; 3070 struct mbuf *mcp; 3071 struct ip *encap_ip; 3072 u_int32_t *reghdr; 3073 3074 if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) { 3075 if (mrtdebug & DEBUG_PIM) 3076 log(LOG_DEBUG, 3077 "pim_input: register vif not set: %d\n", reg_vif_num); 3078 m_freem(m); 3079 return; 3080 } 3081 3082 /* 3083 * Validate length 3084 */ 3085 if (datalen < PIM_REG_MINLEN) { 3086 pimstat.pims_rcv_tooshort++; 3087 pimstat.pims_rcv_badregisters++; 3088 log(LOG_ERR, 3089 "pim_input: register packet size too small %d from %lx\n", 3090 datalen, (u_long)ip->ip_src.s_addr); 3091 m_freem(m); 3092 return; 3093 } 3094 3095 reghdr = (u_int32_t *)(pim + 1); 3096 encap_ip = (struct ip *)(reghdr + 1); 3097 3098 if (mrtdebug & DEBUG_PIM) { 3099 log(LOG_DEBUG, 3100 "pim_input[register], encap_ip: %lx -> %lx, encap_ip len %d\n", 3101 (u_long)ntohl(encap_ip->ip_src.s_addr), 3102 (u_long)ntohl(encap_ip->ip_dst.s_addr), 3103 ntohs(encap_ip->ip_len)); 3104 } 3105 3106 /* verify the version number of the inner packet */ 3107 if (encap_ip->ip_v != IPVERSION) { 3108 pimstat.pims_rcv_badregisters++; 3109 if (mrtdebug & DEBUG_PIM) { 3110 log(LOG_DEBUG, "pim_input: invalid IP version (%d) " 3111 "of the inner packet\n", encap_ip->ip_v); 3112 } 3113 m_freem(m); 3114 return; 3115 } 3116 3117 /* verify the inner packet is destined to a mcast group */ 3118 if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { 3119 pimstat.pims_rcv_badregisters++; 3120 if (mrtdebug & DEBUG_PIM) 3121 log(LOG_DEBUG, 3122 "pim_input: inner packet of register is not " 3123 "multicast %lx\n", 3124 (u_long)ntohl(encap_ip->ip_dst.s_addr)); 3125 m_freem(m); 3126 return; 3127 } 3128 3129 /* If a NULL_REGISTER, pass it to the daemon */ 3130 if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 3131 goto pim_input_to_daemon; 3132 3133 /* 3134 * Copy the TOS from the outer IP header to the inner IP header. 3135 */ 3136 if (encap_ip->ip_tos != ip_tos) { 3137 /* Outer TOS -> inner TOS */ 3138 encap_ip->ip_tos = ip_tos; 3139 /* Recompute the inner header checksum. Sigh... */ 3140 3141 /* adjust mbuf to point to the inner IP header */ 3142 m->m_data += (iphlen + PIM_MINLEN); 3143 m->m_len -= (iphlen + PIM_MINLEN); 3144 3145 encap_ip->ip_sum = 0; 3146 encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); 3147 3148 /* restore mbuf to point back to the outer IP header */ 3149 m->m_data -= (iphlen + PIM_MINLEN); 3150 m->m_len += (iphlen + PIM_MINLEN); 3151 } 3152 3153 /* 3154 * Decapsulate the inner IP packet and loopback to forward it 3155 * as a normal multicast packet. Also, make a copy of the 3156 * outer_iphdr + pimhdr + reghdr + encap_iphdr 3157 * to pass to the daemon later, so it can take the appropriate 3158 * actions (e.g., send back PIM_REGISTER_STOP). 3159 * XXX: here m->m_data points to the outer IP header. 3160 */ 3161 mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); 3162 if (mcp == NULL) { 3163 log(LOG_ERR, 3164 "pim_input: pim register: could not copy register head\n"); 3165 m_freem(m); 3166 return; 3167 } 3168 3169 /* Keep statistics */ 3170 /* XXX: registers_bytes include only the encap. mcast pkt */ 3171 pimstat.pims_rcv_registers_msgs++; 3172 pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len); 3173 3174 /* 3175 * forward the inner ip packet; point m_data at the inner ip. 3176 */ 3177 m_adj(m, iphlen + PIM_MINLEN); 3178 3179 if (mrtdebug & DEBUG_PIM) { 3180 log(LOG_DEBUG, 3181 "pim_input: forwarding decapsulated register: " 3182 "src %lx, dst %lx, vif %d\n", 3183 (u_long)ntohl(encap_ip->ip_src.s_addr), 3184 (u_long)ntohl(encap_ip->ip_dst.s_addr), 3185 reg_vif_num); 3186 } 3187 if_simloop(viftable[reg_vif_num].v_ifp, m, dst.sin_family, 0); 3188 3189 /* prepare the register head to send to the mrouting daemon */ 3190 m = mcp; 3191 } 3192 3193 pim_input_to_daemon: 3194 /* 3195 * Pass the PIM message up to the daemon; if it is a Register message, 3196 * pass the 'head' only up to the daemon. This includes the 3197 * outer IP header, PIM header, PIM-Register header and the 3198 * inner IP header. 3199 * XXX: the outer IP header pkt size of a Register is not adjust to 3200 * reflect the fact that the inner multicast data is truncated. 3201 */ 3202 rip_input(m, iphlen, proto); 3203 3204 return; 3205 } 3206 #endif /* PIM */ 3207 3208 static int 3209 ip_mroute_modevent(module_t mod, int type, void *unused) 3210 { 3211 int s; 3212 3213 switch (type) { 3214 case MOD_LOAD: 3215 s = splnet(); 3216 /* XXX Protect against multiple loading */ 3217 ip_mcast_src = X_ip_mcast_src; 3218 ip_mforward = X_ip_mforward; 3219 ip_mrouter_done = X_ip_mrouter_done; 3220 ip_mrouter_get = X_ip_mrouter_get; 3221 ip_mrouter_set = X_ip_mrouter_set; 3222 ip_rsvp_force_done = X_ip_rsvp_force_done; 3223 ip_rsvp_vif = X_ip_rsvp_vif; 3224 ipip_input = X_ipip_input; 3225 legal_vif_num = X_legal_vif_num; 3226 mrt_ioctl = X_mrt_ioctl; 3227 rsvp_input_p = X_rsvp_input; 3228 splx(s); 3229 break; 3230 3231 case MOD_UNLOAD: 3232 if (ip_mrouter) 3233 return EINVAL; 3234 3235 s = splnet(); 3236 ip_mcast_src = NULL; 3237 ip_mforward = NULL; 3238 ip_mrouter_done = NULL; 3239 ip_mrouter_get = NULL; 3240 ip_mrouter_set = NULL; 3241 ip_rsvp_force_done = NULL; 3242 ip_rsvp_vif = NULL; 3243 ipip_input = NULL; 3244 legal_vif_num = NULL; 3245 mrt_ioctl = NULL; 3246 rsvp_input_p = NULL; 3247 splx(s); 3248 break; 3249 } 3250 return 0; 3251 } 3252 3253 static moduledata_t ip_mroutemod = { 3254 "ip_mroute", 3255 ip_mroute_modevent, 3256 0 3257 }; 3258 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3259