xref: /openbsd/sys/netinet/ip_mroute.c (revision 07ea8d15)
1 /*	$OpenBSD: ip_mroute.c,v 1.4 1996/05/10 12:31:19 deraadt Exp $	*/
2 /*	$NetBSD: ip_mroute.c,v 1.27 1996/05/07 02:40:50 thorpej Exp $	*/
3 
4 /*
5  * IP multicast forwarding procedures
6  *
7  * Written by David Waitzman, BBN Labs, August 1988.
8  * Modified by Steve Deering, Stanford, February 1989.
9  * Modified by Mark J. Steiglitz, Stanford, May, 1991
10  * Modified by Van Jacobson, LBL, January 1993
11  * Modified by Ajit Thyagarajan, PARC, August 1993
12  * Modified by Bill Fenner, PARC, April 1994
13  * Modified by Charles M. Hannum, NetBSD, May 1995.
14  *
15  * MROUTING Revision: 1.2
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/mbuf.h>
21 #include <sys/socket.h>
22 #include <sys/socketvar.h>
23 #include <sys/protosw.h>
24 #include <sys/errno.h>
25 #include <sys/time.h>
26 #include <sys/kernel.h>
27 #include <sys/ioctl.h>
28 #include <sys/syslog.h>
29 #include <net/if.h>
30 #include <net/route.h>
31 #include <net/raw_cb.h>
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/in_systm.h>
35 #include <netinet/ip.h>
36 #include <netinet/ip_var.h>
37 #include <netinet/in_pcb.h>
38 #include <netinet/udp.h>
39 #include <netinet/igmp.h>
40 #include <netinet/igmp_var.h>
41 #include <netinet/ip_mroute.h>
42 
43 #include <machine/stdarg.h>
44 
#define IP_MULTICASTOPTS 0

/*
 * M_PULLUP(m, len): when (m) is non-NULL and either carries the M_EXT
 * flag or holds fewer than (len) bytes in its first mbuf, replace (m)
 * with the result of m_pullup((m), (len)).  Otherwise (m) is untouched.
 * Note that (m) and (len) are evaluated more than once.
 */
#define	M_PULLUP(m, len) \
	do { \
		if ((m) != NULL) { \
			if (((m)->m_flags & M_EXT) != 0 || \
			    (m)->m_len < (len)) \
				(m) = m_pullup((m), (len)); \
		} \
	} while (0)
51 
52 /*
53  * Globals.  All but ip_mrouter and ip_mrtproto could be static,
54  * except for netstat or debugging purposes.
55  */
56 struct socket  *ip_mrouter  = NULL;
57 int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
58 
59 #define NO_RTE_FOUND 	0x1
60 #define RTE_FOUND	0x2
61 
/*
 * Hash an (origin, group) address pair into a forwarding-cache bucket
 * index: each address is folded with itself shifted right by 10 and by
 * 20 bits, the two results are XORed, and the value is masked with
 * mfchash.
 */
#define	MFCHASH(a, g) \
	(((a) ^ ((a) >> 10) ^ ((a) >> 20) ^ \
	  (g) ^ ((g) >> 10) ^ ((g) >> 20)) & mfchash)
65 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
66 u_long	mfchash;
67 
68 u_char		nexpire[MFCTBLSIZ];
69 struct vif	viftable[MAXVIFS];
70 struct mrtstat	mrtstat;
71 u_int		mrtdebug = 0;	  /* debug level 	*/
72 #define		DEBUG_MFC	0x02
73 #define		DEBUG_FORWARD	0x04
74 #define		DEBUG_EXPIRE	0x08
75 #define		DEBUG_XMIT	0x10
76 u_int       	tbfdebug = 0;     /* tbf debug level 	*/
77 #ifdef RSVP_ISI
78 u_int		rsvpdebug = 0;	  /* rsvp debug level   */
79 extern struct socket *ip_rsvpd;
80 extern int rsvp_on;
81 #endif /* RSVP_ISI */
82 
83 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
84 #define		UPCALL_EXPIRE	6		/* number of timeouts */
85 
86 /*
87  * Define the token bucket filter structures
88  * qtable   -> each interface has an associated queue of pkts
89  */
90 
91 struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
92 
93 static int get_sg_cnt __P((struct sioc_sg_req *));
94 static int get_vif_cnt __P((struct sioc_vif_req *));
95 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
96 static int get_version __P((struct mbuf *));
97 static int set_assert __P((struct mbuf *));
98 static int get_assert __P((struct mbuf *));
99 static int add_vif __P((struct mbuf *));
100 static int del_vif __P((struct mbuf *));
101 static void update_mfc __P((struct mfcctl *, struct mfc *));
102 static void expire_mfc __P((struct mfc *));
103 static int add_mfc __P((struct mbuf *));
104 #ifdef UPCALL_TIMING
105 static void collate __P((struct timeval *));
106 #endif
107 static int del_mfc __P((struct mbuf *));
108 static int socket_send __P((struct socket *, struct mbuf *,
109 			    struct sockaddr_in *));
110 static void expire_upcalls __P((void *));
111 #ifdef RSVP_ISI
112 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
113 #else
114 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
115 #endif
116 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
117 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
118 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
119 			     u_int32_t));
120 static void tbf_queue __P((struct vif *, struct mbuf *, struct ip *));
121 static void tbf_process_q __P((struct vif *));
122 static void tbf_dequeue __P((struct vif *, int));
123 static void tbf_reprocess_q __P((void *));
124 static int tbf_dq_sel __P((struct vif *, struct ip *));
125 static void tbf_send_packet __P((struct vif *, struct mbuf *));
126 static void tbf_update_tokens __P((struct vif *));
127 static int priority __P((struct vif *, struct ip *));
128 
129 /*
130  * 'Interfaces' associated with decapsulator (so we can tell
131  * packets that went through it from ones that get reflected
132  * by a broken gateway).  These interfaces are never linked into
133  * the system ifnet list & no routes point to them.  I.e., packets
134  * can't be sent this way.  They only exist as a placeholder for
135  * multicast source verification.
136  */
137 #if 0
138 struct ifnet multicast_decap_if[MAXVIFS];
139 #endif
140 
141 #define	ENCAP_TTL	64
142 #define	ENCAP_PROTO	IPPROTO_IPIP	/* 4 */
143 
144 /* prototype IP hdr for encapsulated packets */
145 struct ip multicast_encap_iphdr = {
146 #if BYTE_ORDER == LITTLE_ENDIAN
147 	sizeof(struct ip) >> 2, IPVERSION,
148 #else
149 	IPVERSION, sizeof(struct ip) >> 2,
150 #endif
151 	0,				/* tos */
152 	sizeof(struct ip),		/* total length */
153 	0,				/* id */
154 	0,				/* frag offset */
155 	ENCAP_TTL, ENCAP_PROTO,
156 	0,				/* checksum */
157 };
158 
159 /*
160  * Private variables.
161  */
162 static vifi_t	   numvifs = 0;
163 static int have_encap_tunnel = 0;
164 
165 /*
166  * one-back cache used by ipip_input to locate a tunnel's vif
167  * given a datagram's src ip address.
168  */
169 static u_int32_t last_encap_src;
170 static struct vif *last_encap_vif;
171 
172 /*
173  * whether or not special PIM assert processing is enabled.
174  */
175 static int pim_assert;
176 /*
177  * Rate limit for assert notification messages, in usec
178  */
179 #define ASSERT_MSG_TIME		3000000
180 
/*
 * MFCFIND(o, g, rt): look up the forwarding-cache entry for origin (o)
 * and multicast group (g).  Only entries with no stalled packets
 * (mfc_stall == NULL) match.  (rt) is set to the entry found, or to
 * NULL when there is none; the lookup and miss counters in mrtstat are
 * updated accordingly.
 * A type-of-service parameter is to be added in the future.
 */

#define MFCFIND(o, g, rt) { \
	register struct mfc *_rt; \
	(rt) = NULL; \
	++mrtstat.mrts_mfc_lookups; \
	_rt = mfchashtbl[MFCHASH(o, g)].lh_first; \
	while (_rt != NULL) { \
		if (_rt->mfc_origin.s_addr == (o) && \
		    _rt->mfc_mcastgrp.s_addr == (g) && \
		    _rt->mfc_stall == NULL) { \
			(rt) = _rt; \
			break; \
		} \
		_rt = _rt->mfc_hash.le_next; \
	} \
	if ((rt) == NULL) \
		++mrtstat.mrts_mfc_misses; \
}
202 
/*
 * TV_DELTA(a, b, delta): store (a) - (b) in (delta), in microseconds.
 * Differences of 0, 1 or 2 whole seconds are handled with additions
 * only; any other difference falls back to a multiplication.
 * Borrowed from Van Jacobson's scheduling code.
 */
#define TV_DELTA(a, b, delta) { \
	register int xxs; \
	delta = (a).tv_usec - (b).tv_usec; \
	xxs = (a).tv_sec - (b).tv_sec; \
	if (xxs == 1) { \
		delta += 1000000; \
	} else if (xxs == 2) { \
		delta += 1000000; \
		delta += 1000000; \
	} else if (xxs != 0) { \
		delta += (1000000 * xxs); \
	} \
}

#ifdef UPCALL_TIMING
/* Histogram of upcall delays, filled in by collate(). */
u_int32_t upcall_data[51];
#endif /* UPCALL_TIMING */
229 
230 /*
231  * Handle MRT setsockopt commands to modify the multicast routing tables.
232  */
233 int
234 ip_mrouter_set(cmd, so, m)
235 	int cmd;
236 	struct socket *so;
237 	struct mbuf **m;
238 {
239 	int error;
240 
241 	if (cmd != MRT_INIT && so != ip_mrouter)
242 		error = EACCES;
243 	else
244 		switch (cmd) {
245 		case MRT_INIT:
246 			error = ip_mrouter_init(so, *m);
247 			break;
248 		case MRT_DONE:
249 			error = ip_mrouter_done();
250 			break;
251 		case MRT_ADD_VIF:
252 			error = add_vif(*m);
253 			break;
254 		case MRT_DEL_VIF:
255 			error = del_vif(*m);
256 			break;
257 		case MRT_ADD_MFC:
258 			error = add_mfc(*m);
259 			break;
260 		case MRT_DEL_MFC:
261 			error = del_mfc(*m);
262 			break;
263 		case MRT_ASSERT:
264 			error = set_assert(*m);
265 			break;
266 		default:
267 			error = EOPNOTSUPP;
268 			break;
269 		}
270 
271 	if (*m)
272 		m_free(*m);
273 	return (error);
274 }
275 
276 /*
277  * Handle MRT getsockopt commands
278  */
279 int
280 ip_mrouter_get(cmd, so, m)
281 	int cmd;
282 	struct socket *so;
283 	struct mbuf **m;
284 {
285 	struct mbuf *mb;
286 	int error;
287 
288 	if (so != ip_mrouter)
289 		error = EACCES;
290 	else {
291 		*m = mb = m_get(M_WAIT, MT_SOOPTS);
292 
293 		switch (cmd) {
294 		case MRT_VERSION:
295 			error = get_version(mb);
296 			break;
297 		case MRT_ASSERT:
298 			error = get_assert(mb);
299 			break;
300 		default:
301 			error = EOPNOTSUPP;
302 			break;
303 		}
304 
305 		if (error)
306 			m_free(mb);
307 	}
308 
309 	return (error);
310 }
311 
312 /*
313  * Handle ioctl commands to obtain information from the cache
314  */
315 int
316 mrt_ioctl(cmd, data)
317 	u_long cmd;
318 	caddr_t data;
319 {
320 	int error;
321 
322 	switch (cmd) {
323 	case SIOCGETVIFCNT:
324 		error = get_vif_cnt((struct sioc_vif_req *)data);
325 		break;
326 	case SIOCGETSGCNT:
327 		error = get_sg_cnt((struct sioc_sg_req *)data);
328 		break;
329 	default:
330 		error = EINVAL;
331 		break;
332 	}
333 
334 	return (error);
335 }
336 
337 /*
338  * returns the packet, byte, rpf-failure count for the source group provided
339  */
340 static int
341 get_sg_cnt(req)
342 	register struct sioc_sg_req *req;
343 {
344 	register struct mfc *rt;
345 	int s;
346 
347 	s = splsoftnet();
348 	MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
349 	splx(s);
350 	if (rt != NULL) {
351 		req->pktcnt = rt->mfc_pkt_cnt;
352 		req->bytecnt = rt->mfc_byte_cnt;
353 		req->wrong_if = rt->mfc_wrong_if;
354 	} else
355 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
356 
357 	return (0);
358 }
359 
360 /*
361  * returns the input and output packet and byte counts on the vif provided
362  */
363 static int
364 get_vif_cnt(req)
365 	register struct sioc_vif_req *req;
366 {
367 	register vifi_t vifi = req->vifi;
368 
369 	if (vifi >= numvifs)
370 		return (EINVAL);
371 
372 	req->icount = viftable[vifi].v_pkt_in;
373 	req->ocount = viftable[vifi].v_pkt_out;
374 	req->ibytes = viftable[vifi].v_bytes_in;
375 	req->obytes = viftable[vifi].v_bytes_out;
376 
377 	return (0);
378 }
379 
380 /*
381  * Enable multicast routing
382  */
383 static int
384 ip_mrouter_init(so, m)
385 	struct socket *so;
386 	struct mbuf *m;
387 {
388 	int *v;
389 
390 	if (mrtdebug)
391 		log(LOG_DEBUG,
392 		    "ip_mrouter_init: so_type = %d, pr_protocol = %d",
393 		    so->so_type, so->so_proto->pr_protocol);
394 
395 	if (so->so_type != SOCK_RAW ||
396 	    so->so_proto->pr_protocol != IPPROTO_IGMP)
397 		return (EOPNOTSUPP);
398 
399 	if (m == 0 || m->m_len < sizeof(int))
400 		return (EINVAL);
401 
402 	v = mtod(m, int *);
403 	if (*v != 1)
404 		return (EINVAL);
405 
406 	if (ip_mrouter != NULL)
407 		return (EADDRINUSE);
408 
409 	ip_mrouter = so;
410 
411 	mfchashtbl = hashinit(MFCTBLSIZ, M_MRTABLE, &mfchash);
412 	bzero((caddr_t)nexpire, sizeof(nexpire));
413 
414 	pim_assert = 0;
415 
416 	timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
417 
418 	if (mrtdebug)
419 		log(LOG_DEBUG, "ip_mrouter_init");
420 
421 	return (0);
422 }
423 
424 /*
425  * Disable multicast routing
426  */
427 int
428 ip_mrouter_done()
429 {
430 	vifi_t vifi;
431 	register struct vif *vifp;
432 	int i;
433 	int s;
434 
435 	s = splsoftnet();
436 
437 	/* Clear out all the vifs currently in use. */
438 	for (vifi = 0; vifi < numvifs; vifi++) {
439 		vifp = &viftable[vifi];
440 		if (vifp->v_lcl_addr.s_addr != 0)
441 			reset_vif(vifp);
442 	}
443 
444 	bzero((caddr_t)qtable, sizeof(qtable));
445 	numvifs = 0;
446 	pim_assert = 0;
447 
448 	untimeout(expire_upcalls, (caddr_t)NULL);
449 
450 	/*
451 	 * Free all multicast forwarding cache entries.
452 	 */
453 	for (i = 0; i < MFCTBLSIZ; i++) {
454 		register struct mfc *rt, *nrt;
455 
456 		for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
457 			nrt = rt->mfc_hash.le_next;
458 
459 			expire_mfc(rt);
460 		}
461 	}
462 	free(mfchashtbl, M_MRTABLE);
463 
464 	/* Reset de-encapsulation cache. */
465 	have_encap_tunnel = 0;
466 
467 	ip_mrouter = NULL;
468 
469 	splx(s);
470 
471 	if (mrtdebug)
472 		log(LOG_DEBUG, "ip_mrouter_done");
473 
474 	return (0);
475 }
476 
477 static int
478 get_version(m)
479 	struct mbuf *m;
480 {
481 	int *v = mtod(m, int *);
482 
483 	*v = 0x0305;	/* XXX !!!! */
484 	m->m_len = sizeof(int);
485 	return (0);
486 }
487 
488 /*
489  * Set PIM assert processing global
490  */
491 static int
492 set_assert(m)
493 	struct mbuf *m;
494 {
495 	int *i;
496 
497 	if (m == 0 || m->m_len < sizeof(int))
498 		return (EINVAL);
499 
500 	i = mtod(m, int *);
501 	pim_assert = !!*i;
502 	return (0);
503 }
504 
505 /*
506  * Get PIM assert processing global
507  */
508 static int
509 get_assert(m)
510 	struct mbuf *m;
511 {
512 	int *i = mtod(m, int *);
513 
514 	*i = pim_assert;
515 	m->m_len = sizeof(int);
516 	return (0);
517 }
518 
519 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
520 
521 /*
522  * Add a vif to the vif table
523  */
524 static int
525 add_vif(m)
526 	struct mbuf *m;
527 {
528 	register struct vifctl *vifcp;
529 	register struct vif *vifp;
530 	struct ifaddr *ifa;
531 	struct ifnet *ifp;
532 	struct ifreq ifr;
533 	int error, s;
534 
535 	if (m == 0 || m->m_len < sizeof(struct vifctl))
536 		return (EINVAL);
537 
538 	vifcp = mtod(m, struct vifctl *);
539 	if (vifcp->vifc_vifi >= MAXVIFS)
540 		return (EINVAL);
541 
542 	vifp = &viftable[vifcp->vifc_vifi];
543 	if (vifp->v_lcl_addr.s_addr != 0)
544 		return (EADDRINUSE);
545 
546 	/* Find the interface with an address in AF_INET family. */
547 	sin.sin_addr = vifcp->vifc_lcl_addr;
548 	ifa = ifa_ifwithaddr(sintosa(&sin));
549 	if (ifa == 0)
550 		return (EADDRNOTAVAIL);
551 
552 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
553 		if (vifcp->vifc_flags & VIFF_SRCRT) {
554 			log(LOG_ERR, "Source routed tunnels not supported.");
555 			return (EOPNOTSUPP);
556 		}
557 
558 		/* Create a fake encapsulation interface. */
559 		ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
560 		bzero(ifp, sizeof(*ifp));
561 		sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
562 
563 		/* Prepare cached route entry. */
564 		bzero(&vifp->v_route, sizeof(vifp->v_route));
565 
566 		/* Tell ipip_input() to start looking at encapsulated packets. */
567 		have_encap_tunnel = 1;
568 	} else {
569 		/* Use the physical interface associated with the address. */
570 		ifp = ifa->ifa_ifp;
571 
572 		/* Make sure the interface supports multicast. */
573 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
574 			return (EOPNOTSUPP);
575 
576 		/* Enable promiscuous reception of all IP multicasts. */
577 		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
578 		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
579 		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
580 		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
581 		if (error)
582 			return (error);
583 	}
584 
585 	s = splsoftnet();
586 	/* Define parameters for the tbf structure. */
587 	vifp->v_tbf.q_len = 0;
588 	vifp->v_tbf.n_tok = 0;
589 	vifp->v_tbf.last_pkt_t = 0;
590 
591 	vifp->v_flags = vifcp->vifc_flags;
592 	vifp->v_threshold = vifcp->vifc_threshold;
593 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
594 	vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
595 	vifp->v_ifp = ifp;
596 	vifp->v_rate_limit = vifcp->vifc_rate_limit;
597 #ifdef RSVP_ISI
598 	vifp->v_rsvp_on = 0;
599 	vifp->v_rsvpd = NULL;
600 #endif /* RSVP_ISI */
601 	/* Initialize per vif pkt counters. */
602 	vifp->v_pkt_in = 0;
603 	vifp->v_pkt_out = 0;
604 	vifp->v_bytes_in = 0;
605 	vifp->v_bytes_out = 0;
606 	splx(s);
607 
608 	/* Adjust numvifs up if the vifi is higher than numvifs. */
609 	if (numvifs <= vifcp->vifc_vifi)
610 		numvifs = vifcp->vifc_vifi + 1;
611 
612 	if (mrtdebug)
613 		log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
614 		    vifcp->vifc_vifi,
615 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
616 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
617 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
618 		    vifcp->vifc_threshold,
619 		    vifcp->vifc_rate_limit);
620 
621 	return (0);
622 }
623 
624 void
625 reset_vif(vifp)
626 	register struct vif *vifp;
627 {
628 	struct ifnet *ifp;
629 	struct ifreq ifr;
630 
631 	if (vifp->v_flags & VIFF_TUNNEL) {
632 		free(vifp->v_ifp, M_MRTABLE);
633 		if (vifp == last_encap_vif) {
634 			last_encap_vif = 0;
635 			last_encap_src = 0;
636 		}
637 	} else {
638 		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
639 		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
640 		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
641 		ifp = vifp->v_ifp;
642 		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
643 	}
644 	bzero((caddr_t)vifp, sizeof(*vifp));
645 }
646 
647 /*
648  * Delete a vif from the vif table
649  */
650 static int
651 del_vif(m)
652 	struct mbuf *m;
653 {
654 	vifi_t *vifip;
655 	register struct vif *vifp;
656 	register vifi_t vifi;
657 	int s;
658 
659 	if (m == 0 || m->m_len < sizeof(vifi_t))
660 		return (EINVAL);
661 
662 	vifip = mtod(m, vifi_t *);
663 	if (*vifip >= numvifs)
664 		return (EINVAL);
665 
666 	vifp = &viftable[*vifip];
667 	if (vifp->v_lcl_addr.s_addr == 0)
668 		return (EADDRNOTAVAIL);
669 
670 	s = splsoftnet();
671 
672 	reset_vif(vifp);
673 
674 	bzero((caddr_t)qtable[*vifip], sizeof(qtable[*vifip]));
675 
676 	/* Adjust numvifs down */
677 	for (vifi = numvifs; vifi > 0; vifi--)
678 		if (viftable[vifi-1].v_lcl_addr.s_addr != 0)
679 			break;
680 	numvifs = vifi;
681 
682 	splx(s);
683 
684 	if (mrtdebug)
685 		log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
686 
687 	return (0);
688 }
689 
690 static void
691 update_mfc(mfccp, rt)
692 	struct mfcctl *mfccp;
693 	struct mfc *rt;
694 {
695 	vifi_t vifi;
696 
697 	rt->mfc_parent = mfccp->mfcc_parent;
698 	for (vifi = 0; vifi < numvifs; vifi++)
699 		rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
700 	rt->mfc_expire = 0;
701 	rt->mfc_stall = 0;
702 }
703 
704 static void
705 expire_mfc(rt)
706 	struct mfc *rt;
707 {
708 	struct rtdetq *rte, *nrte;
709 
710 	for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
711 		nrte = rte->next;
712 		m_freem(rte->m);
713 		free(rte, M_MRTABLE);
714 	}
715 
716 	LIST_REMOVE(rt, mfc_hash);
717 	free(rt, M_MRTABLE);
718 }
719 
720 /*
721  * Add an mfc entry
722  */
723 static int
724 add_mfc(m)
725 	struct mbuf *m;
726 {
727 	struct mfcctl *mfccp;
728 	struct mfc *rt;
729 	u_int32_t hash = 0;
730 	struct rtdetq *rte, *nrte;
731 	register u_short nstl;
732 	int s;
733 
734 	if (m == 0 || m->m_len < sizeof(struct mfcctl))
735 		return (EINVAL);
736 
737 	mfccp = mtod(m, struct mfcctl *);
738 
739 	s = splsoftnet();
740 	MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
741 
742 	/* If an entry already exists, just update the fields */
743 	if (rt) {
744 		if (mrtdebug & DEBUG_MFC)
745 			log(LOG_DEBUG,"add_mfc update o %x g %x p %x",
746 			    ntohl(mfccp->mfcc_origin.s_addr),
747 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
748 			    mfccp->mfcc_parent);
749 
750 		if (rt->mfc_expire)
751 			nexpire[hash]--;
752 
753 		update_mfc(mfccp, rt);
754 
755 		splx(s);
756 		return (0);
757 	}
758 
759 	/*
760 	 * Find the entry for which the upcall was made and update
761 	 */
762 	nstl = 0;
763 	hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
764 	for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
765 		if (rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr &&
766 		    rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr &&
767 		    rt->mfc_stall != NULL) {
768 			if (nstl++)
769 				log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p",
770 				    "multiple kernel entries",
771 				    ntohl(mfccp->mfcc_origin.s_addr),
772 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
773 				    mfccp->mfcc_parent, rt->mfc_stall);
774 
775 			if (mrtdebug & DEBUG_MFC)
776 				log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p",
777 				    ntohl(mfccp->mfcc_origin.s_addr),
778 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
779 				    mfccp->mfcc_parent, rt->mfc_stall);
780 
781 			if (rt->mfc_expire)
782 				nexpire[hash]--;
783 
784 			/* free packets Qed at the end of this entry */
785 			for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
786 				nrte = rte->next;
787 #ifdef RSVP_ISI
788 				ip_mdq(rte->m, rte->ifp, rt, -1);
789 #else
790 				ip_mdq(rte->m, rte->ifp, rt);
791 #endif /* RSVP_ISI */
792 				m_freem(rte->m);
793 #ifdef UPCALL_TIMING
794 				collate(&rte->t);
795 #endif /* UPCALL_TIMING */
796 				free(rte, M_MRTABLE);
797 			}
798 
799 			update_mfc(mfccp, rt);
800 		}
801 	}
802 
803 	if (nstl == 0) {
804 		/*
805 		 * No mfc; make a new one
806 		 */
807 		if (mrtdebug & DEBUG_MFC)
808 			log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x",
809 			    ntohl(mfccp->mfcc_origin.s_addr),
810 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
811 			    mfccp->mfcc_parent);
812 
813 		rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
814 		if (rt == NULL) {
815 			splx(s);
816 			return (ENOBUFS);
817 		}
818 
819 		rt->mfc_origin = mfccp->mfcc_origin;
820 		rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
821 		/* initialize pkt counters per src-grp */
822 		rt->mfc_pkt_cnt = 0;
823 		rt->mfc_byte_cnt = 0;
824 		rt->mfc_wrong_if = 0;
825 		timerclear(&rt->mfc_last_assert);
826 		update_mfc(mfccp, rt);
827 
828 		/* insert new entry at head of hash chain */
829 		LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
830 	}
831 
832 	splx(s);
833 	return (0);
834 }
835 
#ifdef UPCALL_TIMING
/*
 * Collect delay statistics on the upcalls.
 *
 * `t' is the time recorded when the packet was queued.  If it lies in
 * the past, the elapsed microseconds are shifted right by 10 and used
 * as an index into the upcall_data[] histogram, with everything above
 * 50 counted in the last slot.
 */
static void collate(t)
register struct timeval *t;
{
    register u_int32_t d;
    /* Not `register': its address is taken for microtime() below. */
    struct timeval tp;
    register u_int32_t delta;

    microtime(&tp);

    if (timercmp(t, &tp, <)) {
	TV_DELTA(tp, *t, delta);

	d = delta >> 10;
	if (d > 50)
	    d = 50;

	++upcall_data[d];
    }
}
#endif /* UPCALL_TIMING */
860 
861 /*
862  * Delete an mfc entry
863  */
864 static int
865 del_mfc(m)
866 	struct mbuf *m;
867 {
868 	struct mfcctl *mfccp;
869 	struct mfc *rt;
870 	int s;
871 
872 	if (m == 0 || m->m_len < sizeof(struct mfcctl))
873 		return (EINVAL);
874 
875 	mfccp = mtod(m, struct mfcctl *);
876 
877 	if (mrtdebug & DEBUG_MFC)
878 		log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x",
879 		    ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr));
880 
881 	s = splsoftnet();
882 
883 	MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
884 	if (rt == NULL) {
885 		splx(s);
886 		return (EADDRNOTAVAIL);
887 	}
888 
889 	LIST_REMOVE(rt, mfc_hash);
890 	free(rt, M_MRTABLE);
891 
892 	splx(s);
893 	return (0);
894 }
895 
896 static int
897 socket_send(s, mm, src)
898     struct socket *s;
899     struct mbuf *mm;
900     struct sockaddr_in *src;
901 {
902     if (s) {
903 	if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
904 	    sorwakeup(s);
905 	    return (0);
906 	}
907     }
908     m_freem(mm);
909     return (-1);
910 }
911 
912 /*
913  * IP multicast forwarding function. This function assumes that the packet
914  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
915  * pointed to by "ifp", and the packet is to be relayed to other networks
916  * that have members of the packet's destination IP multicast group.
917  *
918  * The packet is returned unscathed to the caller, unless it is
919  * erroneous, in which case a non-zero return value tells the caller to
920  * discard it.
921  */
922 
923 #define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
924 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
925 
926 int
927 #ifdef RSVP_ISI
928 ip_mforward(m, ifp, imo)
929 #else
930 ip_mforward(m, ifp)
931 #endif /* RSVP_ISI */
932     struct mbuf *m;
933     struct ifnet *ifp;
934 #ifdef RSVP_ISI
935     struct ip_moptions *imo;
936 #endif /* RSVP_ISI */
937 {
938     register struct ip *ip = mtod(m, struct ip *);
939     register struct mfc *rt;
940     register u_char *ipoptions;
941     static int srctun = 0;
942     register struct mbuf *mm;
943     int s;
944 #ifdef RSVP_ISI
945     register struct vif *vifp;
946     vifi_t vifi;
947 #endif /* RSVP_ISI */
948 
949     if (mrtdebug & DEBUG_FORWARD)
950 	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p",
951 	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
952 
953     if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
954 	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
955 	/*
956 	 * Packet arrived via a physical interface or
957 	 * an encapuslated tunnel.
958 	 */
959     } else {
960 	/*
961 	 * Packet arrived through a source-route tunnel.
962 	 * Source-route tunnels are no longer supported.
963 	 */
964 	if ((srctun++ % 1000) == 0)
965 	    log(LOG_ERR, "ip_mforward: received source-routed packet from %x",
966 		ntohl(ip->ip_src.s_addr));
967 
968 	return (1);
969     }
970 
971 #ifdef RSVP_ISI
972     if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
973 	if (ip->ip_ttl < 255)
974 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
975 	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
976 	    vifp = viftable + vifi;
977 	    printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
978 		ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
979 		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
980 		vifp->v_ifp->if_xname);
981 	}
982 	return (ip_mdq(m, ifp, rt, vifi));
983     }
984     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
985 	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
986 	    ntohl(ip->ip_src), ntohl(ip->ip_dst));
987     }
988 #endif /* RSVP_ISI */
989 
990     /*
991      * Don't forward a packet with time-to-live of zero or one,
992      * or a packet destined to a local-only group.
993      */
994     if (ip->ip_ttl <= 1 ||
995 	IN_LOCAL_GROUP(ip->ip_dst.s_addr))
996 	return (0);
997 
998     /*
999      * Determine forwarding vifs from the forwarding cache table
1000      */
1001     s = splsoftnet();
1002     MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1003 
1004     /* Entry exists, so forward if necessary */
1005     if (rt != NULL) {
1006 	splx(s);
1007 #ifdef RSVP_ISI
1008 	return (ip_mdq(m, ifp, rt, -1));
1009 #else
1010 	return (ip_mdq(m, ifp, rt));
1011 #endif /* RSVP_ISI */
1012     } else {
1013 	/*
1014 	 * If we don't have a route for packet's origin,
1015 	 * Make a copy of the packet &
1016 	 * send message to routing daemon
1017 	 */
1018 
1019 	register struct mbuf *mb0;
1020 	register struct rtdetq *rte;
1021 	register u_int32_t hash;
1022 #ifdef UPCALL_TIMING
1023 	struct timeval tp;
1024 
1025 	microtime(&tp);
1026 #endif /* UPCALL_TIMING */
1027 
1028 	mrtstat.mrts_no_route++;
1029 	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1030 	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1031 		ntohl(ip->ip_src.s_addr),
1032 		ntohl(ip->ip_dst.s_addr));
1033 
1034 	/*
1035 	 * Allocate mbufs early so that we don't do extra work if we are
1036 	 * just going to fail anyway.
1037 	 */
1038 	rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1039 	if (rte == NULL) {
1040 	    splx(s);
1041 	    return (ENOBUFS);
1042 	}
1043 	mb0 = m_copy(m, 0, M_COPYALL);
1044 	if (mb0 == NULL) {
1045 	    free(rte, M_MRTABLE);
1046 	    splx(s);
1047 	    return (ENOBUFS);
1048 	}
1049 
1050 	/* is there an upcall waiting for this packet? */
1051 	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1052 	for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
1053 	    if (ip->ip_src.s_addr == rt->mfc_origin.s_addr &&
1054 		ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr &&
1055 		rt->mfc_stall != NULL)
1056 		break;
1057 	}
1058 
1059 	if (rt == NULL) {
1060 	    int hlen = ip->ip_hl << 2;
1061 	    int i;
1062 	    struct igmpmsg *im;
1063 
1064 	    /* no upcall, so make a new entry */
1065 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1066 	    if (rt == NULL) {
1067 		free(rte, M_MRTABLE);
1068 		m_free(mb0);
1069 		splx(s);
1070 		return (ENOBUFS);
1071 	    }
1072 	    /* Make a copy of the header to send to the user level process */
1073 	    mm = m_copy(m, 0, hlen);
1074 	    M_PULLUP(mm, hlen);
1075 	    if (mm == NULL) {
1076 		free(rte, M_MRTABLE);
1077 		m_free(mb0);
1078 		free(rt, M_MRTABLE);
1079 		splx(s);
1080 		return (ENOBUFS);
1081 	    }
1082 
1083 	    /*
1084 	     * Send message to routing daemon to install
1085 	     * a route into the kernel table
1086 	     */
1087 	    sin.sin_addr = ip->ip_src;
1088 
1089 	    im = mtod(mm, struct igmpmsg *);
1090 	    im->im_msgtype	= IGMPMSG_NOCACHE;
1091 	    im->im_mbz		= 0;
1092 
1093 	    mrtstat.mrts_upcalls++;
1094 
1095 	    if (socket_send(ip_mrouter, mm, &sin) < 0) {
1096 		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full");
1097 		++mrtstat.mrts_upq_sockfull;
1098 		free(rte, M_MRTABLE);
1099 		m_free(mb0);
1100 		free(rt, M_MRTABLE);
1101 		splx(s);
1102 		return (ENOBUFS);
1103 	    }
1104 
1105 	    /* insert new entry at head of hash chain */
1106 	    rt->mfc_origin = ip->ip_src;
1107 	    rt->mfc_mcastgrp = ip->ip_dst;
1108 	    rt->mfc_pkt_cnt = 0;
1109 	    rt->mfc_byte_cnt = 0;
1110 	    rt->mfc_wrong_if = 0;
1111 	    rt->mfc_expire = UPCALL_EXPIRE;
1112 	    nexpire[hash]++;
1113 	    for (i = 0; i < numvifs; i++)
1114 		rt->mfc_ttls[i] = 0;
1115 	    rt->mfc_parent = -1;
1116 
1117 	    /* link into table */
1118 	    LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1119 	    /* Add this entry to the end of the queue */
1120 	    rt->mfc_stall = rte;
1121 	} else {
1122 	    /* determine if q has overflowed */
1123 	    struct rtdetq **p;
1124 	    register int npkts = 0;
1125 
1126 	    for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1127 		if (++npkts > MAX_UPQ) {
1128 		    mrtstat.mrts_upq_ovflw++;
1129 		    free(rte, M_MRTABLE);
1130 		    m_free(mb0);
1131 		    splx(s);
1132 		    return (0);
1133 	        }
1134 
1135 	    /* Add this entry to the end of the queue */
1136 	    *p = rte;
1137 	}
1138 
1139 	rte->next		= NULL;
1140 	rte->m 			= mb0;
1141 	rte->ifp 		= ifp;
1142 #ifdef UPCALL_TIMING
1143 	rte->t			= tp;
1144 #endif /* UPCALL_TIMING */
1145 
1146 
1147 	splx(s);
1148 
1149 	return (0);
1150     }
1151 }
1152 
1153 
1154 /*ARGSUSED*/
1155 static void
1156 expire_upcalls(v)
1157 	void *v;
1158 {
1159 	int i;
1160 	int s;
1161 
1162 	s = splsoftnet();
1163 
1164 	for (i = 0; i < MFCTBLSIZ; i++) {
1165 		register struct mfc *rt, *nrt;
1166 
1167 		if (nexpire[i] == 0)
1168 			continue;
1169 
1170 		for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
1171 			nrt = rt->mfc_hash.le_next;
1172 
1173 			if (rt->mfc_expire == 0 ||
1174 			    --rt->mfc_expire > 0)
1175 				continue;
1176 			nexpire[i]--;
1177 
1178 			++mrtstat.mrts_cache_cleanups;
1179 			if (mrtdebug & DEBUG_EXPIRE)
1180 				log(LOG_DEBUG,
1181 				    "expire_upcalls: expiring (%x %x)",
1182 				    ntohl(rt->mfc_origin.s_addr),
1183 				    ntohl(rt->mfc_mcastgrp.s_addr));
1184 
1185 			expire_mfc(rt);
1186 		}
1187 	}
1188 
1189 	splx(s);
1190 	timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
1191 }
1192 
1193 /*
1194  * Packet forwarding routine once entry in the cache is made
1195  */
1196 static int
1197 #ifdef RSVP_ISI
1198 ip_mdq(m, ifp, rt, xmt_vif)
1199 #else
1200 ip_mdq(m, ifp, rt)
1201 #endif /* RSVP_ISI */
1202     register struct mbuf *m;
1203     register struct ifnet *ifp;
1204     register struct mfc *rt;
1205 #ifdef RSVP_ISI
1206     register vifi_t xmt_vif;
1207 #endif /* RSVP_ISI */
1208 {
1209     register struct ip  *ip = mtod(m, struct ip *);
1210     register vifi_t vifi;
1211     register struct vif *vifp;
1212     register int plen = ntohs(ip->ip_len);
1213 
1214 /*
1215  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1216  * input, they shouldn't get counted on output, so statistics keeping is
1217  * seperate.
1218  */
1219 #define MC_SEND(ip,vifp,m) {                             \
1220                 if ((vifp)->v_flags & VIFF_TUNNEL)	 \
1221                     encap_send((ip), (vifp), (m));       \
1222                 else                                     \
1223                     phyint_send((ip), (vifp), (m));      \
1224 }
1225 
1226 #ifdef RSVP_ISI
1227     /*
1228      * If xmt_vif is not -1, send on only the requested vif.
1229      *
1230      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1231      */
1232     if (xmt_vif < numvifs) {
1233         MC_SEND(ip, viftable + xmt_vif, m);
1234 	return (1);
1235     }
1236 #endif /* RSVP_ISI */
1237 
1238     /*
1239      * Don't forward if it didn't arrive from the parent vif for its origin.
1240      */
1241     vifi = rt->mfc_parent;
1242     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1243 	/* came in the wrong interface */
1244 	if (mrtdebug & DEBUG_FORWARD)
1245 	    log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p",
1246 		ifp, vifi, viftable[vifi].v_ifp);
1247 	++mrtstat.mrts_wrong_if;
1248 	++rt->mfc_wrong_if;
1249 	/*
1250 	 * If we are doing PIM assert processing, and we are forwarding
1251 	 * packets on this interface, and it is a broadcast medium
1252 	 * interface (and not a tunnel), send a message to the routing daemon.
1253 	 */
1254 	if (pim_assert && rt->mfc_ttls[vifi] &&
1255 		(ifp->if_flags & IFF_BROADCAST) &&
1256 		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1257 	    struct mbuf *mm;
1258 	    struct igmpmsg *im;
1259 	    int hlen = ip->ip_hl << 2;
1260 	    struct timeval now;
1261 	    register u_int32_t delta;
1262 
1263 	    microtime(&now);
1264 
1265 	    TV_DELTA(rt->mfc_last_assert, now, delta);
1266 
1267 	    if (delta > ASSERT_MSG_TIME) {
1268 		mm = m_copy(m, 0, hlen);
1269 		M_PULLUP(mm, hlen);
1270 		if (mm == NULL) {
1271 		    return (ENOBUFS);
1272 		}
1273 
1274 		rt->mfc_last_assert = now;
1275 
1276 		im = mtod(mm, struct igmpmsg *);
1277 		im->im_msgtype	= IGMPMSG_WRONGVIF;
1278 		im->im_mbz	= 0;
1279 		im->im_vif	= vifi;
1280 
1281 		sin.sin_addr = im->im_src;
1282 
1283 		socket_send(ip_mrouter, m, &sin);
1284 	    }
1285 	}
1286 	return (0);
1287     }
1288 
1289     /* If I sourced this packet, it counts as output, else it was input. */
1290     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1291 	viftable[vifi].v_pkt_out++;
1292 	viftable[vifi].v_bytes_out += plen;
1293     } else {
1294 	viftable[vifi].v_pkt_in++;
1295 	viftable[vifi].v_bytes_in += plen;
1296     }
1297     rt->mfc_pkt_cnt++;
1298     rt->mfc_byte_cnt += plen;
1299 
1300     /*
1301      * For each vif, decide if a copy of the packet should be forwarded.
1302      * Forward if:
1303      *		- the ttl exceeds the vif's threshold
1304      *		- there are group members downstream on interface
1305      */
1306     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1307 	if ((rt->mfc_ttls[vifi] > 0) &&
1308 	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1309 	    vifp->v_pkt_out++;
1310 	    vifp->v_bytes_out += plen;
1311 	    MC_SEND(ip, vifp, m);
1312 	}
1313 
1314     return (0);
1315 }
1316 
1317 #ifdef RSVP_ISI
1318 /*
1319  * check if a vif number is legal/ok. This is used by ip_output, to export
1320  * numvifs there,
1321  */
1322 int
1323 legal_vif_num(vif)
1324     int vif;
1325 {
1326     if (vif >= 0 && vif < numvifs)
1327        return (1);
1328     else
1329        return (0);
1330 }
1331 #endif /* RSVP_ISI */
1332 
1333 static void
1334 phyint_send(ip, vifp, m)
1335 	struct ip *ip;
1336 	struct vif *vifp;
1337 	struct mbuf *m;
1338 {
1339 	register struct mbuf *mb_copy;
1340 	register int hlen = ip->ip_hl << 2;
1341 
1342 	/*
1343 	 * Make a new reference to the packet; make sure that
1344 	 * the IP header is actually copied, not just referenced,
1345 	 * so that ip_output() only scribbles on the copy.
1346 	 */
1347 	mb_copy = m_copy(m, 0, M_COPYALL);
1348 	M_PULLUP(mb_copy, hlen);
1349 	if (mb_copy == NULL)
1350 		return;
1351 
1352 	if (vifp->v_rate_limit <= 0)
1353 		tbf_send_packet(vifp, mb_copy);
1354 	else
1355 		tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1356 }
1357 
1358 static void
1359 encap_send(ip, vifp, m)
1360 	register struct ip *ip;
1361 	register struct vif *vifp;
1362 	register struct mbuf *m;
1363 {
1364 	register struct mbuf *mb_copy;
1365 	register struct ip *ip_copy;
1366 	register int i, len = ip->ip_len + sizeof(multicast_encap_iphdr);
1367 
1368 	/*
1369 	 * copy the old packet & pullup it's IP header into the
1370 	 * new mbuf so we can modify it.  Try to fill the new
1371 	 * mbuf since if we don't the ethernet driver will.
1372 	 */
1373 	MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1374 	if (mb_copy == NULL)
1375 		return;
1376 	mb_copy->m_data += max_linkhdr;
1377 	mb_copy->m_pkthdr.len = len;
1378 	mb_copy->m_len = sizeof(multicast_encap_iphdr);
1379 
1380 	if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1381 		m_freem(mb_copy);
1382 		return;
1383 	}
1384 	i = MHLEN - max_linkhdr;
1385 	if (i > len)
1386 		i = len;
1387 	mb_copy = m_pullup(mb_copy, i);
1388 	if (mb_copy == NULL)
1389 		return;
1390 
1391 	/*
1392 	 * fill in the encapsulating IP header.
1393 	 */
1394 	ip_copy = mtod(mb_copy, struct ip *);
1395 	*ip_copy = multicast_encap_iphdr;
1396 	ip_copy->ip_id = htons(ip_id++);
1397 	ip_copy->ip_len = len;
1398 	ip_copy->ip_src = vifp->v_lcl_addr;
1399 	ip_copy->ip_dst = vifp->v_rmt_addr;
1400 
1401 	/*
1402 	 * turn the encapsulated IP header back into a valid one.
1403 	 */
1404 	ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1405 	--ip->ip_ttl;
1406 	HTONS(ip->ip_len);
1407 	HTONS(ip->ip_off);
1408 	ip->ip_sum = 0;
1409 #if defined(LBL) && !defined(ultrix) && !defined(i386)
1410 	ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1411 #else
1412 	mb_copy->m_data += sizeof(multicast_encap_iphdr);
1413 	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1414 	mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1415 #endif
1416 
1417 	if (vifp->v_rate_limit <= 0)
1418 		tbf_send_packet(vifp, mb_copy);
1419 	else
1420 		tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1421 }
1422 
1423 /*
1424  * De-encapsulate a packet and feed it back through ip input (this
1425  * routine is called whenever IP gets a packet with proto type
1426  * ENCAP_PROTO and a local destination address).
1427  */
1428 void
1429 #if __STDC__
1430 ipip_input(struct mbuf *m, ...)
1431 #else
1432 ipip_input(m, va_alist)
1433 	struct mbuf *m;
1434 	va_dcl
1435 #endif
1436 {
1437 	register int hlen;
1438 	register struct ip *ip = mtod(m, struct ip *);
1439 	register int s;
1440 	register struct ifqueue *ifq;
1441 	register struct vif *vifp;
1442 	va_list ap;
1443 
1444 	va_start(ap, m);
1445 	hlen = va_arg(ap, int);
1446 	va_end(ap);
1447 
1448 	if (!have_encap_tunnel) {
1449 		rip_input(m);
1450 		return;
1451 	}
1452 
1453 	/*
1454 	 * dump the packet if it's not to a multicast destination or if
1455 	 * we don't have an encapsulating tunnel with the source.
1456 	 * Note:  This code assumes that the remote site IP address
1457 	 * uniquely identifies the tunnel (i.e., that this site has
1458 	 * at most one tunnel with the remote site).
1459 	 */
1460 	if (!IN_MULTICAST(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr)) {
1461 		++mrtstat.mrts_bad_tunnel;
1462 		m_freem(m);
1463 		return;
1464 	}
1465 
1466 	if (ip->ip_src.s_addr != last_encap_src) {
1467 		register struct vif *vife;
1468 
1469 		vifp = viftable;
1470 		vife = vifp + numvifs;
1471 		for (; vifp < vife; vifp++)
1472 			if (vifp->v_flags & VIFF_TUNNEL &&
1473 			    vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr)
1474 				break;
1475 		if (vifp == vife) {
1476 			mrtstat.mrts_cant_tunnel++; /*XXX*/
1477 			m_freem(m);
1478 			if (mrtdebug)
1479 				log(LOG_DEBUG, "ip_mforward: no tunnel with %x",
1480 				    ntohl(ip->ip_src.s_addr));
1481 			return;
1482 		}
1483 		last_encap_vif = vifp;
1484 		last_encap_src = ip->ip_src.s_addr;
1485 	} else
1486 		vifp = last_encap_vif;
1487 
1488 	m->m_data += hlen;
1489 	m->m_len -= hlen;
1490 	m->m_pkthdr.len -= hlen;
1491 	m->m_pkthdr.rcvif = vifp->v_ifp;
1492 	ifq = &ipintrq;
1493 	s = splimp();
1494 	if (IF_QFULL(ifq)) {
1495 		IF_DROP(ifq);
1496 		m_freem(m);
1497 	} else {
1498 		IF_ENQUEUE(ifq, m);
1499 		/*
1500 		 * normally we would need a "schednetisr(NETISR_IP)"
1501 		 * here but we were called by ip_input and it is going
1502 		 * to loop back & try to dequeue the packet we just
1503 		 * queued as soon as we return so we avoid the
1504 		 * unnecessary software interrrupt.
1505 		 */
1506 	}
1507 	splx(s);
1508 }
1509 
1510 /*
1511  * Token bucket filter module
1512  */
1513 static void
1514 tbf_control(vifp, m, ip, p_len)
1515 	register struct vif *vifp;
1516 	register struct mbuf *m;
1517 	register struct ip *ip;
1518 	register u_int32_t p_len;
1519 {
1520 
1521 	tbf_update_tokens(vifp);
1522 
1523 	/*
1524 	 * If there are enough tokens, and the queue is empty, send this packet
1525 	 * out immediately.  Otherwise, try to insert it on this vif's queue.
1526 	 */
1527 	if (vifp->v_tbf.q_len == 0) {
1528 		if (p_len <= vifp->v_tbf.n_tok) {
1529 			vifp->v_tbf.n_tok -= p_len;
1530 			tbf_send_packet(vifp, m);
1531 		} else if (p_len > MAX_BKT_SIZE) {
1532 			/* drop if packet is too large */
1533 			mrtstat.mrts_pkt2large++;
1534 			m_freem(m);
1535 		} else {
1536 			/* queue packet and timeout till later */
1537 			tbf_queue(vifp, m, ip);
1538 			timeout(tbf_reprocess_q, vifp, 1);
1539 		}
1540 	} else {
1541 		if (vifp->v_tbf.q_len >= MAXQSIZE &&
1542 		    !tbf_dq_sel(vifp, ip)) {
1543 			/* queue length too much, and couldn't make room */
1544 			mrtstat.mrts_q_overflow++;
1545 			m_freem(m);
1546 		} else {
1547 			/* queue length low enough, or made room */
1548 			tbf_queue(vifp, m, ip);
1549 			tbf_process_q(vifp);
1550 		}
1551 	}
1552 }
1553 
1554 /*
1555  * adds a packet to the queue at the interface
1556  */
1557 static void
1558 tbf_queue(vifp, m, ip)
1559     register struct vif *vifp;
1560     register struct mbuf *m;
1561     register struct ip *ip;
1562 {
1563     register u_int32_t ql;
1564     register int index = (vifp - viftable);
1565     register int s = splsoftnet();
1566 
1567     ql = vifp->v_tbf.q_len;
1568 
1569     qtable[index][ql].pkt_m = m;
1570     qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1571     qtable[index][ql].pkt_ip = ip;
1572 
1573     vifp->v_tbf.q_len++;
1574     splx(s);
1575 }
1576 
1577 
1578 /*
1579  * processes the queue at the interface
1580  */
1581 static void
1582 tbf_process_q(vifp)
1583     register struct vif *vifp;
1584 {
1585     register struct pkt_queue pkt_1;
1586     register int index = (vifp - viftable);
1587     register int s = splsoftnet();
1588 
1589     /* loop through the queue at the interface and send as many packets
1590      * as possible
1591      */
1592     while (vifp->v_tbf.q_len > 0) {
1593 	/* locate the first packet */
1594 	pkt_1 = qtable[index][0];
1595 
1596 	/* determine if the packet can be sent */
1597 	if (pkt_1.pkt_len <= vifp->v_tbf.n_tok) {
1598 	    /* if so,
1599 	     * reduce no of tokens, dequeue the queue,
1600 	     * send the packet.
1601 	     */
1602 	    vifp->v_tbf.n_tok -= pkt_1.pkt_len;
1603 
1604 	    tbf_dequeue(vifp, 0);
1605 	    tbf_send_packet(vifp, pkt_1.pkt_m);
1606 	} else
1607 	    break;
1608     }
1609     splx(s);
1610 }
1611 
1612 /*
1613  * removes the jth packet from the queue at the interface
1614  */
1615 static void
1616 tbf_dequeue(vifp, j)
1617     register struct vif *vifp;
1618     register int j;
1619 {
1620     register u_int32_t index = vifp - viftable;
1621     register int i;
1622 
1623     for (i=j+1; i <= vifp->v_tbf.q_len - 1; i++) {
1624 	qtable[index][i-1] = qtable[index][i];
1625     }
1626     qtable[index][i-1].pkt_m = NULL;
1627     qtable[index][i-1].pkt_len = NULL;
1628     qtable[index][i-1].pkt_ip = NULL;
1629 
1630     vifp->v_tbf.q_len--;
1631 
1632     if (tbfdebug > 1)
1633 	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1634 }
1635 
1636 static void
1637 tbf_reprocess_q(arg)
1638 	void *arg;
1639 {
1640 	register struct vif *vifp = arg;
1641 
1642 	if (ip_mrouter == NULL)
1643 		return;
1644 
1645 	tbf_update_tokens(vifp);
1646 	tbf_process_q(vifp);
1647 
1648 	if (vifp->v_tbf.q_len)
1649 		timeout(tbf_reprocess_q, vifp, 1);
1650 }
1651 
1652 /* function that will selectively discard a member of the queue
1653  * based on the precedence value and the priority obtained through
1654  * a lookup table - not yet implemented accurately!
1655  */
1656 static int
1657 tbf_dq_sel(vifp, ip)
1658     register struct vif *vifp;
1659     register struct ip *ip;
1660 {
1661     register int i;
1662     register int s = splsoftnet();
1663     register u_int p;
1664 
1665     p = priority(vifp, ip);
1666 
1667     for(i=vifp->v_tbf.q_len-1;i >= 0;i--) {
1668 	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1669 	    m_freem(qtable[vifp-viftable][i].pkt_m);
1670 	    tbf_dequeue(vifp, i);
1671 	    splx(s);
1672 	    mrtstat.mrts_drop_sel++;
1673 	    return (1);
1674 	}
1675     }
1676     splx(s);
1677     return (0);
1678 }
1679 
1680 static void
1681 tbf_send_packet(vifp,m)
1682     register struct vif *vifp;
1683     register struct mbuf *m;
1684 {
1685     int error;
1686     int s = splsoftnet();
1687 
1688     if (vifp->v_flags & VIFF_TUNNEL) {
1689 	/* If tunnel options */
1690 	ip_output(m, (struct mbuf *)0, &vifp->v_route,
1691 		  IP_FORWARDING, NULL);
1692     } else {
1693 	/* if physical interface option, extract the options and then send */
1694 	struct ip *ip = mtod(m, struct ip *);
1695 	struct ip_moptions imo;
1696 	imo.imo_multicast_ifp  = vifp->v_ifp;
1697 	imo.imo_multicast_ttl  = ip->ip_ttl - 1;
1698 	imo.imo_multicast_loop = 1;
1699 #ifdef RSVP_ISI
1700 	imo.imo_multicast_vif  = -1;
1701 #endif
1702 
1703 	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1704 			  IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1705 	if (mrtdebug & DEBUG_XMIT)
1706 	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1707     }
1708     splx(s);
1709 }
1710 
1711 /* determine the current time and then
1712  * the elapsed time (between the last time and time now)
1713  * in milliseconds & update the no. of tokens in the bucket
1714  */
1715 static void
1716 tbf_update_tokens(vifp)
1717     register struct vif *vifp;
1718 {
1719     struct timeval tp;
1720     register u_int32_t t;
1721     register u_int32_t elapsed;
1722     register int s = splsoftnet();
1723 
1724     microtime(&tp);
1725 
1726     t = tp.tv_sec*1000 + tp.tv_usec/1000;
1727 
1728     elapsed = (t - vifp->v_tbf.last_pkt_t) * vifp->v_rate_limit /8;
1729     vifp->v_tbf.n_tok += elapsed;
1730     vifp->v_tbf.last_pkt_t = t;
1731 
1732     if (vifp->v_tbf.n_tok > MAX_BKT_SIZE)
1733 	vifp->v_tbf.n_tok = MAX_BKT_SIZE;
1734 
1735     splx(s);
1736 }
1737 
1738 static int
1739 priority(vifp, ip)
1740     register struct vif *vifp;
1741     register struct ip *ip;
1742 {
1743     register int prio;
1744 
1745     /* temporary hack; may add general packet classifier some day */
1746 
1747     /*
1748      * The UDP port space is divided up into four priority ranges:
1749      * [0, 16384)     : unclassified - lowest priority
1750      * [16384, 32768) : audio - highest priority
1751      * [32768, 49152) : whiteboard - medium priority
1752      * [49152, 65536) : video - low priority
1753      */
1754     if (ip->ip_p == IPPROTO_UDP) {
1755 	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1756 
1757 	switch (ntohs(udp->uh_dport) & 0xc000) {
1758 	    case 0x4000:
1759 		prio = 70;
1760 		break;
1761 	    case 0x8000:
1762 		prio = 60;
1763 		break;
1764 	    case 0xc000:
1765 		prio = 55;
1766 		break;
1767 	    default:
1768 		prio = 50;
1769 		break;
1770 	}
1771 
1772 	if (tbfdebug > 1) log(LOG_DEBUG, "port %x prio %d", ntohs(udp->uh_dport), prio);
1773     } else
1774 	prio = 50;
1775 
1776 
1777     return (prio);
1778 }
1779 
1780 /*
1781  * End of token bucket filter modifications
1782  */
1783 
1784 #ifdef RSVP_ISI
1785 
1786 int
1787 ip_rsvp_vif_init(so, m)
1788     struct socket *so;
1789     struct mbuf *m;
1790 {
1791     int i;
1792     register int s;
1793 
1794     if (rsvpdebug)
1795 	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1796 	       so->so_type, so->so_proto->pr_protocol);
1797 
1798     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1799 	return (EOPNOTSUPP);
1800 
1801     /* Check mbuf. */
1802     if (m == NULL || m->m_len != sizeof(int)) {
1803 	return (EINVAL);
1804     }
1805     i = *(mtod(m, int *));
1806 
1807     if (rsvpdebug)
1808 	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1809 
1810     s = splsoftnet();
1811 
1812     /* Check vif. */
1813     if (!legal_vif_num(i)) {
1814 	splx(s);
1815 	return (EADDRNOTAVAIL);
1816     }
1817 
1818     /* Check if socket is available. */
1819     if (viftable[i].v_rsvpd != NULL) {
1820 	splx(s);
1821 	return (EADDRINUSE);
1822     }
1823 
1824     viftable[i].v_rsvpd = so;
1825     /* This may seem silly, but we need to be sure we don't over-increment
1826      * the RSVP counter, in case something slips up.
1827      */
1828     if (!viftable[i].v_rsvp_on) {
1829 	viftable[i].v_rsvp_on = 1;
1830 	rsvp_on++;
1831     }
1832 
1833     splx(s);
1834     return (0);
1835 }
1836 
1837 int
1838 ip_rsvp_vif_done(so, m)
1839     struct socket *so;
1840     struct mbuf *m;
1841 {
1842     int i;
1843     register int s;
1844 
1845     if (rsvpdebug)
1846 	printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1847 	       so->so_type, so->so_proto->pr_protocol);
1848 
1849     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1850 	return (EOPNOTSUPP);
1851 
1852     /* Check mbuf. */
1853     if (m == NULL || m->m_len != sizeof(int)) {
1854 	return (EINVAL);
1855     }
1856     i = *(mtod(m, int *));
1857 
1858     s = splsoftnet();
1859 
1860     /* Check vif. */
1861     if (!legal_vif_num(i)) {
1862 	splx(s);
1863         return (EADDRNOTAVAIL);
1864     }
1865 
1866     if (rsvpdebug)
1867 	printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1868 	       viftable[i].v_rsvpd, so);
1869 
1870     viftable[i].v_rsvpd = NULL;
1871     /* This may seem silly, but we need to be sure we don't over-decrement
1872      * the RSVP counter, in case something slips up.
1873      */
1874     if (viftable[i].v_rsvp_on) {
1875 	viftable[i].v_rsvp_on = 0;
1876 	rsvp_on--;
1877     }
1878 
1879     splx(s);
1880     return (0);
1881 }
1882 
1883 void
1884 ip_rsvp_force_done(so)
1885     struct socket *so;
1886 {
1887     int vifi;
1888     register int s;
1889 
1890     /* Don't bother if it is not the right type of socket. */
1891     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1892 	return;
1893 
1894     s = splsoftnet();
1895 
1896     /* The socket may be attached to more than one vif...this
1897      * is perfectly legal.
1898      */
1899     for (vifi = 0; vifi < numvifs; vifi++) {
1900 	if (viftable[vifi].v_rsvpd == so) {
1901 	    viftable[vifi].v_rsvpd = NULL;
1902 	    /* This may seem silly, but we need to be sure we don't
1903 	     * over-decrement the RSVP counter, in case something slips up.
1904 	     */
1905 	    if (viftable[vifi].v_rsvp_on) {
1906 		viftable[vifi].v_rsvp_on = 0;
1907 		rsvp_on--;
1908 	    }
1909 	}
1910     }
1911 
1912     splx(s);
1913     return;
1914 }
1915 
1916 void
1917 rsvp_input(m, ifp)
1918     struct mbuf *m;
1919     struct ifnet *ifp;
1920 {
1921     int vifi;
1922     register struct ip *ip = mtod(m, struct ip *);
1923     static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
1924     register int s;
1925 
1926     if (rsvpdebug)
1927 	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
1928 
1929     /* Can still get packets with rsvp_on = 0 if there is a local member
1930      * of the group to which the RSVP packet is addressed.  But in this
1931      * case we want to throw the packet away.
1932      */
1933     if (!rsvp_on) {
1934 	m_freem(m);
1935 	return;
1936     }
1937 
1938     /* If the old-style non-vif-associated socket is set, then use
1939      * it and ignore the new ones.
1940      */
1941     if (ip_rsvpd != NULL) {
1942 	if (rsvpdebug)
1943 	    printf("rsvp_input: Sending packet up old-style socket\n");
1944 	rip_input(m);
1945 	return;
1946     }
1947 
1948     s = splsoftnet();
1949 
1950     if (rsvpdebug)
1951 	printf("rsvp_input: check vifs\n");
1952 
1953     /* Find which vif the packet arrived on. */
1954     for (vifi = 0; vifi < numvifs; vifi++) {
1955 	if (viftable[vifi].v_ifp == ifp)
1956 	    break;
1957     }
1958 
1959     if (vifi == numvifs) {
1960 	/* Can't find vif packet arrived on. Drop packet. */
1961 	if (rsvpdebug)
1962 	    printf("rsvp_input: Can't find vif for packet...dropping it.\n");
1963 	m_freem(m);
1964 	splx(s);
1965 	return;
1966     }
1967 
1968     if (rsvpdebug)
1969 	printf("rsvp_input: check socket\n");
1970 
1971     if (viftable[vifi].v_rsvpd == NULL) {
1972 	/* drop packet, since there is no specific socket for this
1973 	 * interface */
1974 	if (rsvpdebug)
1975 	    printf("rsvp_input: No socket defined for vif %d\n",vifi);
1976 	m_freem(m);
1977 	splx(s);
1978 	return;
1979     }
1980 
1981     rsvp_src.sin_addr = ip->ip_src;
1982 
1983     if (rsvpdebug && m)
1984 	printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
1985 	       m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
1986 
1987     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
1988 	if (rsvpdebug)
1989 	    printf("rsvp_input: Failed to append to socket\n");
1990     else
1991 	if (rsvpdebug)
1992 	    printf("rsvp_input: send packet up\n");
1993 
1994     splx(s);
1995 }
1996 #endif /* RSVP_ISI */
1997