xref: /dragonfly/sys/net/route.c (revision 611395e5)
1 /*
2  * Copyright (c) 1980, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)route.c	8.3 (Berkeley) 1/9/95
34  * $FreeBSD: src/sys/net/route.c,v 1.59.2.10 2003/01/17 08:04:00 ru Exp $
35  * $DragonFly: src/sys/net/route.c,v 1.9 2004/12/15 00:11:04 hsu Exp $
36  */
37 
38 #include "opt_inet.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/domain.h>
46 #include <sys/kernel.h>
47 
48 #include <net/if.h>
49 #include <net/route.h>
50 
51 #include <netinet/in.h>
52 #include <net/ip_mroute/ip_mroute.h>
53 
/* Shorthand cast of an arbitrary pointer to a generic socket address. */
#define	SA(p) ((struct sockaddr *)(p))

struct route_cb route_cb;
/* Routing statistics; the rts_* counters are bumped by the code below. */
static struct rtstat rtstat;
/* One radix tree head per address family, indexed by sa_family. */
struct radix_node_head *rt_tables[AF_MAX+1];

static int rttrash;		/* routes not in table but not freed */

/* Forward declarations for helpers private to this file. */
static void rt_maskedcopy (struct sockaddr *, struct sockaddr *,
    struct sockaddr *);
static void rtable_init (void **);
65 
66 static void
67 rtable_init(void **table)
68 {
69 	struct domain *dom;
70 
71 	for (dom = domains; dom; dom = dom->dom_next)
72 		if (dom->dom_rtattach)
73 			dom->dom_rtattach(&table[dom->dom_family],
74 			    dom->dom_rtoffset);
75 }
76 
77 void
78 route_init()
79 {
80 	rn_init();	/* initialize all zeroes, all ones, mask table */
81 	rtable_init((void **)rt_tables);
82 }
83 
84 /*
85  * Packet routing routines.
86  */
87 void
88 rtalloc(struct route *ro)
89 {
90 	rtalloc_ign(ro, 0UL);
91 }
92 
93 void
94 rtalloc_ign(struct route *ro, u_long ignore)
95 {
96 	struct rtentry *rt;
97 	int s;
98 
99 	if ((rt = ro->ro_rt) != NULL) {
100 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
101 			return;
102 		/* XXX - We are probably always at splnet here already. */
103 		s = splnet();
104 		RTFREE(rt);
105 		ro->ro_rt = NULL;
106 		splx(s);
107 	}
108 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
109 }
110 
/*
 * Look up the route that matches the given address, or at least try.
 * Create a cloned route if needed.
 *
 * dst      - destination to look up; dst->sa_family selects the table.
 * report   - when non-zero, a matching cloning route is resolved into a
 *            clone and misses/resolutions are announced via rt_missmsg().
 * ignflags - route flags masked out of the match before it is tested
 *            for RTF_CLONING / RTF_PRCLONING.
 *
 * Returns the matching (or freshly cloned) rtentry with a reference
 * taken for the caller, or NULL when nothing usable was found.
 * The whole lookup runs at splnet.
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
{
	struct radix_node_head *rnh = rt_tables[dst->sa_family];
	struct rtentry *rt;
	struct radix_node *rn;
	struct rtentry *newrt = NULL;
	struct rt_addrinfo info;
	u_long nflags;
	int  s = splnet(), err = 0, msgtype = RTM_MISS;

	/*
	 * Look up the address in the table for that Address Family
	 */
	if (rnh != NULL && (rn = rnh->rnh_matchaddr((char *)dst, rnh)) &&
	    !(rn->rn_flags & RNF_ROOT)) {
		/*
		 * If we find it and it's not the root node, then
		 * get a reference on the rtentry associated.
		 */
		newrt = rt = (struct rtentry *)rn;
		nflags = rt->rt_flags & ~ignflags;
		if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
			/*
			 * We are apparently adding (report = 0 in delete).
			 * If it requires that it be cloned, do so.
			 * (This implies it wasn't a HOST route.)
			 * newrt is both the input (route to clone from)
			 * and the output (the clone) of RTM_RESOLVE.
			 */
			err = rtrequest(RTM_RESOLVE, dst, SA(0),
					      SA(0), 0, &newrt);
			if (err) {
				/*
				 * If the cloning didn't succeed, maybe
				 * what we have will do. Return that.
				 */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/*
				 * If the new route specifies it be
				 * externally resolved, then go do that.
				 */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route. */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				info.rti_info[RTAX_IFP] =
				    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->
								    ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0);
		} else
			rt->rt_refcnt++;	/* plain match: just take a ref */
	} else {
		/*
		 * Either we hit the root or couldn't find any match,
		 * Which basically means
		 * "caint get there frm here"
		 */
		rtstat.rts_unreach++;
miss:
		/*
		 * Also reached by goto from the cloning path above, in
		 * which case newrt is non-NULL and is still returned.
		 */
		if (report) {
			/*
			 * If required, report the failure to the supervising
			 * Authorities.
			 * For a delete, this is not an error. (report == 0)
			 */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, err);
		}
	}
	splx(s);
	return (newrt);
}
198 
199 /*
200  * Remove a reference count from an rtentry.
201  * If the count gets low enough, take it out of the routing table
202  */
203 void
204 rtfree(struct rtentry *rt)
205 {
206 	/*
207 	 * find the tree for that address family
208 	 */
209 	struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
210 	struct ifaddr *ifa;
211 
212 	if (rt == NULL || rnh == NULL)
213 		panic("rtfree");
214 
215 	/*
216 	 * decrement the reference count by one and if it reaches 0,
217 	 * and there is a close function defined, call the close function
218 	 */
219 	rt->rt_refcnt--;
220 	if(rnh->rnh_close && rt->rt_refcnt == 0) {
221 		rnh->rnh_close((struct radix_node *)rt, rnh);
222 	}
223 
224 	/*
225 	 * If we are no longer "up" (and ref == 0)
226 	 * then we can free the resources associated
227 	 * with the route.
228 	 */
229 	if (rt->rt_refcnt <= 0 && !(rt->rt_flags & RTF_UP)) {
230 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
231 			panic ("rtfree 2");
232 		/*
233 		 * the rtentry must have been removed from the routing table
234 		 * so it is represented in rttrash.. remove that now.
235 		 */
236 		rttrash--;
237 
238 #ifdef	DIAGNOSTIC
239 		if (rt->rt_refcnt < 0) {
240 			printf("rtfree: %p not freed (neg refs)\n", rt);
241 			return;
242 		}
243 #endif
244 
245 		/*
246 		 * release references on items we hold them on..
247 		 * e.g other routes and ifaddrs.
248 		 */
249 		if((ifa = rt->rt_ifa))
250 			IFAFREE(ifa);
251 		if (rt->rt_parent) {
252 			RTFREE(rt->rt_parent);
253 		}
254 
255 		/*
256 		 * The key is separatly alloc'd so free it (see rt_setgate()).
257 		 * This also frees the gateway, as they are always malloc'd
258 		 * together.
259 		 */
260 		Free(rt_key(rt));
261 
262 		/*
263 		 * and the rtentry itself of course
264 		 */
265 		Free(rt);
266 	}
267 }
268 
269 void
270 ifafree(struct ifaddr *ifa)
271 {
272 	if (ifa == NULL)
273 		panic("ifafree");
274 	if (ifa->ifa_refcnt == 0)
275 		free(ifa, M_IFADDR);
276 	else
277 		ifa->ifa_refcnt--;
278 }
279 
/*
 * True when the two socket addresses are byte-for-byte identical over
 * the length recorded in a1.  Note that a1 is evaluated twice.
 */
#define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
281 
282 /*
283  * Force a routing table entry to the specified
284  * destination to go through the given gateway.
285  * Normally called as a result of a routing redirect
286  * message from the network layer.
287  *
288  * N.B.: must be called at splnet
289  *
290  */
291 void
292 rtredirect(
293 	struct sockaddr *dst,
294 	struct sockaddr *gateway,
295 	struct sockaddr *netmask,
296 	int flags,
297 	struct sockaddr *src,
298 	struct rtentry **rtp)
299 {
300 	struct rtentry *rt;
301 	int error = 0;
302 	short *stat = NULL;
303 	struct rt_addrinfo info;
304 	struct ifaddr *ifa;
305 
306 	/* verify the gateway is directly reachable */
307 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
308 		error = ENETUNREACH;
309 		goto out;
310 	}
311 	rt = rtalloc1(dst, 0, 0UL);
312 	/*
313 	 * If the redirect isn't from our current router for this dst,
314 	 * it's either old or wrong.  If it redirects us to ourselves,
315 	 * we have a routing loop, perhaps as a result of an interface
316 	 * going down recently.
317 	 */
318 	if (!(flags & RTF_DONE) && rt &&
319 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
320 		error = EINVAL;
321 	else if (ifa_ifwithaddr(gateway))
322 		error = EHOSTUNREACH;
323 	if (error)
324 		goto done;
325 	/*
326 	 * Create a new entry if we just got back a wildcard entry
327 	 * or the the lookup failed.  This is necessary for hosts
328 	 * which use routing redirects generated by smart gateways
329 	 * to dynamically build the routing tables.
330 	 */
331 	if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2))
332 		goto create;
333 	/*
334 	 * Don't listen to the redirect if it's
335 	 * for a route to an interface.
336 	 */
337 	if (rt->rt_flags & RTF_GATEWAY) {
338 		if ((!(rt->rt_flags & RTF_HOST)) && (flags & RTF_HOST)) {
339 			/*
340 			 * Changing from route to net => route to host.
341 			 * Create new route, rather than smashing route to net.
342 			 */
343 		create:
344 			if (rt)
345 				rtfree(rt);
346 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
347 			bzero(&info, sizeof(info));
348 			info.rti_info[RTAX_DST] = dst;
349 			info.rti_info[RTAX_GATEWAY] = gateway;
350 			info.rti_info[RTAX_NETMASK] = netmask;
351 			info.rti_ifa = ifa;
352 			info.rti_flags = flags;
353 			rt = NULL;
354 			error = rtrequest1(RTM_ADD, &info, &rt);
355 			if (rt != NULL)
356 				flags = rt->rt_flags;
357 			stat = &rtstat.rts_dynamic;
358 		} else {
359 			/*
360 			 * Smash the current notion of the gateway to
361 			 * this destination.  Should check about netmask!!!
362 			 */
363 			rt->rt_flags |= RTF_MODIFIED;
364 			flags |= RTF_MODIFIED;
365 			stat = &rtstat.rts_newgateway;
366 			/*
367 			 * add the key and gateway (in one malloc'd chunk).
368 			 */
369 			rt_setgate(rt, rt_key(rt), gateway);
370 		}
371 	} else
372 		error = EHOSTUNREACH;
373 done:
374 	if (rt) {
375 		if (rtp != NULL && !error)
376 			*rtp = rt;
377 		else
378 			rtfree(rt);
379 	}
380 out:
381 	if (error)
382 		rtstat.rts_badredirect++;
383 	else if (stat != NULL)
384 		(*stat)++;
385 	bzero(&info, sizeof(info));
386 	info.rti_info[RTAX_DST] = dst;
387 	info.rti_info[RTAX_GATEWAY] = gateway;
388 	info.rti_info[RTAX_NETMASK] = netmask;
389 	info.rti_info[RTAX_AUTHOR] = src;
390 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
391 }
392 
393 /*
394 * Routing table ioctl interface.
395 */
396 int
397 rtioctl(u_long req, caddr_t data, struct thread *td)
398 {
399 #ifdef INET
400 	/* Multicast goop, grrr... */
401 	return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
402 #else
403 	return ENXIO;
404 #endif
405 }
406 
407 struct ifaddr *
408 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
409 {
410 	struct ifaddr *ifa;
411 
412 	if (!(flags & RTF_GATEWAY)) {
413 		/*
414 		 * If we are adding a route to an interface,
415 		 * and the interface is a pt to pt link
416 		 * we should search for the destination
417 		 * as our clue to the interface.  Otherwise
418 		 * we can use the local address.
419 		 */
420 		ifa = NULL;
421 		if (flags & RTF_HOST) {
422 			ifa = ifa_ifwithdstaddr(dst);
423 		}
424 		if (ifa == NULL)
425 			ifa = ifa_ifwithaddr(gateway);
426 	} else {
427 		/*
428 		 * If we are adding a route to a remote net
429 		 * or host, the gateway may still be on the
430 		 * other end of a pt to pt link.
431 		 */
432 		ifa = ifa_ifwithdstaddr(gateway);
433 	}
434 	if (ifa == NULL)
435 		ifa = ifa_ifwithnet(gateway);
436 	if (ifa == NULL) {
437 		struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
438 		if (rt == NULL)
439 			return (NULL);
440 		rt->rt_refcnt--;
441 		if ((ifa = rt->rt_ifa) == NULL)
442 			return (NULL);
443 	}
444 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
445 		struct ifaddr *oifa = ifa;
446 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
447 		if (ifa == NULL)
448 			ifa = oifa;
449 	}
450 	return (ifa);
451 }
452 
/* Tree-walk callbacks used to clean up cloned routes; defined below. */
static int rt_fixdelete (struct radix_node *, void *);
static int rt_fixchange (struct radix_node *, void *);

/* Argument bundle handed to rt_fixchange() through rnh_walktree_from(). */
struct rtfc_arg {
	struct rtentry *rt0;		/* the route being added/changed */
	struct radix_node_head *rnh;	/* the tree being walked */
};
460 
461 /*
462  * Do appropriate manipulations of a routing tree given
463  * all the bits of info needed
464  */
465 int
466 rtrequest(
467 	int req,
468 	struct sockaddr *dst,
469 	struct sockaddr *gateway,
470 	struct sockaddr *netmask,
471 	int flags,
472 	struct rtentry **ret_nrt)
473 {
474 	struct rt_addrinfo info;
475 
476 	bzero(&info, sizeof(info));
477 	info.rti_flags = flags;
478 	info.rti_info[RTAX_DST] = dst;
479 	info.rti_info[RTAX_GATEWAY] = gateway;
480 	info.rti_info[RTAX_NETMASK] = netmask;
481 	return rtrequest1(req, &info, ret_nrt);
482 }
483 
484 int
485 rt_getifa(struct rt_addrinfo *info)
486 {
487 	struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
488 	struct sockaddr *dst = info->rti_info[RTAX_DST];
489 	struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
490 	struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
491 	int flags = info->rti_flags;
492 	struct ifaddr *ifa;
493 	int error = 0;
494 
495 	/*
496 	 * ifp may be specified by sockaddr_dl
497 	 * when protocol address is ambiguous.
498 	 */
499 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
500 	    ifpaddr->sa_family == AF_LINK &&
501 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
502 		info->rti_ifp = ifa->ifa_ifp;
503 	if (info->rti_ifa == NULL && ifaaddr != NULL)
504 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
505 	if (info->rti_ifa == NULL) {
506 		struct sockaddr *sa;
507 
508 		sa = ifaaddr != NULL ? ifaaddr :
509 		    (gateway != NULL ? gateway : dst);
510 		if (sa != NULL && info->rti_ifp != NULL)
511 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
512 		else if (dst != NULL && gateway != NULL)
513 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
514 		else if (sa != NULL)
515 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
516 	}
517 	if ((ifa = info->rti_ifa) != NULL) {
518 		if (info->rti_ifp == NULL)
519 			info->rti_ifp = ifa->ifa_ifp;
520 	} else
521 		error = ENETUNREACH;
522 	return (error);
523 }
524 
/*
 * Carry out a routing table request described by an rt_addrinfo.
 *
 * req     - RTM_DELETE, RTM_RESOLVE or RTM_ADD; any other value yields
 *           EOPNOTSUPP.
 * info    - addresses (RTAX_DST, RTAX_GATEWAY, RTAX_NETMASK), flags and
 *           optionally the ifa/ifp to use.  Note that *info is modified
 *           here (netmask and flags are adjusted).
 * ret_nrt - RTM_RESOLVE: on entry the route to clone from (required),
 *           on return the clone.
 *           RTM_ADD: if non-NULL, receives the new route with one
 *           reference held for the caller.
 *           RTM_DELETE: if non-NULL, receives the unlinked route, which
 *           the caller is then responsible for freeing.
 *
 * Returns 0 on success or an errno value.  Runs at splnet throughout.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	struct sockaddr *dst = info->rti_info[RTAX_DST];
	struct rtentry *rt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	int s = splnet();
	int error = 0;

/* Record an error and leave through the common exit (restores spl). */
#define gotoerr(x) { error = x ; goto bad; }

	/*
	 * Find the correct routing tree to use for this Address Family
	 */
	if ((rnh = rt_tables[dst->sa_family]) == NULL)
		gotoerr(EAFNOSUPPORT);
	/*
	 * If we are adding a host route then we don't want to put
	 * a netmask in the tree, nor do we want to clone it.
	 */
	if (info->rti_flags & RTF_HOST) {
		info->rti_info[RTAX_NETMASK] = NULL;
		info->rti_flags &= ~(RTF_CLONING | RTF_PRCLONING);
	}
	switch (req) {
	case RTM_DELETE:
		/*
		 * Remove the item from the tree and return it.
		 * Complain if it is not there and do no more processing.
		 */
		if ((rn = rnh->rnh_deladdr((char *)dst,
		    (char *)info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			gotoerr(ESRCH);
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");
		rt = (struct rtentry *)rn;

		/*
		 * Now search what's left of the subtree for any cloned
		 * routes which might have been formed from this node.
		 */
		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
		    rt_mask(rt) != NULL) {
			rnh->rnh_walktree_from(rnh, (char *)dst,
					       (char *)rt_mask(rt),
					       rt_fixdelete, rt);
		}

		/*
		 * Remove any external references we may have.
		 * This might result in another rtentry being freed if
		 * we held its last reference.
		 */
		if (rt->rt_gwroute) {
			/* rt is borrowed briefly, then restored from rn. */
			rt = rt->rt_gwroute;
			RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/*
		 * NB: RTF_UP must be set during the search above,
		 * because we might delete the last ref, causing
		 * rt to get freed prematurely.
		 *  eh? then why not just add a reference?
		 * I'm not sure how RTF_UP helps matters. (JRE)
		 */
		rt->rt_flags &= ~RTF_UP;

		/*
		 * give the protocol a chance to keep things in sync.
		 */
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);

		/*
		 * one more rtentry floating around that is not
		 * linked to the routing table.
		 */
		rttrash++;

		/*
		 * If the caller wants it, then it can have it,
		 * but it's up to it to free the rtentry as we won't be
		 * doing it.
		 */
		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++; /* make a 1->0 transition */
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/*
		 * Clone from the route passed in through *ret_nrt: inherit
		 * its ifa, flags (minus cloning/static) and gateway, and
		 * use its genmask as the clone's netmask.
		 */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			gotoerr(EINVAL);
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags &
		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
		info->rti_flags |= RTF_WASCLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		goto makeroute;

	case RTM_ADD:
		if ((info->rti_flags & RTF_GATEWAY) &&
		    !info->rti_info[RTAX_GATEWAY])
			panic("rtrequest: GATEWAY but no gateway");

		if (info->rti_ifa == NULL && (error = rt_getifa(info)))
			gotoerr(error);
		ifa = info->rti_ifa;

makeroute:
		/* Shared by RTM_ADD and RTM_RESOLVE from here on. */
		R_Malloc(rt, struct rtentry *, sizeof(*rt));
		if (rt == NULL)
			gotoerr(ENOBUFS);
		bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | info->rti_flags;
		/*
		 * Add the gateway. Possibly re-malloc-ing the storage for it
		 * also add the rt_gwroute if possible.
		 */
		if ((error = rt_setgate(rt, dst, info->rti_info[RTAX_GATEWAY]))
		    != 0) {
			Free(rt);
			gotoerr(error);
		}

		/*
		 * point to the (possibly newly malloc'd) dest address.
		 */
		ndst = rt_key(rt);

		/*
		 * make sure it contains the value we want (masked if needed).
		 */
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(dst, ndst, info->rti_info[RTAX_NETMASK]);
		} else
			bcopy(dst, ndst, dst->sa_len);

		/*
		 * Note that we now have a reference to the ifa.
		 * This moved from below so that rnh->rnh_addaddr() can
		 * examine the ifa and  ifa->ifa_ifp if it so desires.
		 */
		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */

		rn = rnh->rnh_addaddr((char *)ndst,
				      (char *)info->rti_info[RTAX_NETMASK],
				      rnh, rt->rt_nodes);
		if (rn == NULL) {
			struct rtentry *rt2;
			/*
			 * Uh-oh, we already have one of these in the tree.
			 * We do a special hack: if the route that's already
			 * there was generated by the protocol-cloning
			 * mechanism, then we just blow it away and retry
			 * the insertion of the new one.
			 */
			rt2 = rtalloc1(dst, 0, RTF_PRCLONING);
			if (rt2 != NULL && rt2->rt_parent) {
				rtrequest(RTM_DELETE,
					  (struct sockaddr *)rt_key(rt2),
					  rt2->rt_gateway,
					  rt_mask(rt2), rt2->rt_flags, 0);
				RTFREE(rt2);
				rn = rnh->rnh_addaddr((char *)ndst,
				    (char *)info->rti_info[RTAX_NETMASK],
				    rnh, rt->rt_nodes);
			} else if (rt2 != NULL) {
				/* undo the extra ref we got */
				RTFREE(rt2);
			}
		}

		/*
		 * If it still failed to go into the tree,
		 * then un-make it (this should be a function)
		 */
		if (rn == NULL) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			if (rt->rt_ifa) {
				IFAFREE(rt->rt_ifa);
			}
			Free(rt_key(rt));
			Free(rt);
			gotoerr(EEXIST);
		}

		rt->rt_parent = 0;

		/*
		 * If we got here from RESOLVE, then we are cloning
		 * so clone the rest, and note that we
		 * are a clone (and increment the parent's references)
		 */
		if (req == RTM_RESOLVE) {
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
			if ((*ret_nrt)->rt_flags &
			    (RTF_CLONING | RTF_PRCLONING)) {
				rt->rt_parent = *ret_nrt;
				(*ret_nrt)->rt_refcnt++;
			}
		}

		/*
		 * if this protocol has something to add to this then
		 * allow it to do that as well.
		 */
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);

		/*
		 * We repeat the same procedure from rt_setgate() here because
		 * it doesn't fire when we call it there because the node
		 * hasn't been added to the tree yet.
		 */
		if (req == RTM_ADD && !(rt->rt_flags & RTF_HOST) &&
		    rt_mask(rt) != NULL) {
			struct rtfc_arg arg;
			arg.rnh = rnh;
			arg.rt0 = rt;
			rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
					       (char *)rt_mask(rt),
					       rt_fixchange, &arg);
		}

		/*
		 * actually return a resultant rtentry and
		 * give the caller a single reference.
		 */
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		break;
	default:
		error = EOPNOTSUPP;
	}
bad:
	/* Common exit for success and failure alike. */
	splx(s);
	return (error);
}
779 
780 /*
781  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
782  * (i.e., the routes related to it by the operation of cloning).  This
783  * routine is iterated over all potential former-child-routes by way of
784  * rnh->rnh_walktree_from() above, and those that actually are children of
785  * the late parent (passed in as VP here) are themselves deleted.
786  */
787 static int
788 rt_fixdelete(struct radix_node *rn, void *vp)
789 {
790 	struct rtentry *rt = (struct rtentry *)rn;
791 	struct rtentry *rt0 = vp;
792 
793 	if (rt->rt_parent == rt0 &&
794 	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
795 		return rtrequest(RTM_DELETE, rt_key(rt),
796 				 (struct sockaddr *)0, rt_mask(rt),
797 				 rt->rt_flags, (struct rtentry **)0);
798 	}
799 	return 0;
800 }
801 
802 /*
803  * This routine is called from rt_setgate() to do the analogous thing for
804  * adds and changes.  There is the added complication in this case of a
805  * middle insert; i.e., insertion of a new network route between an older
806  * network route and (cloned) host routes.  For this reason, a simple check
807  * of rt->rt_parent is insufficient; each candidate route must be tested
808  * against the (mask, value) of the new route (passed as before in vp)
809  * to see if the new route matches it.
810  *
811  * XXX - it may be possible to do fixdelete() for changes and reserve this
812  * routine just for adds.  I'm not sure why I thought it was necessary to do
813  * changes this way.
814  */
815 #ifdef DEBUG
816 static int rtfcdebug = 0;
817 #endif
818 
819 static int
820 rt_fixchange(struct radix_node *rn, void *vp)
821 {
822 	struct rtentry *rt = (struct rtentry *)rn;
823 	struct rtfc_arg *ap = vp;
824 	struct rtentry *rt0 = ap->rt0;
825 	struct radix_node_head *rnh = ap->rnh;
826 	u_char *xk1, *xm1, *xk2, *xmp;
827 	int i, len, mlen;
828 
829 #ifdef DEBUG
830 	if (rtfcdebug)
831 		printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
832 #endif
833 
834 	if (!rt->rt_parent ||
835 	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
836 #ifdef DEBUG
837 		if(rtfcdebug) printf("no parent, pinned or cloning\n");
838 #endif
839 		return 0;
840 	}
841 
842 	if (rt->rt_parent == rt0) {
843 #ifdef DEBUG
844 		if(rtfcdebug) printf("parent match\n");
845 #endif
846 		return rtrequest(RTM_DELETE, rt_key(rt),
847 				 (struct sockaddr *)0, rt_mask(rt),
848 				 rt->rt_flags, (struct rtentry **)0);
849 	}
850 
851 	/*
852 	 * There probably is a function somewhere which does this...
853 	 * if not, there should be.
854 	 */
855 	len = imin(((struct sockaddr *)rt_key(rt0))->sa_len,
856 		   ((struct sockaddr *)rt_key(rt))->sa_len);
857 
858 	xk1 = (u_char *)rt_key(rt0);
859 	xm1 = (u_char *)rt_mask(rt0);
860 	xk2 = (u_char *)rt_key(rt);
861 
862 	/* avoid applying a less specific route */
863 	xmp = (u_char *)rt_mask(rt->rt_parent);
864 	mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len;
865 	if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) {
866 #ifdef DEBUG
867 		if (rtfcdebug)
868 			printf("rt_fixchange: inserting a less "
869 			       "specific route\n");
870 #endif
871 		return 0;
872 	}
873 	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
874 		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
875 #ifdef DEBUG
876 			if (rtfcdebug)
877 				printf("rt_fixchange: inserting a less "
878 				       "specific route\n");
879 #endif
880 			return 0;
881 		}
882 	}
883 
884 	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
885 		if ((xk2[i] & xm1[i]) != xk1[i]) {
886 #ifdef DEBUG
887 			if(rtfcdebug) printf("no match\n");
888 #endif
889 			return 0;
890 		}
891 	}
892 
893 	/*
894 	 * OK, this node is a clone, and matches the node currently being
895 	 * changed/added under the node's mask.  So, get rid of it.
896 	 */
897 #ifdef DEBUG
898 	if(rtfcdebug) printf("deleting\n");
899 #endif
900 	return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
901 			 rt_mask(rt), rt->rt_flags, (struct rtentry **)0);
902 }
903 
/*
 * Round a sockaddr length up to a multiple of sizeof(long); a length
 * of zero (or less) yields sizeof(long).
 */
#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))

/*
 * Install `gate' as the gateway of route rt0, whose destination is `dst'.
 *
 * The destination key and the gateway live in a single allocation; it
 * is reused when the new gateway fits and replaced otherwise.  Any
 * cached rt_gwroute is dropped and, for RTF_GATEWAY routes, looked up
 * again.  For network routes that already have a mask, clones that the
 * change makes stale are removed via rt_fixchange().
 *
 * Returns 0 on success, EADDRNOTAVAIL for a host route whose gateway
 * equals its destination, ENOBUFS when the allocation fails, and EDQUOT
 * when the route would use itself as its gateway.
 */
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
{
	caddr_t newkey, oldkey;
	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry *rt = rt0;
	struct radix_node_head *rnh = rt_tables[dst->sa_family];

	/*
	 * A host route with the destination equal to the gateway
	 * will interfere with keeping LLINFO in the routing
	 * table, so disallow it.
	 */
	if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
					(RTF_HOST|RTF_GATEWAY)) &&
	    (dst->sa_len == gate->sa_len) &&
	    (bcmp(dst, gate, dst->sa_len) == 0)) {
		/*
		 * The route might already exist if this is an RTM_CHANGE
		 * or a routing redirect, so try to delete it.
		 */
		if (rt_key(rt0) != NULL)
			rtrequest(RTM_DELETE, rt_key(rt0), rt0->rt_gateway,
			    rt_mask(rt0), rt0->rt_flags, 0);
		return EADDRNOTAVAIL;
	}

	/*
	 * Both dst and gateway are stored in the same malloc'd chunk
	 * (If I ever get my hands on....)
	 * if we need to malloc a new chunk, then keep the old one around
	 * till we don't need it any more.
	 */
	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		/* oldkey is NULL here for a route that has no key yet. */
		oldkey = (caddr_t)rt_key(rt);
		R_Malloc(newkey, caddr_t, dlen + glen);
		if (newkey == NULL)
			return ENOBUFS;
		rt->rt_nodes->rn_key = newkey;
	} else {
		/*
		 * otherwise just overwrite the old one
		 */
		newkey = rt->rt_nodes->rn_key;
		oldkey = NULL;
	}

	/*
	 * copy the new gateway value into the memory chunk
	 * (the gateway sits right after the rounded-up destination)
	 */
	rt->rt_gateway = (struct sockaddr *)(newkey + dlen);
	bcopy(gate, rt->rt_gateway, glen);

	/*
	 * if we are replacing the chunk (or it's new) we need to
	 * replace the dst as well
	 */
	if (oldkey != NULL) {
		bcopy(dst, newkey, dlen);
		Free(oldkey);
	}

	/*
	 * If there is already a gwroute, it's now almost definitely wrong
	 * so drop it.
	 */
	if (rt->rt_gwroute != NULL) {
		RTFREE(rt->rt_gwroute);
		rt->rt_gwroute = NULL;
	}
	/*
	 * Cloning loop avoidance:
	 * In the presence of protocol-cloning and bad configuration,
	 * it is possible to get stuck in bottomless mutual recursion
	 * (rtrequest rt_setgate rtalloc1).  We avoid this by not allowing
	 * protocol-cloning to operate for gateways (which is probably the
	 * correct choice anyway), and avoid the resulting reference loops
	 * by disallowing any route to run through itself as a gateway.
	 * This is obviously mandatory when we get rt->rt_output().
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING);
		if (rt->rt_gwroute == rt) {
			RTFREE(rt->rt_gwroute);
			rt->rt_gwroute = 0;
			return EDQUOT; /* failure */
		}
	}

	/*
	 * This isn't going to do anything useful for host routes, so
	 * don't bother.  Also make sure we have a reasonable mask
	 * (we don't yet have one during adds).
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		rnh->rnh_walktree_from(rnh, (char*)rt_key(rt),
				       (char *)rt_mask(rt),
				       rt_fixchange, &arg);
	}

	return 0;
}
1011 
/*
 * Copy socket address `src' to `dst', ANDing the address bytes with
 * `netmask' on the way.
 *
 * The first byte of src and of netmask is taken as that address's
 * length.  The two leading bytes (sa_len and sa_family) are copied
 * unmasked; bytes from offset 2 up to the shorter of the two lengths
 * are masked, and the rest of dst, up to src's length, is zeroed.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	const unsigned char *in = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *out = (unsigned char *)dst;
	unsigned int srclen = in[0];
	unsigned int masklen = mask[0];
	unsigned int i;

	/* copies sa_len & sa_family */
	out[0] = in[0];
	out[1] = in[1];

	/* Never mask beyond the end of the source address. */
	if (masklen > srclen)
		masklen = srclen;

	for (i = 2; i < masklen; i++)
		out[i] = in[i] & mask[i];

	/* Bytes not covered by the mask are host bits: clear them. */
	if (i < srclen)
		bzero(out + i, srclen - i);
}
1031 
/*
 * Set up a routing table entry, normally
 * for an interface.
 *
 * ifa   - interface address the route belongs to; its address is also
 *         used as the route's gateway.
 * cmd   - request passed on to rtrequest1() (RTM_ADD and RTM_DELETE get
 *         extra handling here).
 * flags - RTF_* flags; with RTF_HOST the destination is ifa_dstaddr and
 *         no netmask is used, otherwise it is ifa_addr with ifa_netmask.
 *
 * Returns 0 on success or an errno value.  For a delete, the route must
 * exist and belong to ifa, otherwise EHOSTUNREACH/ENETUNREACH is
 * returned without touching the table.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst;
	struct sockaddr *deldst;
	struct sockaddr *netmask;
	struct mbuf *m = NULL;
	struct rtentry *nrt = NULL;
	struct radix_node_head *rnh;
	struct radix_node *rn;
	int error;
	struct rt_addrinfo info;

	if (flags & RTF_HOST) {
		dst = ifa->ifa_dstaddr;
		netmask = NULL;
	} else {
		dst = ifa->ifa_addr;
		netmask = ifa->ifa_netmask;
	}
	/*
	 * If it's a delete, check that if it exists, it's on the correct
	 * interface or we might scrub a route to another ifa which would
	 * be confusing at best and possibly worse.
	 */
	if (cmd == RTM_DELETE) {
		/*
		 * It's a delete, so it should already exist..
		 * If it's a net, mask off the host bits
		 * (Assuming we have a mask)
		 */
		if (netmask != NULL) {
			/* The mbuf is only scratch space for the masked dst. */
			m = m_get(MB_DONTWAIT, MT_SONAME);
			if (m == NULL)
				return(ENOBUFS);
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, netmask);
			dst = deldst;
		}
		/*
		 * Look up an rtentry that is in the routing tree and
		 * contains the correct info.
		 */
		if ((rnh = rt_tables[dst->sa_family]) == NULL ||
		    (rn = rnh->rnh_lookup((char *)dst, (char *)netmask,
		     rnh)) == NULL ||
		    (rn->rn_flags & RNF_ROOT) ||
		    ((struct rtentry *)rn)->rt_ifa != ifa ||
		    !sa_equal(SA(rn->rn_key), dst)) {
			if (m != NULL)
				(void) m_free(m);
			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
		}
		/* XXX */
#if 0
		else {
			/*
			 * One would think that as we are deleting, and we know
			 * it doesn't exist, we could just return at this point
			 * with an "ELSE" clause, but apparently not..
			 */
			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
		}
#endif
	}
	/*
	 * Do the actual request
	 */
	bzero(&info, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_NETMASK] = netmask;
	error = rtrequest1(cmd, &info, &nrt);
	if (error == 0 && (rt = nrt) != NULL) {
		/*
		 * notify any listening routing agents of the change
		 */
		rt_newaddrmsg(cmd, ifa, error, rt);
		if (cmd == RTM_DELETE) {
			/*
			 * If we are deleting, and we found an entry, then
			 * it's been removed from the tree.. now throw it away.
			 */
			if (rt->rt_refcnt <= 0) {
				rt->rt_refcnt++; /* make a 1->0 transition */
				rtfree(rt);
			}
		} else if (cmd == RTM_ADD) {
			/*
			 * We just wanted to add it.. we don't actually
			 * need a reference.
			 */
			rt->rt_refcnt--;
		}
	}
	/* Release the scratch mbuf used for the masked destination. */
	if (m != NULL)
		(void) m_free(m);
	return (error);
}
1138 
/*
 * Register route_init() to run at SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD.
 * This must be before ip6_init2(), which is now SI_ORDER_MIDDLE.
 */
SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
1141