xref: /openbsd/sys/net/route.c (revision 84d9c64a)
1 /*	$OpenBSD: route.c,v 1.438 2025/01/03 21:27:40 bluhm Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  *	This product includes software developed by the University of
79  *	California, Berkeley and its contributors.
80  *	This product includes software developed at the Information
81  *	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/mbuf.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/timeout.h>
110 #include <sys/domain.h>
111 #include <sys/ioctl.h>
112 #include <sys/kernel.h>
113 #include <sys/queue.h>
114 #include <sys/pool.h>
115 #include <sys/atomic.h>
116 #include <sys/mutex.h>
117 
118 #include <net/if.h>
119 #include <net/if_var.h>
120 #include <net/if_dl.h>
121 #include <net/route.h>
122 
123 #include <netinet/in.h>
124 #include <netinet/ip_var.h>
125 #include <netinet/in_var.h>
126 
127 #ifdef INET6
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/in6_var.h>
131 #endif
132 
133 #ifdef MPLS
134 #include <netmpls/mpls.h>
135 #endif
136 
137 #ifdef BFD
138 #include <net/bfd.h>
139 #endif
140 
141 /*
142  * Locks used to protect struct members:
143  *      a       atomic operations
144  *      I       immutable after creation
145  *      L       rtlabel_mtx
146  *      T       rttimer_mtx
147  */
148 
/*
 * Round ``a'' up to the next multiple of sizeof(long); a length of 0 is
 * rounded up to sizeof(long) so zero-length sockaddrs still get space.
 * Note: ``a'' must be parenthesized everywhere it is expanded, including
 * in the comparison, so expression arguments parse as intended.
 */
#define ROUNDUP(a) ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
150 
/* Give some jitter to hash, to avoid synchronization between routers. */
static uint32_t		rt_hashjitter;

extern unsigned int	rtmap_limit;

struct cpumem	*rtcounters;	/* per-CPU routing statistics counters */
int		 rttrash;	/* [a] routes not in table but not freed */
u_long		 rtgeneration;	/* [a] generation number, routes changed */

struct pool	rtentry_pool;		/* pool for rtentry structures */
struct pool	rttimer_pool;		/* pool for rttimer structures */

/* Forward declarations for file-local helpers. */
int	rt_setgwroute(struct rtentry *, const struct sockaddr *, u_int);
void	rt_putgwroute(struct rtentry *, struct rtentry *);
int	rtflushclone1(struct rtentry *, void *, u_int);
int	rtflushclone(struct rtentry *, unsigned int);
int	rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
struct rtentry *rt_match(const struct sockaddr *, uint32_t *, int,
    unsigned int);
int	rt_clone(struct rtentry **, const struct sockaddr *, unsigned int);
struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
static int rt_copysa(const struct sockaddr *, const struct sockaddr *,
    struct sockaddr **);

#define	LABELID_MAX	50000

/* Route label: refcounted mapping between a label id and its name. */
struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;		/* [L] */
	char			rtl_name[RTLABEL_LEN];	/* [I] */
	u_int16_t		rtl_id;			/* [I] */
	int			rtl_ref;		/* [L] */
};

TAILQ_HEAD(rt_labels, rt_label)	rt_labels =
    TAILQ_HEAD_INITIALIZER(rt_labels);		/* [L] */
struct mutex rtlabel_mtx = MUTEX_INITIALIZER(IPL_NET);
187 
/*
 * One-time initialization of the routing layer: statistics counters,
 * the rtentry pool, the multipath hash jitter and (optionally) BFD.
 */
void
route_init(void)
{
	rtcounters = counters_alloc(rts_ncounters);

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0,
	    "rtentry", NULL);

	/* Loop until non-zero; zero would leave the hash without jitter. */
	while (rt_hashjitter == 0)
		rt_hashjitter = arc4random();

#ifdef BFD
	bfdinit();
#endif
}
203 
/*
 * Check whether the route cached in ``ro'' is still usable for the
 * IPv4 destination ``dst'' (and source ``src'' for multipath) in
 * routing table ``rtableid''.
 *
 * Returns 0 on a cache hit.  On a miss the cache is reset and primed
 * with the new destination/source and ESRCH is returned, telling the
 * caller to perform an actual route lookup.
 */
int
route_cache(struct route *ro, const struct in_addr *dst,
    const struct in_addr *src, u_int rtableid)
{
	u_long gen;

	/*
	 * Load the global generation number before validating the cache;
	 * membar_consumer() pairs with the producer barrier done by
	 * writers before bumping rtgeneration.
	 */
	gen = atomic_load_long(&rtgeneration);
	membar_consumer();

	if (rtisvalid(ro->ro_rt) &&
	    ro->ro_generation == gen &&
	    ro->ro_tableid == rtableid &&
	    ro->ro_dstsa.sa_family == AF_INET &&
	    ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
		/*
		 * On a multipath route the source address selects the
		 * path, so it has to match as well — unless the caller
		 * gave none or multipath is disabled.
		 */
		if (src == NULL || !ipmultipath ||
		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
		    (ro->ro_srcin.s_addr != INADDR_ANY &&
		    ro->ro_srcin.s_addr == src->s_addr)) {
			ipstat_inc(ips_rtcachehit);
			return (0);
		}
	}

	/* Miss: release the stale route and prime for a new lookup. */
	ipstat_inc(ips_rtcachemiss);
	rtfree(ro->ro_rt);
	memset(ro, 0, sizeof(*ro));
	ro->ro_generation = gen;
	ro->ro_tableid = rtableid;

	ro->ro_dstsin.sin_family = AF_INET;
	ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
	ro->ro_dstsin.sin_addr = *dst;
	if (src != NULL)
		ro->ro_srcin = *src;

	return (ESRCH);
}
241 
242 /*
243  * Check cache for route, else allocate a new one, potentially using multipath
244  * to select the peer.  Update cache and return valid route or NULL.
245  */
246 struct rtentry *
route_mpath(struct route * ro,const struct in_addr * dst,const struct in_addr * src,u_int rtableid)247 route_mpath(struct route *ro, const struct in_addr *dst,
248     const struct in_addr *src, u_int rtableid)
249 {
250 	if (route_cache(ro, dst, src, rtableid)) {
251 		uint32_t *s = NULL;
252 
253 		if (ro->ro_srcin.s_addr != INADDR_ANY)
254 			s = &ro->ro_srcin.s_addr;
255 		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, s, ro->ro_tableid);
256 	}
257 	return (ro->ro_rt);
258 }
259 
260 #ifdef INET6
/*
 * IPv6 variant of route_cache(): return 0 when the route cached in
 * ``ro'' still matches ``dst'' (and ``src'' for multipath) in table
 * ``rtableid''; otherwise reset and prime the cache and return ESRCH
 * so the caller performs an actual lookup.
 */
int
route6_cache(struct route *ro, const struct in6_addr *dst,
    const struct in6_addr *src, u_int rtableid)
{
	u_long gen;

	/* Paired with the producer barrier of rtgeneration writers. */
	gen = atomic_load_long(&rtgeneration);
	membar_consumer();

	if (rtisvalid(ro->ro_rt) &&
	    ro->ro_generation == gen &&
	    ro->ro_tableid == rtableid &&
	    ro->ro_dstsa.sa_family == AF_INET6 &&
	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
		/* Multipath routes additionally require a source match. */
		if (src == NULL || !ip6_multipath ||
		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
		    (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
		    IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
			ip6stat_inc(ip6s_rtcachehit);
			return (0);
		}
	}

	/* Miss: drop the stale route and prime for a new lookup. */
	ip6stat_inc(ip6s_rtcachemiss);
	rtfree(ro->ro_rt);
	memset(ro, 0, sizeof(*ro));
	ro->ro_generation = gen;
	ro->ro_tableid = rtableid;

	ro->ro_dstsin6.sin6_family = AF_INET6;
	ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
	ro->ro_dstsin6.sin6_addr = *dst;
	if (src != NULL)
		ro->ro_srcin6 = *src;

	return (ESRCH);
}
298 
299 struct rtentry *
route6_mpath(struct route * ro,const struct in6_addr * dst,const struct in6_addr * src,u_int rtableid)300 route6_mpath(struct route *ro, const struct in6_addr *dst,
301     const struct in6_addr *src, u_int rtableid)
302 {
303 	if (route6_cache(ro, dst, src, rtableid)) {
304 		uint32_t *s = NULL;
305 
306 		if (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6))
307 			s = &ro->ro_srcin6.s6_addr32[0];
308 		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, s, ro->ro_tableid);
309 	}
310 	return (ro->ro_rt);
311 }
312 #endif
313 
314 /*
315  * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
316  */
317 int
rtisvalid(struct rtentry * rt)318 rtisvalid(struct rtentry *rt)
319 {
320 	if (rt == NULL)
321 		return (0);
322 
323 	if (!ISSET(rt->rt_flags, RTF_UP))
324 		return (0);
325 
326 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
327 		KASSERT(rt->rt_gwroute != NULL);
328 		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
329 		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
330 			return (0);
331 	}
332 
333 	return (1);
334 }
335 
336 /*
337  * Do the actual lookup for rtalloc(9), do not use directly!
338  *
339  * Return the best matching entry for the destination ``dst''.
340  *
341  * "RT_RESOLVE" means that a corresponding L2 entry should
342  * be added to the routing table and resolved (via ARP or
343  * NDP), if it does not exist.
344  */
struct rtentry *
rt_match(const struct sockaddr *dst, uint32_t *src, int flags,
    unsigned int tableid)
{
	struct rtentry		*rt = NULL;

	rt = rtable_match(tableid, dst, src);
	if (rt == NULL) {
		rtstat_inc(rts_unreach);
		return (NULL);
	}

	/*
	 * Cloning is best effort: the return value is ignored and on
	 * failure ``rt'' is left pointing at the cloning route itself.
	 */
	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
		rt_clone(&rt, dst, tableid);

	rt->rt_use++;
	return (rt);
}
363 
/*
 * Clone an L2 (host) route for ``dst'' from the cloning route *rtp.
 * On success *rtp is replaced by the new entry (the old reference is
 * released) and an RTM_ADD message is sent; on failure *rtp is left
 * untouched and an RTM_MISS message is sent instead.
 */
int
rt_clone(struct rtentry **rtp, const struct sockaddr *dst,
    unsigned int rtableid)
{
	struct rt_addrinfo	 info;
	struct rtentry		*rt = *rtp;
	int			 error = 0;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/*
	 * The priority of cloned route should be different
	 * to avoid conflict with /32 cloning routes.
	 *
	 * It should also be higher to let the ARP layer find
	 * cloned routes instead of the cloning one.
	 */
	KERNEL_LOCK();
	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
	    rtableid);
	KERNEL_UNLOCK();
	if (error) {
		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
	} else {
		/* Inform listeners of the new route */
		rtm_send(rt, RTM_ADD, 0, rtableid);
		rtfree(*rtp);
		*rtp = rt;
	}
	return (error);
}
396 
397 /*
398  * Originated from bridge_hash() in if_bridge.c
399  */
/*
 * 96-bit mixing step: each of a, b and c is folded into the other two
 * several times so that every input bit affects the final value of c.
 */
#define mix(a, b, c) do {						\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)
411 
/*
 * Compute a 16-bit multipath selection hash over the destination and
 * source addresses, seeded with the per-boot rt_hashjitter.
 *
 * Returns -1 when hashing does not apply: no source address, invalid
 * or non-multipath route, or multipath disabled for the family.
 */
int
rt_hash(struct rtentry *rt, const struct sockaddr *dst, uint32_t *src)
{
	uint32_t a, b, c;

	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
		return (-1);

	a = b = 0x9e3779b9;	/* golden-ratio constant */
	c = rt_hashjitter;

	switch (dst->sa_family) {
	case AF_INET:
	    {
		const struct sockaddr_in *sin;

		if (!ipmultipath)
			return (-1);

		/* One mix round covers both 32-bit addresses. */
		sin = satosin_const(dst);
		a += sin->sin_addr.s_addr;
		b += src[0];
		mix(a, b, c);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		const struct sockaddr_in6 *sin6;

		if (!ip6_multipath)
			return (-1);

		/*
		 * Four mix rounds fold in all 128 bits of both the
		 * destination and the source address.
		 */
		sin6 = satosin6_const(dst);
		a += sin6->sin6_addr.s6_addr32[0];
		b += sin6->sin6_addr.s6_addr32[2];
		c += src[0];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[1];
		b += sin6->sin6_addr.s6_addr32[3];
		c += src[1];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[2];
		b += sin6->sin6_addr.s6_addr32[1];
		c += src[2];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[3];
		b += sin6->sin6_addr.s6_addr32[0];
		c += src[3];
		mix(a, b, c);
		break;
	    }
#endif /* INET6 */
	}

	/* Only the low 16 bits of the mix are used. */
	return (c & 0xffff);
}
469 
470 /*
471  * Allocate a route, potentially using multipath to select the peer.
472  */
struct rtentry *
rtalloc_mpath(const struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
{
	/* Multipath lookups always resolve cloning routes. */
	return (rt_match(dst, src, RT_RESOLVE, rtableid));
}
478 
479 /*
480  * Look in the routing table for the best matching entry for
481  * ``dst''.
482  *
483  * If a route with a gateway is found and its next hop is no
484  * longer valid, try to cache it.
485  */
struct rtentry *
rtalloc(const struct sockaddr *dst, int flags, unsigned int rtableid)
{
	/* Plain lookup: no source address, caller-supplied flags. */
	return (rt_match(dst, NULL, flags, rtableid));
}
491 
492 /*
493  * Cache the route entry corresponding to a reachable next hop in
494  * the gateway entry ``rt''.
495  */
int
rt_setgwroute(struct rtentry *rt, const struct sockaddr *gate, u_int rtableid)
{
	struct rtentry *prt, *nhrt;
	unsigned int rdomain = rtable_l2(rtableid);
	int error;

	NET_ASSERT_LOCKED();

	/* If we cannot find a valid next hop bail. */
	nhrt = rt_match(gate, NULL, RT_RESOLVE, rdomain);
	if (nhrt == NULL)
		return (ENOENT);

	/* Next hop entry must be on the same interface. */
	if (nhrt->rt_ifidx != rt->rt_ifidx) {
		struct sockaddr_in6	sa_mask;

		/* Only cloned L2 entries can be re-resolved below. */
		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
			rtfree(nhrt);
			return (EHOSTUNREACH);
		}

		/*
		 * We found a L2 entry, so we might have multiple
		 * RTF_CLONING routes for the same subnet.  Query
		 * the first route of the multipath chain and iterate
		 * until we find the correct one.
		 */
		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
		rtfree(nhrt);

		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
			prt = rtable_iterate(prt);

		/* We found nothing or a non-cloning MPATH route. */
		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
			rtfree(prt);
			return (EHOSTUNREACH);
		}

		/* Clone the L2 entry on the interface we want. */
		error = rt_clone(&prt, gate, rdomain);
		if (error) {
			rtfree(prt);
			return (error);
		}
		nhrt = prt;
	}

	/*
	 * Next hop must be reachable, this also prevents rtentry
	 * loops for example when rt->rt_gwroute points to rt.
	 */
	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
		rtfree(nhrt);
		return (ENETUNREACH);
	}

	/*
	 * If the MTU of next hop is 0, this will reset the MTU of the
	 * route to run PMTUD again from scratch.
	 */
	if (!ISSET(rt->rt_locks, RTV_MTU)) {
		u_int mtu, nhmtu;

		/* Lower our MTU to the next hop's, racing other updaters. */
		mtu = atomic_load_int(&rt->rt_mtu);
		nhmtu = atomic_load_int(&nhrt->rt_mtu);
		if (mtu > nhmtu)
			atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
	}

	/*
	 * To avoid reference counting problems when writing link-layer
	 * addresses in an outgoing packet, we ensure that the lifetime
	 * of a cached entry is greater than the bigger lifetime of the
	 * gateway entries it is pointed by.
	 */
	nhrt->rt_flags |= RTF_CACHED;
	nhrt->rt_cachecnt++;

	/* commit */
	rt_putgwroute(rt, nhrt);

	return (0);
}
583 
584 /*
585  * Invalidate the cached route entry of the gateway entry ``rt''.
586  */
/*
 * Replace the cached next-hop route of the gateway entry ``rt'' with
 * ``nhrt'' (which may be NULL to only invalidate), releasing the old
 * entry's cache reference.  The new reference, if any, is donated by
 * the caller (see rt_setgwroute()).
 */
void
rt_putgwroute(struct rtentry *rt, struct rtentry *nhrt)
{
	struct rtentry *onhrt;

	NET_ASSERT_LOCKED();

	if (!ISSET(rt->rt_flags, RTF_GATEWAY))
		return;

	/* this is protected as per [X] in route.h */
	onhrt = rt->rt_gwroute;
	rt->rt_gwroute = nhrt;

	if (onhrt != NULL) {
		KASSERT(onhrt->rt_cachecnt > 0);
		KASSERT(ISSET(onhrt->rt_flags, RTF_CACHED));

		/* Last cache user gone: clear RTF_CACHED again. */
		--onhrt->rt_cachecnt;
		if (onhrt->rt_cachecnt == 0)
			CLR(onhrt->rt_flags, RTF_CACHED);

		rtfree(onhrt);
	}
}
612 
/* Take an additional reference on ``rt''; release with rtfree(). */
void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}
618 
/*
 * Release one reference on ``rt'' (NULL is a no-op); the last
 * reference frees the entry and everything attached to it.
 */
void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	/* Last reference: the route must already be out of the table. */
	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	/* Free size must match the ROUNDUP()ed allocation of the gateway. */
	if (rt->rt_gateway != NULL) {
		free(rt->rt_gateway, M_RTABLE,
		    ROUNDUP(rt->rt_gateway->sa_len));
	}
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);

	pool_put(&rtentry_pool, rt);
}
646 
/* Take a reference on ``ifa'' and return it, for call chaining. */
struct ifaddr *
ifaref(struct ifaddr *ifa)
{
	refcnt_take(&ifa->ifa_refcnt);
	return ifa;
}
653 
/* Release one reference on ``ifa''; the last one frees it. */
void
ifafree(struct ifaddr *ifa)
{
	if (refcnt_rele(&ifa->ifa_refcnt) == 0)
		return;
	/* Size 0: length of the allocation is not tracked here. */
	free(ifa, M_IFADDR, 0);
}
661 
662 /*
663  * Force a routing table entry to the specified
664  * destination to go through the given gateway.
665  * Normally called as a result of a routing redirect
666  * message from the network layer.
667  */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	/* rts_ncounters doubles as "no statistic to bump". */
	enum rtstat_counters	 stat = rts_ncounters;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	unsigned int		 ifidx = 0;
	int			 flags = RTF_GATEWAY|RTF_HOST;
	uint8_t			 prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	/*
	 * NOTE(review): ifa is used below after rtfree(rt); presumably
	 * its lifetime is guaranteed by the interface — confirm.
	 */
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	/* Hand the route to the caller on success, else drop it. */
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	/* Always report the redirect on the routing socket. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}
778 
779 /*
780  * Delete a route and generate a message
781  */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6	sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	/* Host routes carry no netmask. */
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	/* On success rtrequest_delete() returned a reference via &rt. */
	if (error == 0)
		rtfree(rt);
	return (error);
}
813 
814 static inline int
rtequal(struct rtentry * a,struct rtentry * b)815 rtequal(struct rtentry *a, struct rtentry *b)
816 {
817 	if (a == b)
818 		return 1;
819 
820 	if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
821 	    rt_plen(a) == rt_plen(b))
822 		return 1;
823 	else
824 		return 0;
825 }
826 
/*
 * rtable_walk() callback for rtflushclone(): return EEXIST to make
 * the walker stop at ``rt'' so the caller can delete it, 0 to skip.
 */
int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached route must stay alive as long as their parent are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}
854 
/*
 * Delete all routes cloned from ``parent'' in table ``rtableid''.
 * Re-walks the table after each deletion until no candidate is left.
 */
int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		/* EEXIST from the walker marks a route to delete. */
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				/* EAGAIN restarts the walk from scratch. */
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;

}
888 
/*
 * Remove the route described by ``info'' from table ``tableid''.
 * When ``ifp'' is given, only a route on that interface is deleted.
 * On success, the (now detached) entry is handed back via *ret_nrt
 * with its reference, or freed when ret_nrt is NULL.
 */
int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry	*rt;
	int		 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller want to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt, NULL);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	/*
	 * NOTE(review): ifp is dereferenced unconditionally here although
	 * the check above tolerates ifp == NULL — looks like callers always
	 * pass a valid ifp; confirm before relying on a NULL argument.
	 */
	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	/* Entry is out of the table but not yet freed. */
	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	/* Publish the change before bumping the generation number. */
	membar_producer();
	atomic_inc_long(&rtgeneration);

	return (0);
}
950 
/*
 * Create a route entry in table ``tableid''.
 *
 * ``req'' is RTM_ADD, or RTM_RESOLVE to clone a host entry from an
 * RTF_CLONING parent passed in via ``*ret_nrt''.  RTM_DELETE is
 * rejected; deletion goes through rtrequest_delete().  On success,
 * if ``ret_nrt'' is non-NULL it receives a reference to the new
 * entry that the caller must rtfree().
 */
int
rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct ifnet		*ifp;
	struct rtentry		*rt, *crt;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
	struct sockaddr_dl	 sa_dl = { sizeof(sa_dl), AF_LINK };
	int			 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	/* Host routes carry no netmask by definition. */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		return (EINVAL);

	case RTM_RESOLVE:
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			return (EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			return (EINVAL);
		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
		/*
		 * Derive the clone's attributes from its parent: same
		 * ifa and label, cloned host flags, and an AF_LINK
		 * gateway placeholder to be resolved later (e.g. ARP).
		 */
		info->rti_ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL)
			return (EINVAL);
		ifa = info->rti_ifa;
		ifp = ifa->ifa_ifp;
		if (prio == 0)
			prio = ifp->if_priority + RTP_STATIC;

		/* ``ndst'' is a trimmed/masked private copy of the dst. */
		error = rt_copysa(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], &ndst);
		if (error)
			return (error);

		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL) {
			free(ndst, M_RTABLE, ndst->sa_len);
			return (ENOBUFS);
		}

		refcnt_init_trace(&rt->rt_refcnt, DT_REFCNT_IDX_RTENTRY);
		rt->rt_flags = info->rti_flags | RTF_UP;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);

		/* Check the link state if the table supports it. */
		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
		    !ISSET(ifp->if_flags, IFF_UP))) {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			/* Takes a reference on the label id. */
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
			    info->rti_mpls);
			if (error) {
				free(ndst, M_RTABLE, ndst->sa_len);
				pool_put(&rtentry_pool, rt);
				return (error);
			}
		} else
			rt_mpls_clear(rt);
#endif

		rt->rt_ifa = ifaref(ifa);
		rt->rt_ifidx = ifp->if_index;
		/*
		 * Copy metrics and a back pointer from the cloned
		 * route's parent.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED)) {
			rtref(*ret_nrt);
			rt->rt_parent = *ret_nrt;
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
		}

		/*
		 * We must set rt->rt_gateway before adding ``rt'' to
		 * the routing table because the radix MPATH code use
		 * it to (re)order routes.
		 */
		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
		    tableid))) {
			/* Undo everything acquired above, then bail. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt, NULL);
			if (rt->rt_gateway != NULL) {
				free(rt->rt_gateway, M_RTABLE,
				    ROUNDUP(rt->rt_gateway->sa_len));
			}
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (error);
		}

		error = rtable_insert(tableid, ndst,
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    rt->rt_priority, rt);
		if (error != 0 &&
		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
			/* overwrite cloned route */
			if (ISSET(crt->rt_flags, RTF_CLONED) &&
			    !ISSET(crt->rt_flags, RTF_CACHED)) {
				struct ifnet *cifp;

				cifp = if_get(crt->rt_ifidx);
				KASSERT(cifp != NULL);
				rtdeletemsg(crt, cifp, tableid);
				if_put(cifp);

				/* Retry now that the clone is gone. */
				error = rtable_insert(tableid, ndst,
				    info->rti_info[RTAX_NETMASK],
				    info->rti_info[RTAX_GATEWAY],
				    rt->rt_priority, rt);
			}
			rtfree(crt);
		}
		if (error != 0) {
			/* Same unwind as the rt_setgate() failure path. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt, NULL);
			if (rt->rt_gateway != NULL) {
				free(rt->rt_gateway, M_RTABLE,
				    ROUNDUP(rt->rt_gateway->sa_len));
			}
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (EEXIST);
		}
		/* Give the driver a chance to set up per-route state. */
		ifp->if_rtrequest(ifp, req, rt);

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);

		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else
			rtfree(rt);

		/* Publish the table change to lockless readers. */
		membar_producer();
		atomic_inc_long(&rtgeneration);

		break;
	}

	return (0);
}
1125 
/*
 * Install a private copy of ``gate'' as the gateway address of ``rt''
 * and, for RTF_GATEWAY routes, (re)resolve the cached next hop route
 * in rtable ``rtableid''.
 *
 * Returns 0 on success, ENOBUFS if the copy cannot be allocated, or
 * the error from rt_setgwroute().
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate, u_int rtableid)
{
	int glen = ROUNDUP(gate->sa_len);
	struct sockaddr *sa, *osa;
	int error = 0;

	KASSERT(gate != NULL);
	if (rt->rt_gateway == gate) {
		/* nop */
		return (0);
	}

	/* Allocate the copy up front so failure leaves ``rt'' untouched. */
	sa = malloc(glen, M_RTABLE, M_NOWAIT | M_ZERO);
	if (sa == NULL)
		return (ENOBUFS);
	memcpy(sa, gate, gate->sa_len);

	KERNEL_LOCK(); /* see [X] in route.h */
	osa = rt->rt_gateway;
	rt->rt_gateway = sa;

	if (ISSET(rt->rt_flags, RTF_GATEWAY))
		error = rt_setgwroute(rt, gate, rtableid);
	KERNEL_UNLOCK();

	/* Free the previous gateway copy, if any, outside the lock. */
	if (osa != NULL)
		free(osa, M_RTABLE, ROUNDUP(osa->sa_len));

	return (error);
}
1157 
1158 /*
1159  * Return the route entry containing the next hop link-layer
1160  * address corresponding to ``rt''.
1161  */
1162 struct rtentry *
rt_getll(struct rtentry * rt)1163 rt_getll(struct rtentry *rt)
1164 {
1165 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1166 		KASSERT(rt->rt_gwroute != NULL);
1167 		return (rt->rt_gwroute);
1168 	}
1169 
1170 	return (rt);
1171 }
1172 
1173 void
rt_maskedcopy(struct sockaddr * src,struct sockaddr * dst,struct sockaddr * netmask)1174 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
1175     struct sockaddr *netmask)
1176 {
1177 	u_char	*cp1 = (u_char *)src;
1178 	u_char	*cp2 = (u_char *)dst;
1179 	u_char	*cp3 = (u_char *)netmask;
1180 	u_char	*cplim = cp2 + *cp3;
1181 	u_char	*cplim2 = cp2 + *cp1;
1182 
1183 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
1184 	cp3 += 2;
1185 	if (cplim > cplim2)
1186 		cplim = cplim2;
1187 	while (cp2 < cplim)
1188 		*cp2++ = *cp1++ & *cp3++;
1189 	if (cp2 < cplim2)
1190 		bzero(cp2, cplim2 - cp2);
1191 }
1192 
/*
 * allocate new sockaddr structure based on the user supplied src and mask
 * that is useable for the routing table.
 *
 * The copy is trimmed to the domain's canonical sockaddr size and
 * masked down to the prefix length derived from ``mask''.  On success
 * *dst receives an M_RTABLE allocation owned by the caller.
 */
static int
rt_copysa(const struct sockaddr *src, const struct sockaddr *mask,
    struct sockaddr **dst)
{
	/* maskarray[r] keeps the top ``r'' bits of the last partial byte */
	static const u_char maskarray[] = {
	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	struct sockaddr *ndst;
	const struct domain *dp;
	u_char *csrc, *cdst;
	int i, plen;

	/* Find the routing-capable domain matching src's address family. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;
		if (src->sa_family == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (EAFNOSUPPORT);

	if (src->sa_len < dp->dom_sasize)
		return (EINVAL);

	/* Convert the netmask into a prefix length; -1 means invalid. */
	plen = rtable_satoplen(src->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
	if (ndst == NULL)
		return (ENOBUFS);

	ndst->sa_family = src->sa_family;
	ndst->sa_len = dp->dom_sasize;

	csrc = (u_char *)src + dp->dom_rtoffset;
	cdst = (u_char *)ndst + dp->dom_rtoffset;

	/* Copy the whole bytes of the prefix, then mask the partial one. */
	memcpy(cdst, csrc, plen / 8);
	if (plen % 8 != 0)
		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];

	*dst = ndst;
	return (0);
}
1241 
/*
 * Install a route to ``dst'' through address ``ifa'' in routing domain
 * ``rdomain'' and announce it to userland on success.
 *
 * The route priority is derived from the interface priority unless the
 * flags mark the route local/broadcast or connected.
 */
int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct sockaddr_rtlabel	 sa_rl;
	struct rt_addrinfo	 info;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	/* Link-layer routes use the interface's link address as gateway. */
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Later checks may override prio; RTF_CONNECTED wins over local. */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}
1293 
/*
 * Remove the route to ``dst'' through address ``ifa'' from routing
 * domain ``rdomain'' and announce the removal to userland on success.
 *
 * For network routes the destination is first masked with the ifa
 * netmask (using an mbuf as scratch space for the masked copy).
 */
int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct mbuf		*m = NULL;
	struct sockaddr		*deldst;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	KASSERT(rdomain == rtable_l2(rdomain));

	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		/* Temporary storage for the masked destination. */
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Same priority selection as in rt_ifa_add(). */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	m_free(m);

	return (error);
}
1347 
1348 /*
1349  * Add ifa's address as a local rtentry.
1350  */
1351 int
rt_ifa_addlocal(struct ifaddr * ifa)1352 rt_ifa_addlocal(struct ifaddr *ifa)
1353 {
1354 	struct ifnet *ifp = ifa->ifa_ifp;
1355 	struct rtentry *rt;
1356 	u_int flags = RTF_HOST|RTF_LOCAL;
1357 	int error = 0;
1358 
1359 	/*
1360 	 * If the configured address correspond to the magical "any"
1361 	 * address do not add a local route entry because that might
1362 	 * corrupt the routing tree which uses this value for the
1363 	 * default routes.
1364 	 */
1365 	switch (ifa->ifa_addr->sa_family) {
1366 	case AF_INET:
1367 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1368 			return (0);
1369 		break;
1370 #ifdef INET6
1371 	case AF_INET6:
1372 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1373 		    &in6addr_any))
1374 			return (0);
1375 		break;
1376 #endif
1377 	default:
1378 		break;
1379 	}
1380 
1381 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1382 		flags |= RTF_LLINFO;
1383 
1384 	/* If there is no local entry, allocate one. */
1385 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1386 	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
1387 		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
1388 		    ifp->if_rdomain);
1389 	}
1390 	rtfree(rt);
1391 
1392 	return (error);
1393 }
1394 
1395 /*
1396  * Remove local rtentry of ifa's address if it exists.
1397  */
1398 int
rt_ifa_dellocal(struct ifaddr * ifa)1399 rt_ifa_dellocal(struct ifaddr *ifa)
1400 {
1401 	struct ifnet *ifp = ifa->ifa_ifp;
1402 	struct rtentry *rt;
1403 	u_int flags = RTF_HOST|RTF_LOCAL;
1404 	int error = 0;
1405 
1406 	/*
1407 	 * We do not add local routes for such address, so do not bother
1408 	 * removing them.
1409 	 */
1410 	switch (ifa->ifa_addr->sa_family) {
1411 	case AF_INET:
1412 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1413 			return (0);
1414 		break;
1415 #ifdef INET6
1416 	case AF_INET6:
1417 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1418 		    &in6addr_any))
1419 			return (0);
1420 		break;
1421 #endif
1422 	default:
1423 		break;
1424 	}
1425 
1426 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1427 		flags |= RTF_LLINFO;
1428 
1429 	/*
1430 	 * Before deleting, check if a corresponding local host
1431 	 * route surely exists.  With this check, we can avoid to
1432 	 * delete an interface direct route whose destination is same
1433 	 * as the address being removed.  This can happen when removing
1434 	 * a subnet-router anycast address on an interface attached
1435 	 * to a shared medium.
1436 	 */
1437 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1438 	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
1439 		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
1440 		    ifp->if_rdomain);
1441 	}
1442 	rtfree(rt);
1443 
1444 	return (error);
1445 }
1446 
/*
 * Remove all addresses attached to ``ifa''.
 *
 * Walks every rtable in the ifp's routing domain; the walker flags
 * entries owned by ``ifa'' with EEXIST, which are then deleted here
 * and the walk restarted (EAGAIN) until none remain.
 */
void
rt_ifa_purge(struct ifaddr *ifa)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt = NULL;
	unsigned int		 rtableid;
	int			 error, af = ifa->ifa_addr->sa_family;

	KASSERT(ifp != NULL);

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;

		do {
			error = rtable_walk(rtableid, af, &rt,
			    rt_ifa_purge_walker, ifa);
			if (rt != NULL && error == EEXIST) {
				/* Delete the flagged entry and rescan. */
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
			}
			rtfree(rt);
			rt = NULL;
		} while (error == EAGAIN);

		/* No routes of this family in the table: not an error. */
		if (error == EAFNOSUPPORT)
			error = 0;

		if (error)
			break;
	}
}
1484 
1485 int
rt_ifa_purge_walker(struct rtentry * rt,void * vifa,unsigned int rtableid)1486 rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
1487 {
1488 	struct ifaddr		*ifa = vifa;
1489 
1490 	if (rt->rt_ifa == ifa)
1491 		return EEXIST;
1492 
1493 	return 0;
1494 }
1495 
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* Protects the [T] fields of struct rttimer and the queue lists. */
struct mutex			rttimer_mtx;

struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
	time_t			rtt_expire;	/* [I] rt expire time */
	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
};

/*
 * Run the action for timer ``r'': the owning queue's callback if one
 * was registered, otherwise the default action of deleting dynamic
 * host routes (e.g. expired redirects).
 */
#define RTTIMER_CALLOUT(r)	{					\
	if (r->rtt_queue->rtq_func != NULL) {				\
		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
	} else {							\
		struct ifnet *ifp;					\
									\
		ifp = if_get(r->rtt_rt->rt_ifidx);			\
		if (ifp != NULL &&					\
		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
		    (RTF_DYNAMIC|RTF_HOST))				\
			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
		if_put(ifp);						\
	}								\
}
1531 
/* One-time initialization of the route timer pool and its mutex. */
void
rt_timer_init(void)
{
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
	    IPL_MPFLOOR, 0, "rttmr", NULL);
	mtx_init(&rttimer_mtx, IPL_MPFLOOR);
}
1539 
1540 void
rt_timer_queue_init(struct rttimer_queue * rtq,int timeout,void (* func)(struct rtentry *,u_int))1541 rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
1542     void (*func)(struct rtentry *, u_int))
1543 {
1544 	rtq->rtq_timeout = timeout;
1545 	rtq->rtq_count = 0;
1546 	rtq->rtq_func = func;
1547 	TAILQ_INIT(&rtq->rtq_head);
1548 }
1549 
/*
 * Change the timeout of queue ``rtq''.  Only affects timers added
 * afterwards; already-armed timeouts keep their original deadline.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}
1557 
/*
 * Remove every timer from queue ``rtq'', run its action and free it.
 *
 * Entries are unlinked under the mutex and collected on a local list;
 * the callouts then run without the mutex held since they may take
 * other locks (e.g. for route deletion).
 *
 * NOTE(review): pending timeouts are not timeout_del()'d here —
 * presumably callers only flush once no timeout can still fire;
 * verify against the call sites.
 */
void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}
1583 
/* Return the number of timers on ``rtq'' (unlocked snapshot). */
unsigned long
rt_timer_queue_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1589 
/*
 * Detach timer ``r'' from its route and queue.
 *
 * Returns ``r'' for the caller to free, or NULL if the timeout has
 * already fired — in that case rt_timer_timer() still owns the entry
 * and will unqueue and free it itself.
 */
static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}
1608 
/*
 * Cancel and free all timers attached to route ``rt'' without running
 * their actions.  Timers whose timeout already fired are left for
 * rt_timer_timer() to free.
 */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	/* Free the collected entries outside the mutex. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}
1629 
1630 time_t
rt_timer_get_expire(const struct rtentry * rt)1631 rt_timer_get_expire(const struct rtentry *rt)
1632 {
1633 	const struct rttimer	*r;
1634 	time_t			 expire = 0;
1635 
1636 	mtx_enter(&rttimer_mtx);
1637 	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1638 		if (expire == 0 || expire > r->rtt_expire)
1639 			expire = r->rtt_expire;
1640 	}
1641 	mtx_leave(&rttimer_mtx);
1642 
1643 	return expire;
1644 }
1645 
/*
 * Arm a timer from ``queue'' on route ``rt'' in table ``rtableid''.
 * At most one timer per (route, queue) pair exists; an existing one
 * is replaced.  Returns 0 or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer	*r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			/*
			 * ``r'' now holds the replaced timer to free
			 * below, or NULL if its timeout already fired.
			 * (When the loop completes without a match,
			 * LIST_FOREACH leaves r == NULL as well.)
			 */
			r = rt_timer_unlink(r);
			break;  /* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	/* Free the replaced timer outside the mutex. */
	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}
1684 
/*
 * Timeout handler: unqueue timer ``arg'', run its action (unless the
 * timer was unlinked from its route in the meantime) and free it.
 */
void
rt_timer_timer(void *arg)
{
	struct rttimer		*r = arg;
	struct rttimer_queue	*rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	/* rtt_rt == NULL means rt_timer_unlink() already detached us. */
	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}
1708 
#ifdef MPLS
/*
 * Attach MPLS label/operation information to route ``rt''.
 *
 * ``src'' is the sockaddr_mpls carrying the label (may be NULL only
 * for MPLS_OP_POP); ``op'' is the MPLS operation.  Returns 0 or an
 * errno.  NOTE(review): rt->rt_llinfo is overwritten unconditionally —
 * assumes it is NULL on entry (routes are fresh from rtrequest());
 * verify no caller passes a route with existing llinfo.
 */
int
rt_mpls_set(struct rtentry *rt, const struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls	*psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls		*rt_mpls;

	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}
1736 
1737 void
rt_mpls_clear(struct rtentry * rt)1738 rt_mpls_clear(struct rtentry *rt)
1739 {
1740 	if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
1741 		free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
1742 		rt->rt_llinfo = NULL;
1743 	}
1744 	rt->rt_flags &= ~RTF_MPLS;
1745 }
1746 #endif
1747 
/*
 * Return the id of route label ``name'', allocating a new id if the
 * label does not exist yet, and take a reference on it.
 *
 * Returns 0 for the empty name, on allocation failure or when all
 * ids up to LABELID_MAX are in use.  Drop the reference with
 * rtlabel_unref().
 */
u_int16_t
rtlabel_name2id(const char *name)
{
	struct rt_label		*label, *p;
	u_int16_t		 new_id = 1, id = 0;

	if (!name[0])
		return (0);

	mtx_enter(&rtlabel_mtx);
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			id = label->rtl_id;
			goto out;
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		goto out;

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		goto out;
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	id = label->rtl_id;
out:
	mtx_leave(&rtlabel_mtx);

	return (id);
}
1796 
1797 const char *
rtlabel_id2name_locked(u_int16_t id)1798 rtlabel_id2name_locked(u_int16_t id)
1799 {
1800 	struct rt_label	*label;
1801 
1802 	MUTEX_ASSERT_LOCKED(&rtlabel_mtx);
1803 
1804 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1805 		if (label->rtl_id == id)
1806 			return (label->rtl_name);
1807 
1808 	return (NULL);
1809 }
1810 
1811 const char *
rtlabel_id2name(u_int16_t id,char * rtlabelbuf,size_t sz)1812 rtlabel_id2name(u_int16_t id, char *rtlabelbuf, size_t sz)
1813 {
1814 	const char *label;
1815 
1816 	if (id == 0)
1817 		return (NULL);
1818 
1819 	mtx_enter(&rtlabel_mtx);
1820 	if ((label = rtlabel_id2name_locked(id)) != NULL)
1821 		strlcpy(rtlabelbuf, label, sz);
1822 	mtx_leave(&rtlabel_mtx);
1823 
1824 	if (label == NULL)
1825 		return (NULL);
1826 
1827 	return (rtlabelbuf);
1828 }
1829 
/*
 * Fill ``sa_rl'' with a sockaddr_rtlabel carrying the name of route
 * label ``labelid''.  Returns the sockaddr, or NULL if the id is 0
 * or unknown.
 */
struct sockaddr *
rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
{
	const char	*label;

	if (labelid == 0)
		return (NULL);

	/* Copy the name out while the label list is stable. */
	mtx_enter(&rtlabel_mtx);
	if ((label = rtlabel_id2name_locked(labelid)) != NULL) {
		bzero(sa_rl, sizeof(*sa_rl));
		sa_rl->sr_len = sizeof(*sa_rl);
		sa_rl->sr_family = AF_UNSPEC;
		strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
	}
	mtx_leave(&rtlabel_mtx);

	if (label == NULL)
		return (NULL);

	return ((struct sockaddr *)sa_rl);
}
1852 
/*
 * Drop a reference on route label ``id'' and free the label once the
 * last reference is gone.  id 0 means "no label" and is ignored.
 */
void
rtlabel_unref(u_int16_t id)
{
	struct rt_label	*p, *next;

	if (id == 0)
		return;

	mtx_enter(&rtlabel_mtx);
	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
		if (id == p->rtl_id) {
			if (--p->rtl_ref == 0) {
				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
				free(p, M_RTABLE, sizeof(*p));
			}
			break;
		}
	}
	mtx_leave(&rtlabel_mtx);
}
1873 
/*
 * Re-evaluate all multipath-capable routes of interface ``ifp'' after
 * a link state change.  Entries flagged EEXIST by the walker (cloned
 * or redirect routes on a down link) are deleted and the walk is
 * restarted until stable.  Returns 0 or the first fatal walk error.
 */
int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			do {
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			/* No routes of this family: not an error. */
			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}
1911 
/*
 * rtable_walk() callback used by rt_if_track(): bring a route on the
 * given interface up or down according to the interface link state.
 *
 * Returning EEXIST tells the walker's caller to delete the entry
 * (cloned/redirect routes on a down interface).
 */
int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	/* Publish the priority change to lockless readers. */
	membar_producer();
	atomic_inc_long(&rtgeneration);

	return (error);
}
1962 
1963 struct sockaddr *
rt_plentosa(sa_family_t af,int plen,struct sockaddr_in6 * sa_mask)1964 rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
1965 {
1966 	struct sockaddr_in	*sin = (struct sockaddr_in *)sa_mask;
1967 #ifdef INET6
1968 	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)sa_mask;
1969 #endif
1970 
1971 	KASSERT(plen >= 0 || plen == -1);
1972 
1973 	if (plen == -1)
1974 		return (NULL);
1975 
1976 	memset(sa_mask, 0, sizeof(*sa_mask));
1977 
1978 	switch (af) {
1979 	case AF_INET:
1980 		sin->sin_family = AF_INET;
1981 		sin->sin_len = sizeof(struct sockaddr_in);
1982 		in_prefixlen2mask(&sin->sin_addr, plen);
1983 		break;
1984 #ifdef INET6
1985 	case AF_INET6:
1986 		sin6->sin6_family = AF_INET6;
1987 		sin6->sin6_len = sizeof(struct sockaddr_in6);
1988 		in6_prefixlen2mask(&sin6->sin6_addr, plen);
1989 		break;
1990 #endif /* INET6 */
1991 	default:
1992 		return (NULL);
1993 	}
1994 
1995 	return ((struct sockaddr *)sa_mask);
1996 }
1997 
1998 struct sockaddr *
rt_plen2mask(struct rtentry * rt,struct sockaddr_in6 * sa_mask)1999 rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
2000 {
2001 	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
2002 }
2003 
2004 #ifdef DDB
2005 #include <machine/db_machdep.h>
2006 #include <ddb/db_output.h>
2007 
2008 void	db_print_sa(struct sockaddr *);
2009 void	db_print_ifa(struct ifaddr *);
2010 
2011 void
db_print_sa(struct sockaddr * sa)2012 db_print_sa(struct sockaddr *sa)
2013 {
2014 	int len;
2015 	u_char *p;
2016 
2017 	if (sa == NULL) {
2018 		db_printf("[NULL]");
2019 		return;
2020 	}
2021 
2022 	p = (u_char *)sa;
2023 	len = sa->sa_len;
2024 	db_printf("[");
2025 	while (len > 0) {
2026 		db_printf("%d", *p);
2027 		p++;
2028 		len--;
2029 		if (len)
2030 			db_printf(",");
2031 	}
2032 	db_printf("]\n");
2033 }
2034 
/* Dump the addresses, flags and counters of interface address ``ifa''. */
void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x, refcnt=%u, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
}
2049 
/*
 * Function to pass to rtable_walk().
 * Return non-zero error to abort walk.
 */
int
db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld\n",
	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_key(rt));
	db_printf(" plen=%d", rt_plen(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);
	db_printf(" ifidx=%u ", rt->rt_ifidx);
	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p priority=%d\n",
	    rt->rt_gwroute, rt->rt_llinfo, rt->rt_priority);
	/* Always continue the walk. */
	return (0);
}
2073 
/*
 * Function to print all the route trees.
 */
int
db_show_rtable(int af, unsigned int rtableid)
{
	db_printf("Route tree for af %d, rtableid %u\n", af, rtableid);
	/* Print every entry; db_show_rtentry never aborts the walk. */
	rtable_walk(rtableid, af, NULL, db_show_rtentry, NULL);
	return (0);
}
2084 #endif /* DDB */
2085