1 /* $OpenBSD: route.c,v 1.437 2024/09/20 02:00:46 jsg Exp $ */
2 /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1980, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)route.c 8.2 (Berkeley) 11/15/93
62 */
63
64 /*
65 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
66 *
67 * NRL grants permission for redistribution and use in source and binary
68 * forms, with or without modification, of the software and documentation
69 * created at NRL provided that the following conditions are met:
70 *
71 * 1. Redistributions of source code must retain the above copyright
72 * notice, this list of conditions and the following disclaimer.
73 * 2. Redistributions in binary form must reproduce the above copyright
74 * notice, this list of conditions and the following disclaimer in the
75 * documentation and/or other materials provided with the distribution.
76 * 3. All advertising materials mentioning features or use of this software
77 * must display the following acknowledgements:
78 * This product includes software developed by the University of
79 * California, Berkeley and its contributors.
80 * This product includes software developed at the Information
81 * Technology Division, US Naval Research Laboratory.
82 * 4. Neither the name of the NRL nor the names of its contributors
83 * may be used to endorse or promote products derived from this software
84 * without specific prior written permission.
85 *
86 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
90 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97 *
98 * The views and conclusions contained in the software and documentation
99 * are those of the authors and should not be interpreted as representing
100 * official policies, either expressed or implied, of the US Naval
101 * Research Laboratory (NRL).
102 */
103
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/mbuf.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/timeout.h>
110 #include <sys/domain.h>
111 #include <sys/ioctl.h>
112 #include <sys/kernel.h>
113 #include <sys/queue.h>
114 #include <sys/pool.h>
115 #include <sys/atomic.h>
116 #include <sys/mutex.h>
117
118 #include <net/if.h>
119 #include <net/if_var.h>
120 #include <net/if_dl.h>
121 #include <net/route.h>
122
123 #include <netinet/in.h>
124 #include <netinet/ip_var.h>
125 #include <netinet/in_var.h>
126
127 #ifdef INET6
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/in6_var.h>
131 #endif
132
133 #ifdef MPLS
134 #include <netmpls/mpls.h>
135 #endif
136
137 #ifdef BFD
138 #include <net/bfd.h>
139 #endif
140
141 /*
142 * Locks used to protect struct members:
143 * a atomic operations
144 * I immutable after creation
145 * L rtlabel_mtx
146 * T rttimer_mtx
147 */
148
/*
 * Round ``a'' up to the next multiple of sizeof(long); a length of 0
 * still consumes one long so empty sockaddrs get valid storage.
 * Note: the argument is fully parenthesized so expressions such as
 * ROUNDUP(x & ~1) parse correctly in the condition.
 */
#define ROUNDUP(a) \
    ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
150
/* Give some jitter to hash, to avoid synchronization between routers. */
static uint32_t rt_hashjitter;

extern unsigned int rtmap_limit;

struct cpumem *rtcounters;	/* per-CPU routing statistics (rts_*) */
int rttrash;		/* [a] routes not in table but not freed */
u_long rtgeneration;	/* [a] generation number, routes changed */

struct pool rtentry_pool;	/* pool for rtentry structures */
struct pool rttimer_pool;	/* pool for rttimer structures */

/* File-local helpers; see the function definitions below for contracts. */
int rt_setgwroute(struct rtentry *, const struct sockaddr *, u_int);
void rt_putgwroute(struct rtentry *, struct rtentry *);
int rtflushclone1(struct rtentry *, void *, u_int);
int rtflushclone(struct rtentry *, unsigned int);
int rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
struct rtentry *rt_match(const struct sockaddr *, uint32_t *, int,
    unsigned int);
int rt_clone(struct rtentry **, const struct sockaddr *, unsigned int);
struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
static int rt_copysa(const struct sockaddr *, const struct sockaddr *,
    struct sockaddr **);

/*
 * NOTE(review): presumably the cap on dynamically assigned route label
 * ids -- confirm against rtlabel_name2id() (not visible in this chunk).
 */
#define LABELID_MAX 50000

/* A named route label with its numeric id and reference count. */
struct rt_label {
	TAILQ_ENTRY(rt_label) rtl_entry; /* [L] */
	char rtl_name[RTLABEL_LEN]; /* [I] */
	u_int16_t rtl_id; /* [I] */
	int rtl_ref; /* [L] */
};

TAILQ_HEAD(rt_labels, rt_label) rt_labels =
    TAILQ_HEAD_INITIALIZER(rt_labels); /* [L] */
struct mutex rtlabel_mtx = MUTEX_INITIALIZER(IPL_NET);
187
/*
 * Bootstrap the routing layer: allocate the per-CPU route counters,
 * initialize the rtentry allocation pool, seed the multipath hash
 * jitter and (optionally) start BFD.  Called once at startup.
 */
void
route_init(void)
{
	rtcounters = counters_alloc(rts_ncounters);

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0,
	    "rtentry", NULL);

	/* 0 means "uninitialized"; loop until we draw a non-zero seed. */
	while (rt_hashjitter == 0)
		rt_hashjitter = arc4random();

#ifdef BFD
	bfdinit();
#endif
}
203
/*
 * Check whether the cached IPv4 route in ``ro'' is still usable for
 * ``dst''/``src'' in table ``rtableid''.
 *
 * Returns 0 on a cache hit (ro->ro_rt valid).  Returns ESRCH on a
 * miss, after resetting ``ro'' and storing the destination (and the
 * optional source) so the caller can do a fresh lookup and re-prime
 * the cache.
 */
int
route_cache(struct route *ro, const struct in_addr *dst,
    const struct in_addr *src, u_int rtableid)
{
	u_long gen;

	/*
	 * Load the global generation number before inspecting the cached
	 * route; pairs with the membar_producer() issued when the tables
	 * change so a stale entry is never reported as a hit.
	 */
	gen = atomic_load_long(&rtgeneration);
	membar_consumer();

	if (rtisvalid(ro->ro_rt) &&
	    ro->ro_generation == gen &&
	    ro->ro_tableid == rtableid &&
	    ro->ro_dstsa.sa_family == AF_INET &&
	    ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
		/*
		 * A multipath route is only reused when the cached source
		 * address matches, otherwise a fresh lookup could select
		 * a different path.
		 */
		if (src == NULL || !ipmultipath ||
		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
		    (ro->ro_srcin.s_addr != INADDR_ANY &&
		    ro->ro_srcin.s_addr == src->s_addr)) {
			ipstat_inc(ips_rtcachehit);
			return (0);
		}
	}

	ipstat_inc(ips_rtcachemiss);
	rtfree(ro->ro_rt);
	memset(ro, 0, sizeof(*ro));
	ro->ro_generation = gen;
	ro->ro_tableid = rtableid;

	ro->ro_dstsin.sin_family = AF_INET;
	ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
	ro->ro_dstsin.sin_addr = *dst;
	if (src != NULL)
		ro->ro_srcin = *src;

	return (ESRCH);
}
241
242 /*
243 * Check cache for route, else allocate a new one, potentially using multipath
244 * to select the peer. Update cache and return valid route or NULL.
245 */
246 struct rtentry *
route_mpath(struct route * ro,const struct in_addr * dst,const struct in_addr * src,u_int rtableid)247 route_mpath(struct route *ro, const struct in_addr *dst,
248 const struct in_addr *src, u_int rtableid)
249 {
250 if (route_cache(ro, dst, src, rtableid)) {
251 uint32_t *s = NULL;
252
253 if (ro->ro_srcin.s_addr != INADDR_ANY)
254 s = &ro->ro_srcin.s_addr;
255 ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, s, ro->ro_tableid);
256 }
257 return (ro->ro_rt);
258 }
259
260 #ifdef INET6
/*
 * IPv6 counterpart of route_cache(): return 0 when the cached route
 * in ``ro'' still matches ``dst''/``src'' in ``rtableid'', or ESRCH
 * after resetting ``ro'' and recording the addresses for the next
 * lookup.
 */
int
route6_cache(struct route *ro, const struct in6_addr *dst,
    const struct in6_addr *src, u_int rtableid)
{
	u_long gen;

	/* Pairs with the membar_producer() done when routes change. */
	gen = atomic_load_long(&rtgeneration);
	membar_consumer();

	if (rtisvalid(ro->ro_rt) &&
	    ro->ro_generation == gen &&
	    ro->ro_tableid == rtableid &&
	    ro->ro_dstsa.sa_family == AF_INET6 &&
	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
		/*
		 * Multipath routes are only reused when the cached
		 * source address matches too.
		 */
		if (src == NULL || !ip6_multipath ||
		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
		    (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
		    IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
			ip6stat_inc(ip6s_rtcachehit);
			return (0);
		}
	}

	ip6stat_inc(ip6s_rtcachemiss);
	rtfree(ro->ro_rt);
	memset(ro, 0, sizeof(*ro));
	ro->ro_generation = gen;
	ro->ro_tableid = rtableid;

	ro->ro_dstsin6.sin6_family = AF_INET6;
	ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
	ro->ro_dstsin6.sin6_addr = *dst;
	if (src != NULL)
		ro->ro_srcin6 = *src;

	return (ESRCH);
}
298
299 struct rtentry *
route6_mpath(struct route * ro,const struct in6_addr * dst,const struct in6_addr * src,u_int rtableid)300 route6_mpath(struct route *ro, const struct in6_addr *dst,
301 const struct in6_addr *src, u_int rtableid)
302 {
303 if (route6_cache(ro, dst, src, rtableid)) {
304 uint32_t *s = NULL;
305
306 if (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6))
307 s = &ro->ro_srcin6.s6_addr32[0];
308 ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, s, ro->ro_tableid);
309 }
310 return (ro->ro_rt);
311 }
312 #endif
313
314 /*
315 * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
316 */
317 int
rtisvalid(struct rtentry * rt)318 rtisvalid(struct rtentry *rt)
319 {
320 if (rt == NULL)
321 return (0);
322
323 if (!ISSET(rt->rt_flags, RTF_UP))
324 return (0);
325
326 if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
327 KASSERT(rt->rt_gwroute != NULL);
328 KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
329 if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
330 return (0);
331 }
332
333 return (1);
334 }
335
/*
 * Do the actual lookup for rtalloc(9), do not use directly!
 *
 * Return the best matching entry for the destination ``dst'',
 * referenced, or NULL (counting an rts_unreach miss).
 *
 * "RT_RESOLVE" means that a corresponding L2 entry should
 * be added to the routing table and resolved (via ARP or
 * NDP), if it does not exist.
 */
struct rtentry *
rt_match(const struct sockaddr *dst, uint32_t *src, int flags,
    unsigned int tableid)
{
	struct rtentry *rt = NULL;

	rt = rtable_match(tableid, dst, src);
	if (rt == NULL) {
		rtstat_inc(rts_unreach);
		return (NULL);
	}

	/*
	 * Lazily clone an L2 entry when asked to resolve; if cloning
	 * fails the cloning route itself is returned unchanged.
	 */
	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
		rt_clone(&rt, dst, tableid);

	rt->rt_use++;
	return (rt);
}
363
/*
 * Insert an RTM_RESOLVE (L2) route for ``dst'' cloned from ``*rtp''.
 * On success ``*rtp'' is replaced by the new entry (the reference on
 * the old one is dropped) and an RTM_ADD message is sent; on failure
 * an RTM_MISS message is sent instead and ``*rtp'' is untouched.
 * Returns 0 or the rtrequest() errno.
 */
int
rt_clone(struct rtentry **rtp, const struct sockaddr *dst,
    unsigned int rtableid)
{
	struct rt_addrinfo info;
	struct rtentry *rt = *rtp;
	int error = 0;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/*
	 * The priority of cloned route should be different
	 * to avoid conflict with /32 cloning routes.
	 *
	 * It should also be higher to let the ARP layer find
	 * cloned routes instead of the cloning one.
	 */
	KERNEL_LOCK();
	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
	    rtableid);
	KERNEL_UNLOCK();
	if (error) {
		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
	} else {
		/* Inform listeners of the new route */
		rtm_send(rt, RTM_ADD, 0, rtableid);
		rtfree(*rtp);
		*rtp = rt;
	}
	return (error);
}
396
/*
 * Originated from bridge_hash() in if_bridge.c
 *
 * Three-word mixing step (Jenkins-style): each round scrambles the
 * accumulators a, b, c so every input bit influences the result.
 */
#define mix(a, b, c) do {						\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)
411
/*
 * Compute a 16-bit multipath hash over the destination address in
 * ``dst'' and the source address words in ``src'' (1 word for IPv4,
 * 4 for IPv6), seeded with the boot-time jitter.
 *
 * Returns -1 when no source is given, ``rt'' is not a valid MPATH
 * route, or multipath is disabled for the address family; otherwise
 * a value in [0, 0xffff].
 */
int
rt_hash(struct rtentry *rt, const struct sockaddr *dst, uint32_t *src)
{
	uint32_t a, b, c;

	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
		return (-1);

	/* Golden-ratio constant plus per-boot jitter as initial state. */
	a = b = 0x9e3779b9;
	c = rt_hashjitter;

	switch (dst->sa_family) {
	case AF_INET:
	    {
		const struct sockaddr_in *sin;

		if (!ipmultipath)
			return (-1);

		sin = satosin_const(dst);
		a += sin->sin_addr.s_addr;
		b += src[0];
		mix(a, b, c);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		const struct sockaddr_in6 *sin6;

		if (!ip6_multipath)
			return (-1);

		/*
		 * Fold all four 32-bit words of both addresses through
		 * four mixing rounds; the exact word order below is part
		 * of the hash definition and must not change.
		 */
		sin6 = satosin6_const(dst);
		a += sin6->sin6_addr.s6_addr32[0];
		b += sin6->sin6_addr.s6_addr32[2];
		c += src[0];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[1];
		b += sin6->sin6_addr.s6_addr32[3];
		c += src[1];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[2];
		b += sin6->sin6_addr.s6_addr32[1];
		c += src[2];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[3];
		b += sin6->sin6_addr.s6_addr32[0];
		c += src[3];
		mix(a, b, c);
		break;
	    }
#endif /* INET6 */
	}

	return (c & 0xffff);
}
469
/*
 * Allocate a route, potentially using multipath to select the peer.
 * ``src'' (when non-NULL) is handed down to the table lookup so that
 * equal-cost routes can be disambiguated by source address.  The
 * returned entry is referenced; NULL if no route matches.
 */
struct rtentry *
rtalloc_mpath(const struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
{
	return (rt_match(dst, src, RT_RESOLVE, rtableid));
}
478
/*
 * Look in the routing table for the best matching entry for
 * ``dst''.
 *
 * If a route with a gateway is found and its next hop is no
 * longer valid, try to cache it.
 *
 * ``flags'' may include RT_RESOLVE to request on-demand cloning of
 * an L2 entry (see rt_match()).  The result is referenced, or NULL.
 */
struct rtentry *
rtalloc(const struct sockaddr *dst, int flags, unsigned int rtableid)
{
	return (rt_match(dst, NULL, flags, rtableid));
}
491
/*
 * Cache the route entry corresponding to a reachable next hop in
 * the gateway entry ``rt''.
 *
 * Returns 0 on success with the next-hop entry referenced and stored
 * via rt_putgwroute(); otherwise ENOENT (no next hop found),
 * EHOSTUNREACH (next hop not usable on rt's interface) or
 * ENETUNREACH (next hop itself needs resolution, avoiding loops).
 */
int
rt_setgwroute(struct rtentry *rt, const struct sockaddr *gate, u_int rtableid)
{
	struct rtentry *prt, *nhrt;
	unsigned int rdomain = rtable_l2(rtableid);
	int error;

	NET_ASSERT_LOCKED();

	/* If we cannot find a valid next hop bail. */
	nhrt = rt_match(gate, NULL, RT_RESOLVE, rdomain);
	if (nhrt == NULL)
		return (ENOENT);

	/* Next hop entry must be on the same interface. */
	if (nhrt->rt_ifidx != rt->rt_ifidx) {
		struct sockaddr_in6 sa_mask;

		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
			rtfree(nhrt);
			return (EHOSTUNREACH);
		}

		/*
		 * We found a L2 entry, so we might have multiple
		 * RTF_CLONING routes for the same subnet.  Query
		 * the first route of the multipath chain and iterate
		 * until we find the correct one.
		 */
		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
		rtfree(nhrt);

		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
			prt = rtable_iterate(prt);

		/* We found nothing or a non-cloning MPATH route. */
		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
			rtfree(prt);
			return (EHOSTUNREACH);
		}

		/* Clone an L2 entry on the right interface. */
		error = rt_clone(&prt, gate, rdomain);
		if (error) {
			rtfree(prt);
			return (error);
		}
		nhrt = prt;
	}

	/*
	 * Next hop must be reachable, this also prevents rtentry
	 * loops for example when rt->rt_gwroute points to rt.
	 */
	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
		rtfree(nhrt);
		return (ENETUNREACH);
	}

	/*
	 * If the MTU of next hop is 0, this will reset the MTU of the
	 * route to run PMTUD again from scratch.
	 */
	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
		rt->rt_mtu = nhrt->rt_mtu;

	/*
	 * To avoid reference counting problems when writing link-layer
	 * addresses in an outgoing packet, we ensure that the lifetime
	 * of a cached entry is greater than the bigger lifetime of the
	 * gateway entries it is pointed by.
	 */
	nhrt->rt_flags |= RTF_CACHED;
	nhrt->rt_cachecnt++;

	/* commit */
	rt_putgwroute(rt, nhrt);

	return (0);
}
577
/*
 * Invalidate the cached route entry of the gateway entry ``rt''
 * and replace it with ``nhrt'' (may be NULL to just clear it).
 * Drops the old entry's RTF_CACHED accounting and its reference.
 */
void
rt_putgwroute(struct rtentry *rt, struct rtentry *nhrt)
{
	struct rtentry *onhrt;

	NET_ASSERT_LOCKED();

	/* Only gateway routes carry a cached next hop. */
	if (!ISSET(rt->rt_flags, RTF_GATEWAY))
		return;

	/* this is protected as per [X] in route.h */
	onhrt = rt->rt_gwroute;
	rt->rt_gwroute = nhrt;

	if (onhrt != NULL) {
		KASSERT(onhrt->rt_cachecnt > 0);
		KASSERT(ISSET(onhrt->rt_flags, RTF_CACHED));

		/* Last cache user clears the RTF_CACHED marker. */
		--onhrt->rt_cachecnt;
		if (onhrt->rt_cachecnt == 0)
			CLR(onhrt->rt_flags, RTF_CACHED);

		rtfree(onhrt);
	}
}
606
/*
 * Acquire an additional reference to ``rt''; released with rtfree().
 */
void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}
612
/*
 * Drop a reference to ``rt'' (NULL is accepted).  The final release
 * frees the entry: by then it must already have been unlinked from
 * its table (RTF_UP cleared and accounted in rttrash).
 */
void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	/* Last reference gone: the route may not still be in a table. */
	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	/* Tear down everything the entry owns before returning it. */
	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	if (rt->rt_gateway != NULL) {
		free(rt->rt_gateway, M_RTABLE,
		    ROUNDUP(rt->rt_gateway->sa_len));
	}
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);

	pool_put(&rtentry_pool, rt);
}
640
/*
 * Take a reference on ``ifa'' and return it, so callers can chain
 * the call in an assignment.
 */
struct ifaddr *
ifaref(struct ifaddr *ifa)
{
	refcnt_take(&ifa->ifa_refcnt);
	return ifa;
}
647
/*
 * Drop a reference to ``ifa''; the last release frees it.
 * (The size of 0 leaves the length lookup to free(9).)
 */
void
ifafree(struct ifaddr *ifa)
{
	if (refcnt_rele(&ifa->ifa_refcnt) == 0)
		return;
	free(ifa, M_IFADDR, 0);
}
655
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * On success, if ``rtp'' is not NULL the affected route is returned
 * there with a reference held; otherwise it is freed.  An
 * RTM_REDIRECT message is always sent to routing socket listeners,
 * and statistics are updated accordingly.
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry *rt;
	int error = 0;
	enum rtstat_counters stat = rts_ncounters;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	unsigned int ifidx = 0;
	int flags = RTF_GATEWAY|RTF_HOST;
	uint8_t prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	/* Byte-wise sockaddr comparison (length must match too). */
#define equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the route to the caller or drop our reference. */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	/* Always notify routing socket listeners about the redirect. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}
772
/*
 * Delete a route and generate a message.
 *
 * ``rt'' must belong to ``ifp''.  On success the reference that the
 * table held is dropped here; the caller keeps its own reference.
 * Returns 0 or the rtrequest_delete() errno.
 */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int error;
	struct rt_addrinfo info;
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6 sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	if (error == 0)
		rtfree(rt);
	return (error);
}
807
808 static inline int
rtequal(struct rtentry * a,struct rtentry * b)809 rtequal(struct rtentry *a, struct rtentry *b)
810 {
811 if (a == b)
812 return 1;
813
814 if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
815 rt_plen(a) == rt_plen(b))
816 return 1;
817 else
818 return 0;
819 }
820
/*
 * rtable_walk() callback: decide whether the cloned entry ``rt''
 * should be flushed because its parent ``arg'' (a cloning route) is
 * going away.  Returning EEXIST stops the walk so rtflushclone() can
 * delete ``rt''; returning 0 continues the walk.
 */
int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached route must stay alive as long as their parent are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}
848
/*
 * Delete every route cloned from the cloning route ``parent'' in
 * table ``rtableid''.  Each deletion restarts the walk (EAGAIN)
 * because the table changed underneath it; the loop terminates when
 * a full walk finds no more candidates.
 */
int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			/* The walker found a cloned child: delete it. */
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;

}
882
/*
 * Remove a route from table ``tableid''.  The entry matching the
 * DST/NETMASK/GATEWAY in ``info'' at priority ``prio'' is unlinked,
 * its next-hop cache and cloned children are cleaned up, and it is
 * handed back through ``ret_nrt'' (reference included) or freed.
 * Returns 0, EAFNOSUPPORT or ESRCH.
 */
int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry *rt;
	int error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller want to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt, NULL);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	/*
	 * NOTE(review): ifp is dereferenced unconditionally here although
	 * the matching check above tolerates ifp == NULL -- callers appear
	 * to be required to pass a valid ifp; confirm.
	 */
	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	/*
	 * Publish the table change; pairs with membar_consumer() in
	 * route_cache()/route6_cache().
	 */
	membar_producer();
	atomic_inc_long(&rtgeneration);

	return (0);
}
944
945 int
rtrequest(int req,struct rt_addrinfo * info,u_int8_t prio,struct rtentry ** ret_nrt,u_int tableid)946 rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
947 struct rtentry **ret_nrt, u_int tableid)
948 {
949 struct ifnet *ifp;
950 struct rtentry *rt, *crt;
951 struct ifaddr *ifa;
952 struct sockaddr *ndst;
953 struct sockaddr_rtlabel *sa_rl, sa_rl2;
954 struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
955 int error;
956
957 NET_ASSERT_LOCKED();
958
959 if (!rtable_exists(tableid))
960 return (EAFNOSUPPORT);
961 if (info->rti_flags & RTF_HOST)
962 info->rti_info[RTAX_NETMASK] = NULL;
963 switch (req) {
964 case RTM_DELETE:
965 return (EINVAL);
966
967 case RTM_RESOLVE:
968 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
969 return (EINVAL);
970 if ((rt->rt_flags & RTF_CLONING) == 0)
971 return (EINVAL);
972 KASSERT(rt->rt_ifa->ifa_ifp != NULL);
973 info->rti_ifa = rt->rt_ifa;
974 info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
975 info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
976 info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
977 info->rti_info[RTAX_LABEL] =
978 rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
979 /* FALLTHROUGH */
980
981 case RTM_ADD:
982 if (info->rti_ifa == NULL)
983 return (EINVAL);
984 ifa = info->rti_ifa;
985 ifp = ifa->ifa_ifp;
986 if (prio == 0)
987 prio = ifp->if_priority + RTP_STATIC;
988
989 error = rt_copysa(info->rti_info[RTAX_DST],
990 info->rti_info[RTAX_NETMASK], &ndst);
991 if (error)
992 return (error);
993
994 rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
995 if (rt == NULL) {
996 free(ndst, M_RTABLE, ndst->sa_len);
997 return (ENOBUFS);
998 }
999
1000 refcnt_init_trace(&rt->rt_refcnt, DT_REFCNT_IDX_RTENTRY);
1001 rt->rt_flags = info->rti_flags | RTF_UP;
1002 rt->rt_priority = prio; /* init routing priority */
1003 LIST_INIT(&rt->rt_timer);
1004
1005 /* Check the link state if the table supports it. */
1006 if (rtable_mpath_capable(tableid, ndst->sa_family) &&
1007 !ISSET(rt->rt_flags, RTF_LOCAL) &&
1008 (!LINK_STATE_IS_UP(ifp->if_link_state) ||
1009 !ISSET(ifp->if_flags, IFF_UP))) {
1010 rt->rt_flags &= ~RTF_UP;
1011 rt->rt_priority |= RTP_DOWN;
1012 }
1013
1014 if (info->rti_info[RTAX_LABEL] != NULL) {
1015 sa_rl = (struct sockaddr_rtlabel *)
1016 info->rti_info[RTAX_LABEL];
1017 rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
1018 }
1019
1020 #ifdef MPLS
1021 /* We have to allocate additional space for MPLS infos */
1022 if (info->rti_flags & RTF_MPLS &&
1023 (info->rti_info[RTAX_SRC] != NULL ||
1024 info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
1025 error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
1026 info->rti_mpls);
1027 if (error) {
1028 free(ndst, M_RTABLE, ndst->sa_len);
1029 pool_put(&rtentry_pool, rt);
1030 return (error);
1031 }
1032 } else
1033 rt_mpls_clear(rt);
1034 #endif
1035
1036 rt->rt_ifa = ifaref(ifa);
1037 rt->rt_ifidx = ifp->if_index;
1038 /*
1039 * Copy metrics and a back pointer from the cloned
1040 * route's parent.
1041 */
1042 if (ISSET(rt->rt_flags, RTF_CLONED)) {
1043 rtref(*ret_nrt);
1044 rt->rt_parent = *ret_nrt;
1045 rt->rt_rmx = (*ret_nrt)->rt_rmx;
1046 }
1047
1048 /*
1049 * We must set rt->rt_gateway before adding ``rt'' to
1050 * the routing table because the radix MPATH code use
1051 * it to (re)order routes.
1052 */
1053 if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
1054 tableid))) {
1055 ifafree(ifa);
1056 rtfree(rt->rt_parent);
1057 rt_putgwroute(rt, NULL);
1058 if (rt->rt_gateway != NULL) {
1059 free(rt->rt_gateway, M_RTABLE,
1060 ROUNDUP(rt->rt_gateway->sa_len));
1061 }
1062 free(ndst, M_RTABLE, ndst->sa_len);
1063 pool_put(&rtentry_pool, rt);
1064 return (error);
1065 }
1066
1067 error = rtable_insert(tableid, ndst,
1068 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1069 rt->rt_priority, rt);
1070 if (error != 0 &&
1071 (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
1072 /* overwrite cloned route */
1073 if (ISSET(crt->rt_flags, RTF_CLONED) &&
1074 !ISSET(crt->rt_flags, RTF_CACHED)) {
1075 struct ifnet *cifp;
1076
1077 cifp = if_get(crt->rt_ifidx);
1078 KASSERT(cifp != NULL);
1079 rtdeletemsg(crt, cifp, tableid);
1080 if_put(cifp);
1081
1082 error = rtable_insert(tableid, ndst,
1083 info->rti_info[RTAX_NETMASK],
1084 info->rti_info[RTAX_GATEWAY],
1085 rt->rt_priority, rt);
1086 }
1087 rtfree(crt);
1088 }
1089 if (error != 0) {
1090 ifafree(ifa);
1091 rtfree(rt->rt_parent);
1092 rt_putgwroute(rt, NULL);
1093 if (rt->rt_gateway != NULL) {
1094 free(rt->rt_gateway, M_RTABLE,
1095 ROUNDUP(rt->rt_gateway->sa_len));
1096 }
1097 free(ndst, M_RTABLE, ndst->sa_len);
1098 pool_put(&rtentry_pool, rt);
1099 return (EEXIST);
1100 }
1101 ifp->if_rtrequest(ifp, req, rt);
1102
1103 if_group_routechange(info->rti_info[RTAX_DST],
1104 info->rti_info[RTAX_NETMASK]);
1105
1106 if (ret_nrt != NULL)
1107 *ret_nrt = rt;
1108 else
1109 rtfree(rt);
1110
1111 membar_producer();
1112 atomic_inc_long(&rtgeneration);
1113
1114 break;
1115 }
1116
1117 return (0);
1118 }
1119
1120 int
rt_setgate(struct rtentry * rt,const struct sockaddr * gate,u_int rtableid)1121 rt_setgate(struct rtentry *rt, const struct sockaddr *gate, u_int rtableid)
1122 {
1123 int glen = ROUNDUP(gate->sa_len);
1124 struct sockaddr *sa, *osa;
1125 int error = 0;
1126
1127 KASSERT(gate != NULL);
1128 if (rt->rt_gateway == gate) {
1129 /* nop */
1130 return (0);
1131 }
1132
1133 sa = malloc(glen, M_RTABLE, M_NOWAIT | M_ZERO);
1134 if (sa == NULL)
1135 return (ENOBUFS);
1136 memcpy(sa, gate, gate->sa_len);
1137
1138 KERNEL_LOCK(); /* see [X] in route.h */
1139 osa = rt->rt_gateway;
1140 rt->rt_gateway = sa;
1141
1142 if (ISSET(rt->rt_flags, RTF_GATEWAY))
1143 error = rt_setgwroute(rt, gate, rtableid);
1144 KERNEL_UNLOCK();
1145
1146 if (osa != NULL)
1147 free(osa, M_RTABLE, ROUNDUP(osa->sa_len));
1148
1149 return (error);
1150 }
1151
1152 /*
1153 * Return the route entry containing the next hop link-layer
1154 * address corresponding to ``rt''.
1155 */
1156 struct rtentry *
rt_getll(struct rtentry * rt)1157 rt_getll(struct rtentry *rt)
1158 {
1159 if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1160 KASSERT(rt->rt_gwroute != NULL);
1161 return (rt->rt_gwroute);
1162 }
1163
1164 return (rt);
1165 }
1166
/*
 * Copy ``src'' into ``dst'' while AND-ing each address byte with the
 * corresponding byte of ``netmask''.  Bytes of ``dst'' beyond the mask
 * length (up to the source length) are zeroed.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	u_char *cp1 = (u_char *)src;
	u_char *cp2 = (u_char *)dst;
	u_char *cp3 = (u_char *)netmask;
	u_char *cplim = cp2 + *cp3;	/* *cp3 == netmask->sa_len */
	u_char *cplim2 = cp2 + *cp1;	/* *cp1 == src->sa_len */

	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
	cp3 += 2;	/* skip the mask's sa_len/sa_family to stay in sync */
	if (cplim > cplim2)
		cplim = cplim2;	/* never read past the end of ``src'' */
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero(cp2, cplim2 - cp2);
}
1186
/*
 * Allocate a new sockaddr structure based on the user supplied src and
 * mask that is useable for the routing table.  The copy is in the
 * domain's canonical form: fixed size, zero filled, and truncated to
 * the prefix length described by ``mask''.
 */
static int
rt_copysa(const struct sockaddr *src, const struct sockaddr *mask,
    struct sockaddr **dst)
{
	/* maskarray[n] masks the top n bits of a partial byte. */
	static const u_char maskarray[] = {
	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	struct sockaddr *ndst;
	const struct domain *dp;
	u_char *csrc, *cdst;
	int i, plen;

	/* Find the routing-capable domain matching the address family. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;	/* domain not used for routing */
		if (src->sa_family == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (EAFNOSUPPORT);

	if (src->sa_len < dp->dom_sasize)
		return (EINVAL);

	/* Convert ``mask'' to a prefix length; -1 means a bogus mask. */
	plen = rtable_satoplen(src->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
	if (ndst == NULL)
		return (ENOBUFS);

	ndst->sa_family = src->sa_family;
	ndst->sa_len = dp->dom_sasize;

	csrc = (u_char *)src + dp->dom_rtoffset;
	cdst = (u_char *)ndst + dp->dom_rtoffset;

	/* Copy the whole bytes of the prefix, then mask the partial byte. */
	memcpy(cdst, csrc, plen / 8);
	if (plen % 8 != 0)
		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];

	*dst = ndst;
	return (0);
}
1235
/*
 * Install a route for address ``dst'' on behalf of ``ifa'' in routing
 * domain ``rdomain'' and announce it to userland.
 * Returns 0 on success or an errno from rtrequest().
 */
int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct sockaddr_rtlabel sa_rl;
	struct rt_addrinfo info;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	/* Link-layer routes use the interface's own lladdr as gateway. */
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	/* Network routes carry the interface netmask. */
	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/*
	 * Priority selection; note the order: RTF_CONNECTED overrides
	 * RTF_LOCAL/RTF_BROADCAST when both are set.
	 */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}
1287
/*
 * Remove the route for address ``dst'' installed on behalf of ``ifa''
 * from routing domain ``rdomain'' and announce the deletion to
 * userland.  Returns 0 on success or an errno.
 */
int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct mbuf *m = NULL;
	struct sockaddr *deldst;
	struct rt_addrinfo info;
	struct sockaddr_rtlabel sa_rl;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	KASSERT(rdomain == rtable_l2(rdomain));

	/*
	 * For network routes the table key is the masked destination;
	 * build that copy in a temporary mbuf.
	 */
	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Mirror the priority selection of rt_ifa_add(). */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	m_free(m);	/* release the masked-destination copy, if any */

	return (error);
}
1341
1342 /*
1343 * Add ifa's address as a local rtentry.
1344 */
1345 int
rt_ifa_addlocal(struct ifaddr * ifa)1346 rt_ifa_addlocal(struct ifaddr *ifa)
1347 {
1348 struct ifnet *ifp = ifa->ifa_ifp;
1349 struct rtentry *rt;
1350 u_int flags = RTF_HOST|RTF_LOCAL;
1351 int error = 0;
1352
1353 /*
1354 * If the configured address correspond to the magical "any"
1355 * address do not add a local route entry because that might
1356 * corrupt the routing tree which uses this value for the
1357 * default routes.
1358 */
1359 switch (ifa->ifa_addr->sa_family) {
1360 case AF_INET:
1361 if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1362 return (0);
1363 break;
1364 #ifdef INET6
1365 case AF_INET6:
1366 if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1367 &in6addr_any))
1368 return (0);
1369 break;
1370 #endif
1371 default:
1372 break;
1373 }
1374
1375 if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1376 flags |= RTF_LLINFO;
1377
1378 /* If there is no local entry, allocate one. */
1379 rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1380 if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
1381 error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
1382 ifp->if_rdomain);
1383 }
1384 rtfree(rt);
1385
1386 return (error);
1387 }
1388
1389 /*
1390 * Remove local rtentry of ifa's address if it exists.
1391 */
1392 int
rt_ifa_dellocal(struct ifaddr * ifa)1393 rt_ifa_dellocal(struct ifaddr *ifa)
1394 {
1395 struct ifnet *ifp = ifa->ifa_ifp;
1396 struct rtentry *rt;
1397 u_int flags = RTF_HOST|RTF_LOCAL;
1398 int error = 0;
1399
1400 /*
1401 * We do not add local routes for such address, so do not bother
1402 * removing them.
1403 */
1404 switch (ifa->ifa_addr->sa_family) {
1405 case AF_INET:
1406 if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1407 return (0);
1408 break;
1409 #ifdef INET6
1410 case AF_INET6:
1411 if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1412 &in6addr_any))
1413 return (0);
1414 break;
1415 #endif
1416 default:
1417 break;
1418 }
1419
1420 if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1421 flags |= RTF_LLINFO;
1422
1423 /*
1424 * Before deleting, check if a corresponding local host
1425 * route surely exists. With this check, we can avoid to
1426 * delete an interface direct route whose destination is same
1427 * as the address being removed. This can happen when removing
1428 * a subnet-router anycast address on an interface attached
1429 * to a shared medium.
1430 */
1431 rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1432 if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
1433 error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
1434 ifp->if_rdomain);
1435 }
1436 rtfree(rt);
1437
1438 return (error);
1439 }
1440
/*
 * Delete every route that uses ``ifa'' from the routing tables of the
 * interface's rdomain.
 */
void
rt_ifa_purge(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt = NULL;
	unsigned int rtableid;
	int error, af = ifa->ifa_addr->sa_family;

	KASSERT(ifp != NULL);

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;

		do {
			/*
			 * The walker flags a matching route with EEXIST;
			 * delete it here and walk again (EAGAIN) until
			 * the table holds no more matches.
			 */
			error = rtable_walk(rtableid, af, &rt,
			    rt_ifa_purge_walker, ifa);
			if (rt != NULL && error == EEXIST) {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
			}
			rtfree(rt);
			rt = NULL;
		} while (error == EAGAIN);

		/* No routes of this family in the table: not an error. */
		if (error == EAFNOSUPPORT)
			error = 0;

		if (error)
			break;
	}
}
1478
1479 int
rt_ifa_purge_walker(struct rtentry * rt,void * vifa,unsigned int rtableid)1480 rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
1481 {
1482 struct ifaddr *ifa = vifa;
1483
1484 if (rt->rt_ifa == ifa)
1485 return EEXIST;
1486
1487 return 0;
1488 }
1489
1490 /*
1491 * Route timer routines. These routines allow functions to be called
1492 * for various routes at any time. This is useful in supporting
1493 * path MTU discovery and redirect route deletion.
1494 *
1495 * This is similar to some BSDI internal functions, but it provides
1496 * for multiple queues for efficiency's sake...
1497 */
1498
/* Serializes the timer queues and the per-route timer lists. */
struct mutex			rttimer_mtx;

struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
	time_t			rtt_expire;	/* [I] rt expire time */
	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
};

/*
 * Run the action of an expired timer ``r'': the queue's function when
 * one is set, otherwise delete dynamic host routes (e.g. redirects)
 * from the timer's routing table.
 */
#define RTTIMER_CALLOUT(r)	{					\
	if (r->rtt_queue->rtq_func != NULL) {				\
		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
	} else {							\
		struct ifnet *ifp;					\
									\
		ifp = if_get(r->rtt_rt->rt_ifidx);			\
		if (ifp != NULL &&					\
		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
		    (RTF_DYNAMIC|RTF_HOST))				\
			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
		if_put(ifp);						\
	}								\
}
1525
/* One-time setup of the route timer allocator and its mutex. */
void
rt_timer_init(void)
{
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
	    IPL_MPFLOOR, 0, "rttmr", NULL);
	mtx_init(&rttimer_mtx, IPL_MPFLOOR);
}
1533
1534 void
rt_timer_queue_init(struct rttimer_queue * rtq,int timeout,void (* func)(struct rtentry *,u_int))1535 rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
1536 void (*func)(struct rtentry *, u_int))
1537 {
1538 rtq->rtq_timeout = timeout;
1539 rtq->rtq_count = 0;
1540 rtq->rtq_func = func;
1541 TAILQ_INIT(&rtq->rtq_head);
1542 }
1543
/*
 * Change the queue's timeout for future timers; already-armed timers
 * keep their original delay (nothing is rearmed here).
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}
1551
/*
 * Expire every timer on ``rtq'' immediately: unlink them all under the
 * mutex, then run each timer's action and free it.
 *
 * NOTE(review): the per-timer rtt_timeout is not cancelled here,
 * unlike in rt_timer_unlink() -- confirm a pending timeout cannot
 * race with this flush.
 */
void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	/* Run the actions and free the timers outside of the mutex. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}
1577
/*
 * Snapshot of the number of timers on ``rtq'' (read without taking
 * rttimer_mtx).
 */
unsigned long
rt_timer_queue_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1583
/*
 * Detach ``r'' from its route and try to stop its timeout.  Must be
 * called with rttimer_mtx held.  Returns ``r'' if the caller now owns
 * it and must free it, or NULL if the timeout already fired and
 * rt_timer_timer() will free it instead.
 */
static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;	/* tells rt_timer_timer() we are detached */

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	/* Timeout stopped: finish unlinking from the queue ourselves. */
	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}
1602
/*
 * Detach and free all timers hanging off ``rt''.  Timers whose
 * timeout already fired are left for rt_timer_timer() to free.
 */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	/* Free the collected timers outside of the mutex. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}
1623
1624 time_t
rt_timer_get_expire(const struct rtentry * rt)1625 rt_timer_get_expire(const struct rtentry *rt)
1626 {
1627 const struct rttimer *r;
1628 time_t expire = 0;
1629
1630 mtx_enter(&rttimer_mtx);
1631 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1632 if (expire == 0 || expire > r->rtt_expire)
1633 expire = r->rtt_expire;
1634 }
1635 mtx_leave(&rttimer_mtx);
1636
1637 return expire;
1638 }
1639
/*
 * Arm a timer of ``queue'' for route ``rt''.  An existing timer for
 * the same queue is replaced, so at most one timer per (route, queue)
 * pair exists.  Returns 0 on success or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer *r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			/* r becomes NULL if the old timeout already fired */
			r = rt_timer_unlink(r);
			break;	/* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	/* Free the replaced timer outside of the mutex. */
	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}
1678
/*
 * Timeout handler: remove the expired timer from its queue and its
 * route's list, run the queue's action and free the timer.
 */
void
rt_timer_timer(void *arg)
{
	struct rttimer *r = arg;
	struct rttimer_queue *rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	/* rtt_rt == NULL means rt_timer_unlink() already detached us. */
	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	/* Only run the action if the timer was still attached. */
	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}
1702
1703 #ifdef MPLS
/*
 * Attach MPLS label/operation information to ``rt'' and set RTF_MPLS.
 * Returns 0 on success, EOPNOTSUPP/EINVAL/EAFNOSUPPORT on bad input,
 * or ENOMEM if no memory for the rt_mpls structure.
 */
int
rt_mpls_set(struct rtentry *rt, const struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls *psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls *rt_mpls;

	/* A source label is mandatory for everything except a POP. */
	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	/*
	 * NOTE(review): rt_llinfo is overwritten unconditionally; this
	 * assumes it is NULL on entry (freshly allocated route) --
	 * confirm with the callers to rule out a leak.
	 */
	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}
1730
1731 void
rt_mpls_clear(struct rtentry * rt)1732 rt_mpls_clear(struct rtentry *rt)
1733 {
1734 if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
1735 free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
1736 rt->rt_llinfo = NULL;
1737 }
1738 rt->rt_flags &= ~RTF_MPLS;
1739 }
1740 #endif
1741
/*
 * Return the id of label ``name'', creating the label if it does not
 * exist yet.  Every successful call takes a reference that must be
 * released with rtlabel_unref().  Returns 0 for the empty name, when
 * the id space is exhausted, or on allocation failure.
 */
u_int16_t
rtlabel_name2id(const char *name)
{
	struct rt_label *label, *p;
	u_int16_t new_id = 1, id = 0;

	if (!name[0])
		return (0);

	mtx_enter(&rtlabel_mtx);
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			id = label->rtl_id;
			goto out;
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;	/* gap found: new_id is free */
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		goto out;	/* id space exhausted; id stays 0 */

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		goto out;
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	id = label->rtl_id;
out:
	mtx_leave(&rtlabel_mtx);

	return (id);
}
1790
1791 const char *
rtlabel_id2name_locked(u_int16_t id)1792 rtlabel_id2name_locked(u_int16_t id)
1793 {
1794 struct rt_label *label;
1795
1796 MUTEX_ASSERT_LOCKED(&rtlabel_mtx);
1797
1798 TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1799 if (label->rtl_id == id)
1800 return (label->rtl_name);
1801
1802 return (NULL);
1803 }
1804
/*
 * Copy the name of label ``id'' into ``rtlabelbuf'' (of size ``sz'').
 * Returns ``rtlabelbuf'' on success or NULL for id 0 or an unknown id.
 */
const char *
rtlabel_id2name(u_int16_t id, char *rtlabelbuf, size_t sz)
{
	const char *label;

	if (id == 0)
		return (NULL);

	/* Copy the name out under the mutex; the list entry may go away. */
	mtx_enter(&rtlabel_mtx);
	if ((label = rtlabel_id2name_locked(id)) != NULL)
		strlcpy(rtlabelbuf, label, sz);
	mtx_leave(&rtlabel_mtx);

	/* ``label'' is only used as a found/not-found flag from here on. */
	if (label == NULL)
		return (NULL);

	return (rtlabelbuf);
}
1823
/*
 * Fill ``sa_rl'' with a sockaddr_rtlabel carrying the name of label
 * ``labelid''.  Returns a pointer to ``sa_rl'' or NULL for label 0 or
 * an unknown id.
 */
struct sockaddr *
rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
{
	const char *label;

	if (labelid == 0)
		return (NULL);

	/* Copy the name out under the mutex; the list entry may go away. */
	mtx_enter(&rtlabel_mtx);
	if ((label = rtlabel_id2name_locked(labelid)) != NULL) {
		bzero(sa_rl, sizeof(*sa_rl));
		sa_rl->sr_len = sizeof(*sa_rl);
		sa_rl->sr_family = AF_UNSPEC;
		strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
	}
	mtx_leave(&rtlabel_mtx);

	/* ``label'' is only used as a found/not-found flag from here on. */
	if (label == NULL)
		return (NULL);

	return ((struct sockaddr *)sa_rl);
}
1846
/*
 * Drop a reference on label ``id'' and free the label when the last
 * reference goes away.  Id 0 (no label) is ignored.
 */
void
rtlabel_unref(u_int16_t id)
{
	struct rt_label *p, *next;

	if (id == 0)
		return;

	mtx_enter(&rtlabel_mtx);
	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
		if (id == p->rtl_id) {
			if (--p->rtl_ref == 0) {
				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
				free(p, M_RTABLE, sizeof(*p));
			}
			break;
		}
	}
	mtx_leave(&rtlabel_mtx);
}
1867
/*
 * Propagate the link state of ``ifp'' to the routes using it: walk
 * every mpath-capable table of the interface's rdomain and let
 * rt_if_linkstate_change() update or delete the affected entries.
 * Returns 0 or the first fatal error from the walk.
 */
int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			do {
				/*
				 * EEXIST flags a route to delete; do it
				 * here and walk again (EAGAIN) until no
				 * more matches remain.
				 */
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			/* Table has no routes of this family: fine. */
			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}
1905
/*
 * rtable_walk() callback used by rt_if_track(): bring routes of
 * ``ifp'' up or down according to the interface's link state.
 * Returning EEXIST tells the caller to delete the route.
 */
int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	/* Publish the flag/priority updates before bumping the generation. */
	membar_producer();
	atomic_inc_long(&rtgeneration);

	return (error);
}
1956
/*
 * Fill ``sa_mask'' with the netmask sockaddr corresponding to prefix
 * length ``plen'' for address family ``af''.  Returns a pointer to
 * ``sa_mask'', or NULL when ``plen'' is -1 (no mask) or the family is
 * unsupported.
 */
struct sockaddr *
rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
{
	struct sockaddr_in	*sin = (struct sockaddr_in *)sa_mask;
#ifdef INET6
	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)sa_mask;
#endif

	KASSERT(plen >= 0 || plen == -1);

	if (plen == -1)
		return (NULL);

	/* sockaddr_in6 is large enough for both families. */
	memset(sa_mask, 0, sizeof(*sa_mask));

	switch (af) {
	case AF_INET:
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		in_prefixlen2mask(&sin->sin_addr, plen);
		break;
#ifdef INET6
	case AF_INET6:
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&sin6->sin6_addr, plen);
		break;
#endif /* INET6 */
	default:
		return (NULL);
	}

	return ((struct sockaddr *)sa_mask);
}
1991
/* Build the netmask sockaddr matching the prefix length of ``rt''. */
struct sockaddr *
rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
{
	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
}
1997
1998 #ifdef DDB
1999 #include <machine/db_machdep.h>
2000 #include <ddb/db_output.h>
2001
2002 void db_print_sa(struct sockaddr *);
2003 void db_print_ifa(struct ifaddr *);
2004
2005 void
db_print_sa(struct sockaddr * sa)2006 db_print_sa(struct sockaddr *sa)
2007 {
2008 int len;
2009 u_char *p;
2010
2011 if (sa == NULL) {
2012 db_printf("[NULL]");
2013 return;
2014 }
2015
2016 p = (u_char *)sa;
2017 len = sa->sa_len;
2018 db_printf("[");
2019 while (len > 0) {
2020 db_printf("%d", *p);
2021 p++;
2022 len--;
2023 if (len)
2024 db_printf(",");
2025 }
2026 db_printf("]\n");
2027 }
2028
/* Dump the addresses, flags, refcount and metric of ``ifa''. */
void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x, refcnt=%u, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
}
2043
/*
 * Function to pass to rtable_walk().
 * Dumps one route entry; returns non-zero error to abort the walk
 * (always 0 here so all entries are printed).
 */
int
db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld\n",
	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_key(rt));
	db_printf(" plen=%d", rt_plen(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);
	db_printf(" ifidx=%u ", rt->rt_ifidx);
	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p priority=%d\n",
	    rt->rt_gwroute, rt->rt_llinfo, rt->rt_priority);
	return (0);
}
2067
/*
 * Function to print all the route trees.
 * Walks table ``rtableid'' for family ``af'' and dumps every entry.
 */
int
db_show_rtable(int af, unsigned int rtableid)
{
	db_printf("Route tree for af %d, rtableid %u\n", af, rtableid);
	rtable_walk(rtableid, af, NULL, db_show_rtentry, NULL);
	return (0);
}
2078 #endif /* DDB */
2079