1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58
59 #define DEBUG_MOD_NAME route_ctl
60 #define DEBUG_MAX_LEVEL LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63
64 /*
65 * This file contains control plane routing tables functions.
66 *
 * All functions assume they are called in net epoch.
68 */
69
/*
 * Scratch storage large enough to hold any sockaddr flavor used by this
 * file; used primarily for building netmasks on the stack.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];	/* pads the union to a fixed 32-byte footprint */
};
76
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78 struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81 struct rib_cmd_info *rc);
82
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 #ifdef ROUTE_MPATH
86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
88 int op_flags, struct rib_cmd_info *rc);
89 #endif
90
91 static int add_route(struct rib_head *rnh, struct rtentry *rt,
92 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
93 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
94 struct rib_cmd_info *rc);
95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
97
98 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
99 struct sockaddr **pmask);
100 static int get_prio_from_info(const struct rt_addrinfo *info);
101 static int nhop_get_prio(const struct nhop_object *nh);
102
103 #ifdef ROUTE_MPATH
104 static bool rib_can_multipath(struct rib_head *rh);
105 #endif
106
/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define V_rib_route_multipath VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define _MP_FLAGS CTLFLAG_RW
#else
/* Without ROUTE_MPATH compiled in, the knob is visible but read-only. */
#define _MP_FLAGS CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/*
 * Set to 1 when the first multipath route is installed (see
 * add_route_flags_mpath()); exported read-only to userland.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
146
147 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)148 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
149 {
150 struct rib_head *rnh;
151 struct sockaddr *dst;
152
153 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
154
155 dst = info->rti_info[RTAX_DST];
156 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
157
158 return (rnh);
159 }
160
#if defined(INET) && defined(INET6)
/* Reports whether IPv4 routes may use IPv6 nexthops (RFC 5549). */
bool
rib_can_4o6_nhop(void)
{
	return (V_rib_route_ipv6_nexthop != 0);
}
#endif
168
169 #ifdef ROUTE_MPATH
170 static bool
rib_can_multipath(struct rib_head * rh)171 rib_can_multipath(struct rib_head *rh)
172 {
173 int result;
174
175 CURVNET_SET(rh->rib_vnet);
176 result = !!V_rib_route_multipath;
177 CURVNET_RESTORE();
178
179 return (result);
180 }
181
182 /*
 * Check if nhop is multipath-eligible.
184 * Avoid nhops without gateways and redirects.
185 *
186 * Returns 1 for multipath-eligible nexthop,
187 * 0 otherwise.
188 */
189 bool
nhop_can_multipath(const struct nhop_object * nh)190 nhop_can_multipath(const struct nhop_object *nh)
191 {
192
193 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
194 return (1);
195 if ((nh->nh_flags & NHF_GATEWAY) == 0)
196 return (0);
197 if ((nh->nh_flags & NHF_REDIRECT) != 0)
198 return (0);
199
200 return (1);
201 }
202 #endif
203
204 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)205 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
206 {
207 uint32_t weight;
208
209 if (info->rti_mflags & RTV_WEIGHT)
210 weight = info->rti_rmx->rmx_weight;
211 else
212 weight = default_weight;
213 /* Keep upper 1 byte for adm distance purposes */
214 if (weight > RT_MAX_WEIGHT)
215 weight = RT_MAX_WEIGHT;
216 else if (weight == 0)
217 weight = default_weight;
218
219 return (weight);
220 }
221
222 /*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" one.
225 */
226 #define NH_PRIORITY_HIGH 2
227 #define NH_PRIORITY_NORMAL 1
228 static int
get_prio_from_info(const struct rt_addrinfo * info)229 get_prio_from_info(const struct rt_addrinfo *info)
230 {
231 if (info->rti_flags & RTF_PINNED)
232 return (NH_PRIORITY_HIGH);
233 return (NH_PRIORITY_NORMAL);
234 }
235
236 static int
nhop_get_prio(const struct nhop_object * nh)237 nhop_get_prio(const struct nhop_object *nh)
238 {
239 if (NH_IS_PINNED(nh))
240 return (NH_PRIORITY_HIGH);
241 return (NH_PRIORITY_NORMAL);
242 }
243
244 /*
245 * Check if specified @gw matches gw data in the nexthop @nh.
246 *
247 * Returns true if matches, false otherwise.
248 */
249 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)250 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
251 {
252
253 if (nh->gw_sa.sa_family != gw->sa_family)
254 return (false);
255
256 switch (gw->sa_family) {
257 case AF_INET:
258 return (nh->gw4_sa.sin_addr.s_addr ==
259 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
260 case AF_INET6:
261 {
262 const struct sockaddr_in6 *gw6;
263 gw6 = (const struct sockaddr_in6 *)gw;
264
265 /*
266 * Currently (2020-09) IPv6 gws in kernel have their
267 * scope embedded. Once this becomes false, this code
268 * has to be revisited.
269 */
270 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
271 &gw6->sin6_addr))
272 return (true);
273 return (false);
274 }
275 case AF_LINK:
276 {
277 const struct sockaddr_dl *sdl;
278 sdl = (const struct sockaddr_dl *)gw;
279 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
280 }
281 default:
282 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
283 }
284
285 /* NOTREACHED */
286 return (false);
287 }
288
289 /*
290 * Matches all nexthop with given @gw.
291 * Can be used as rib_filter_f callback.
292 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
300
/* State for match_gw_one(): the gateway to look for plus a match count. */
struct gw_filter_data {
	const struct sockaddr *gw;	/* gateway address to match */
	int count;			/* matches seen so far */
};
305
306 /*
 * Matches first occurrence of the gateway provided in @gwd
308 */
309 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)310 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
311 {
312 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
313
314 /* Return only first match to make rtsock happy */
315 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
316 return (1);
317 return (0);
318 }
319
320 /*
 * Checks if data in @info matches nexthop @nh.
322 *
323 * Returns 0 on success,
324 * ESRCH if not matched,
325 * ENOENT if filter function returned false
326 */
327 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)328 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
329 const struct nhop_object *nh)
330 {
331 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
332
333 if (info->rti_filter != NULL) {
334 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
335 return (ENOENT);
336 else
337 return (0);
338 }
339 if ((gw != NULL) && !match_nhop_gw(nh, gw))
340 return (ESRCH);
341
342 return (0);
343 }
344
345 /*
346 * Runs exact prefix match based on @dst and @netmask.
347 * Returns matched @rtentry if found or NULL.
348 * If rtentry was found, saves nexthop / weight value into @rnd.
349 */
350 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)351 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
352 const struct sockaddr *netmask, struct route_nhop_data *rnd)
353 {
354 struct rtentry *rt;
355
356 RIB_LOCK_ASSERT(rnh);
357
358 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
359 if (rt != NULL) {
360 rnd->rnd_nhop = rt->rt_nhop;
361 rnd->rnd_weight = rt->rt_weight;
362 } else {
363 rnd->rnd_nhop = NULL;
364 rnd->rnd_weight = 0;
365 }
366
367 return (rt);
368 }
369
/* Exact-match lookup keyed by an existing rtentry's dst/netmask. */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *dst = rt_key_const(rt);
	const struct sockaddr *netmask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
}
376
377 /*
378 * Runs exact prefix match based on dst/netmask from @info.
379 * Assumes RIB lock is held.
380 * Returns matched @rtentry if found or NULL.
381 * If rtentry was found, saves nexthop / weight value into @rnd.
382 */
383 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)384 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
385 struct route_nhop_data *rnd)
386 {
387 struct rtentry *rt;
388
389 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
390 info->rti_info[RTAX_NETMASK], rnd);
391
392 return (rt);
393 }
394
395 const struct rtentry *
rib_lookup_prefix_plen(struct rib_head * rnh,struct sockaddr * dst,int plen,struct route_nhop_data * rnd)396 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
397 struct route_nhop_data *rnd)
398 {
399 union sockaddr_union mask_storage;
400 struct sockaddr *netmask = &mask_storage.sa;
401
402 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
403 return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
404 return (NULL);
405 }
406
/*
 * Builds a network mask for @family/@plen in the buffer *@pmask points to
 * and masks @_dst accordingly.  For native-length prefixes (or plen == -1,
 * meaning "host route / not applicable") *@pmask is set to NULL instead.
 *
 * Returns true on success, false if @plen is out of range for @family or
 * the family is not supported.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
	{
		struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
		struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin_family = family;
		mask->sin_len = sizeof(*mask);
		if (plen == 32)
			*pmask = NULL;
		else if (plen > 32 || plen < 0)
			return (false);
		else {
			uint32_t daddr, maddr;

			/*
			 * Shift an unsigned constant: with a plain signed 1,
			 * plen == 1 evaluated 1 << 31, shifting into the
			 * sign bit, which is undefined behavior.
			 */
			maddr = htonl(plen ? ~((1U << (32 - plen)) - 1) : 0);
			mask->sin_addr.s_addr = maddr;
			daddr = dst->sin_addr.s_addr;
			daddr = htonl(ntohl(daddr) & ntohl(maddr));
			dst->sin_addr.s_addr = daddr;
		}
		return (true);
	}
	break;
#endif
#ifdef INET6
	case AF_INET6:
	{
		struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
		struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin6_family = family;
		mask->sin6_len = sizeof(*mask);
		if (plen == 128)
			*pmask = NULL;
		else if (plen > 128 || plen < 0)
			return (false);
		else {
			ip6_writemask(&mask->sin6_addr, plen);
			IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
		}
		return (true);
	}
	break;
#endif
	}
	return (false);
}
466
467 /*
 * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
469 * to the routing table.
470 *
471 * @fibnum: verified kernel rtable id to insert route to
472 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
473 * @plen: prefix length (or -1 if host route or not applicable for AF)
474 * @op_flags: combination of RTM_F_ flags
475 * @rc: storage to report operation result
476 *
477 * Returns 0 on success.
478 */
479 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)480 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
481 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
482 {
483 union sockaddr_union mask_storage;
484 struct sockaddr *netmask = &mask_storage.sa;
485 struct rtentry *rt = NULL;
486
487 NET_EPOCH_ASSERT();
488
489 bzero(rc, sizeof(struct rib_cmd_info));
490 rc->rc_cmd = RTM_ADD;
491
492 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
493 if (rnh == NULL)
494 return (EAFNOSUPPORT);
495
496 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
497 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
498 return (EINVAL);
499 }
500
501 if (op_flags & RTM_F_CREATE) {
502 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
503 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
504 return (ENOMEM);
505 }
506 } else {
507 struct route_nhop_data rnd_tmp;
508 RIB_RLOCK_TRACKER;
509
510 RIB_RLOCK(rnh);
511 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
512 RIB_RUNLOCK(rnh);
513
514 if (rt == NULL)
515 return (ESRCH);
516 }
517
518 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
519 }
520
521 /*
522 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
524 *
525 * @fibnum: rtable id to remove route from
526 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
527 * @plen: prefix length (or -1 if host route or not applicable for AF)
528 * @gw: gateway to match
529 * @op_flags: combination of RTM_F_ flags
530 * @rc: storage to report operation result
531 *
532 * Returns 0 on success.
533 */
534 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)535 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
536 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
537 {
538 struct gw_filter_data gwd = { .gw = gw };
539
540 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
541 }
542
543 /*
544 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
546 *
547 * @fibnum: rtable id to remove route from
548 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
549 * @plen: prefix length (or -1 if host route or not applicable for AF)
550 * @filter_func: func to be called for each nexthop of the prefix for matching
551 * @filter_arg: argument to pass to @filter_func
552 * @op_flags: combination of RTM_F_ flags
553 * @rc: storage to report operation result
554 *
555 * Returns 0 on success.
556 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/* The destination must fit the on-stack mask buffer. */
	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE allows deleting RTF_PINNED (high-priority) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify delayed subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
617
618 /*
619 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
620 * @rt: route to copy.
621 * @rnd_src: nhop and weight. Multipath routes are not supported
622 * @rh_dst: target rtable.
623 * @rc: operation result storage
624 *
625 * Return 0 on success.
626 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	/* Nexthop groups (multipath) are explicitly not supported. */
	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	/* Clone the source nexthop, rehomed into the destination fib. */
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	/*
	 * NOTE(review): on failure the specific @error is discarded and
	 * ENOMEM returned; presumably nhop_get_nhop_internal() consumes
	 * the @nh reference on failure — confirm against the nhop KPI.
	 */
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	/* Pinned routes may override existing entries in the new table. */
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		/*
		 * NOTE(review): add_route_flags() appears to release
		 * @rt_new and the nexthop reference on its own failure
		 * paths (see its "out:" label) — freeing again here looks
		 * like a potential double free; verify ownership contract.
		 */
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}
684
685 /*
686 * Adds route defined by @info into the kernel table specified by @fibnum and
687 * sa_family in @info->rti_info[RTAX_DST].
688 *
689 * Returns 0 on success and fills in operation metadata into @rc.
690 */
691 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)692 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
693 struct rib_cmd_info *rc)
694 {
695 struct rib_head *rnh;
696 int error;
697
698 NET_EPOCH_ASSERT();
699
700 rnh = get_rnh(fibnum, info);
701 if (rnh == NULL)
702 return (EAFNOSUPPORT);
703
704 /*
705 * Check consistency between RTF_HOST flag and netmask
706 * existence.
707 */
708 if (info->rti_flags & RTF_HOST)
709 info->rti_info[RTAX_NETMASK] = NULL;
710 else if (info->rti_info[RTAX_NETMASK] == NULL) {
711 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
712 return (EINVAL);
713 }
714
715 bzero(rc, sizeof(struct rib_cmd_info));
716 rc->rc_cmd = RTM_ADD;
717
718 error = add_route_byinfo(rnh, info, rc);
719 if (error == 0)
720 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
721
722 return (error);
723 }
724
/*
 * Validates @info, builds a fresh rtentry and nexthop from it and
 * attempts to insert them into @rnh via add_route_flags().
 * Returns 0 on success with the outcome recorded in @rc.
 */
static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	/* RTF_GATEWAY requires an actual gateway address. */
	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	/* Reject unsupported dst/gateway family pairs (e.g. v4 via v6 if disabled). */
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	/* The destination must fit the rtentry-embedded dst buffer. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	/* Resolve the outgoing interface if the caller did not supply it. */
	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	/*
	 * Pinned routes may override existing paths (RTM_F_FORCE);
	 * normal routes may be appended as additional multipath paths.
	 */
	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));

}
782
/*
 * Inserts, replaces or appends rtentry @rt with nexthop data @rnd_add
 * into @rnh, as directed by @op_flags (RTM_F_CREATE/EXCL/REPLACE/
 * APPEND/FORCE).
 *
 * On failure the @rnd_add nexthop reference is dropped and, when @rt
 * was freshly allocated (RTM_F_CREATE), @rt is freed as well.
 * Returns 0 on success with the result recorded in @rc.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	/* Check whether this prefix is already present. */
	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Success: release the displaced nexthop, not the new one. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	/* Append the new path when both old and new nexthops allow mpath. */
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		/* EAGAIN: concurrent update raced us; refresh and retry. */
		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			/* Pre-allocated rtentry was not inserted - free it. */
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
867
868 #ifdef ROUTE_MPATH
869 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)870 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
871 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
872 int op_flags, struct rib_cmd_info *rc)
873 {
874 RIB_RLOCK_TRACKER;
875 struct route_nhop_data rnd_new;
876 int error = 0;
877
878 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
879 if (error != 0) {
880 if (error == EAGAIN) {
881 /*
882 * Group creation failed, most probably because
883 * @rnd_orig data got scheduled for deletion.
884 * Refresh @rnd_orig data and retry.
885 */
886 RIB_RLOCK(rnh);
887 lookup_prefix_rt(rnh, rt, rnd_orig);
888 RIB_RUNLOCK(rnh);
889 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
890 /* In this iteration route doesn't exist */
891 error = ENOENT;
892 }
893 }
894 return (error);
895 }
896 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
897 if (error != 0)
898 return (error);
899
900 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
901 /*
902 * First multipath route got installed. Enable local
903 * outbound connections hashing.
904 */
905 if (bootverbose)
906 printf("FIB: enabled flowid calculation for locally-originated packets\n");
907 V_fib_hash_outbound = 1;
908 }
909
910 return (0);
911 }
912 #endif
913
914 /*
915 * Removes route defined by @info from the kernel table specified by @fibnum and
916 * sa_family in @info->rti_info[RTAX_DST].
917 *
918 * Returns 0 on success and fills in operation metadata into @rc.
919 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/*
	 * Choose the nexthop filter: an explicit caller-supplied filter
	 * wins; otherwise match the first occurrence of the gateway, if
	 * one was provided.
	 */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	/* RTF_PINNED routes require matching (high) priority to delete. */
	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify delayed subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
994
995 /*
996 * Conditionally unlinks rtentry paths from @rnh matching @cb.
997 * Returns 0 on success with operation result stored in @rc.
998 * On error, returns:
999 * ESRCH - if prefix was not found or filter function failed to match
1000 * EADDRINUSE - if trying to delete higher priority route.
1001 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		/* Multipath route: remove only the paths matched by @cb. */
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		/* A filter is mandatory to select paths within a group. */
		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			/* Swap in the group with the matched paths removed. */
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	/* Single nexthop: it must pass the filter, if any... */
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* ...and the caller must have sufficient priority (RTF_PINNED). */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}
1036
1037 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1038 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1039 struct rib_cmd_info *rc)
1040 {
1041 RIB_RLOCK_TRACKER;
1042 struct route_nhop_data rnd_orig;
1043 struct rib_head *rnh;
1044 struct rtentry *rt;
1045 int error;
1046
1047 NET_EPOCH_ASSERT();
1048
1049 rnh = get_rnh(fibnum, info);
1050 if (rnh == NULL)
1051 return (EAFNOSUPPORT);
1052
1053 bzero(rc, sizeof(struct rib_cmd_info));
1054 rc->rc_cmd = RTM_CHANGE;
1055
1056 /* Check if updated gateway exists */
1057 if ((info->rti_flags & RTF_GATEWAY) &&
1058 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1059
1060 /*
1061 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1062 * Remove RTF_GATEWAY to enforce consistency and maintain
1063 * compatibility..
1064 */
1065 info->rti_flags &= ~RTF_GATEWAY;
1066 }
1067
1068 /*
1069 * route change is done in multiple steps, with dropping and
1070 * reacquiring lock. In the situations with multiple processes
1071 * changes the same route in can lead to the case when route
1072 * is changed between the steps. Address it by retrying the operation
1073 * multiple times before failing.
1074 */
1075
1076 RIB_RLOCK(rnh);
1077 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1078 info->rti_info[RTAX_NETMASK], &rnh->head);
1079
1080 if (rt == NULL) {
1081 RIB_RUNLOCK(rnh);
1082 return (ESRCH);
1083 }
1084
1085 rnd_orig.rnd_nhop = rt->rt_nhop;
1086 rnd_orig.rnd_weight = rt->rt_weight;
1087
1088 RIB_RUNLOCK(rnh);
1089
1090 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1091 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1092 if (error != EAGAIN)
1093 break;
1094 }
1095
1096 return (error);
1097 }
1098
1099 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1100 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1101 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1102 {
1103 int error;
1104
1105 /*
1106 * New gateway could require new ifaddr, ifp;
1107 * flags may also be different; ifp may be specified
1108 * by ll sockaddr when protocol address is ambiguous
1109 */
1110 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1111 info->rti_info[RTAX_GATEWAY] != NULL) ||
1112 info->rti_info[RTAX_IFP] != NULL ||
1113 (info->rti_info[RTAX_IFA] != NULL &&
1114 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1115 error = rt_getifa_fib(info, rnh->rib_fibnum);
1116
1117 if (error != 0) {
1118 info->rti_ifa = NULL;
1119 return (error);
1120 }
1121 }
1122
1123 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1124 info->rti_ifa = NULL;
1125
1126 return (error);
1127 }
1128
#ifdef ROUTE_MPATH
/*
 * Changes the single path inside the multipath group of @rt that matches
 * @info, rebuilding the group with the updated nexthop/weight.
 * Returns 0 on success and stores the result in @rc; ESRCH when no path
 * in the group matches @info.
 *
 * Fix: loop index was a signed int compared against the unsigned
 * num_nhops (-Wsign-compare); use uint32_t for index variables.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops, found_idx = 0;
	int error;

	/* Find the path inside the group matching @info. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (uint32_t i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	/* Build a copy of the group with the matched path replaced. */
	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		/* Transient failure: let the caller retry. */
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	/* The group now holds its own references; drop ours. */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	return (change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc));
}
#endif
1181
1182 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1183 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1184 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1185 struct rib_cmd_info *rc)
1186 {
1187 int error = 0;
1188 struct nhop_object *nh_orig;
1189 struct route_nhop_data rnd_new;
1190
1191 nh_orig = rnd_orig->rnd_nhop;
1192 if (nh_orig == NULL)
1193 return (ESRCH);
1194
1195 #ifdef ROUTE_MPATH
1196 if (NH_IS_NHGRP(nh_orig))
1197 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1198 #endif
1199
1200 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1201 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1202 if (error != 0)
1203 return (error);
1204 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1205
1206 return (error);
1207 }
1208
1209 /*
1210 * Insert @rt with nhop data from @rnd_new to @rnh.
1211 * Returns 0 on success and stores operation results in @rc.
1212 */
1213 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1214 add_route(struct rib_head *rnh, struct rtentry *rt,
1215 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1216 {
1217 struct radix_node *rn;
1218
1219 RIB_WLOCK_ASSERT(rnh);
1220
1221 rt->rt_nhop = rnd->rnd_nhop;
1222 rt->rt_weight = rnd->rnd_weight;
1223 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1224
1225 if (rn != NULL) {
1226 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1227 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1228
1229 /* Finalize notification */
1230 rib_bump_gen(rnh);
1231 rnh->rnh_prefixes++;
1232
1233 rc->rc_cmd = RTM_ADD;
1234 rc->rc_rt = rt;
1235 rc->rc_nh_old = NULL;
1236 rc->rc_nh_new = rnd->rnd_nhop;
1237 rc->rc_nh_weight = rnd->rnd_weight;
1238
1239 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1240 return (0);
1241 }
1242
1243 /* Existing route or memory allocation failure. */
1244 return (EEXIST);
1245 }
1246
1247 /*
1248 * Unconditionally deletes @rt from @rnh.
1249 */
1250 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1251 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1252 {
1253 RIB_WLOCK_ASSERT(rnh);
1254
1255 /* Route deletion requested. */
1256 struct radix_node *rn;
1257
1258 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1259 if (rn == NULL)
1260 return (ESRCH);
1261 rt = RNTORT(rn);
1262 rt->rte_flags &= ~RTF_UP;
1263
1264 rib_bump_gen(rnh);
1265 rnh->rnh_prefixes--;
1266
1267 rc->rc_cmd = RTM_DELETE;
1268 rc->rc_rt = rt;
1269 rc->rc_nh_old = rt->rt_nhop;
1270 rc->rc_nh_new = NULL;
1271 rc->rc_nh_weight = rt->rt_weight;
1272
1273 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1274
1275 return (0);
1276 }
1277
1278 /*
1279 * Switch @rt nhop/weigh to the ones specified in @rnd.
1280 * Returns 0 on success.
1281 */
1282 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1283 change_route(struct rib_head *rnh, struct rtentry *rt,
1284 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1285 {
1286 struct nhop_object *nh_orig;
1287
1288 RIB_WLOCK_ASSERT(rnh);
1289
1290 nh_orig = rt->rt_nhop;
1291
1292 if (rnd->rnd_nhop == NULL)
1293 return (delete_route(rnh, rt, rc));
1294
1295 /* Changing nexthop & weight to a new one */
1296 rt->rt_nhop = rnd->rnd_nhop;
1297 rt->rt_weight = rnd->rnd_weight;
1298 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1299 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1300
1301 /* Finalize notification */
1302 rib_bump_gen(rnh);
1303 rc->rc_cmd = RTM_CHANGE;
1304 rc->rc_rt = rt;
1305 rc->rc_nh_old = nh_orig;
1306 rc->rc_nh_new = rnd->rnd_nhop;
1307 rc->rc_nh_weight = rnd->rnd_weight;
1308
1309 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1310
1311 return (0);
1312 }
1313
1314 /*
1315 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1316 * consistent with the current route data.
1317 * Nexthop in @nhd_new is consumed.
1318 */
1319 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1320 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1321 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1322 struct rib_cmd_info *rc)
1323 {
1324 struct rtentry *rt_new;
1325 int error = 0;
1326
1327 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1328 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1329 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1330 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1331 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1332 "trying change %s -> %s", buf_old, buf_new);
1333 }
1334 RIB_WLOCK(rnh);
1335
1336 struct route_nhop_data rnd;
1337 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1338
1339 if (rt_new == NULL) {
1340 if (rnd_orig->rnd_nhop == NULL)
1341 error = add_route(rnh, rt, rnd_new, rc);
1342 else {
1343 /*
1344 * Prefix does not exist, which was not our assumption.
1345 * Update @rnd_orig with the new data and return
1346 */
1347 rnd_orig->rnd_nhop = NULL;
1348 rnd_orig->rnd_weight = 0;
1349 error = EAGAIN;
1350 }
1351 } else {
1352 /* Prefix exists, try to update */
1353 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1354 /*
1355 * Nhop/mpath group hasn't changed. Flip
1356 * to the new precalculated one and return
1357 */
1358 error = change_route(rnh, rt_new, rnd_new, rc);
1359 } else {
1360 /* Update and retry */
1361 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1362 rnd_orig->rnd_weight = rt_new->rt_weight;
1363 error = EAGAIN;
1364 }
1365 }
1366
1367 RIB_WUNLOCK(rnh);
1368
1369 if (error == 0) {
1370 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1371
1372 if (rnd_orig->rnd_nhop != NULL)
1373 nhop_free_any(rnd_orig->rnd_nhop);
1374
1375 } else {
1376 if (rnd_new->rnd_nhop != NULL)
1377 nhop_free_any(rnd_new->rnd_nhop);
1378 }
1379
1380 return (error);
1381 }
1382
1383 /*
1384 * Performs modification of routing table specificed by @action.
1385 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1386 * Needs to be run in network epoch.
1387 *
1388 * Returns 0 on success and fills in @rc with action result.
1389 */
1390 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1391 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1392 struct rib_cmd_info *rc)
1393 {
1394 int error;
1395
1396 switch (action) {
1397 case RTM_ADD:
1398 error = rib_add_route(fibnum, info, rc);
1399 break;
1400 case RTM_DELETE:
1401 error = rib_del_route(fibnum, info, rc);
1402 break;
1403 case RTM_CHANGE:
1404 error = rib_change_route(fibnum, info, rc);
1405 break;
1406 default:
1407 error = ENOTSUP;
1408 }
1409
1410 return (error);
1411 }
1412
/*
 * Walk state for rib_walk_del()/rt_checkdelroute(): carries the filter
 * parameters into the radix-tree walk and accumulates unlinked entries.
 */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked rtentries to GC */
	rib_filter_f_t *filter_f;	/* per-path match callback */
	void *filter_arg;		/* opaque data passed to filter_f */
	int prio;			/* routes with higher nhop prio are kept */
	struct rib_cmd_info rc;		/* result of the last delete/change op */
};
1422
1423 /*
1424 * Conditionally unlinks rtenties or paths from radix tree based
1425 * on the callback data passed in @arg.
1426 */
1427 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1428 rt_checkdelroute(struct radix_node *rn, void *arg)
1429 {
1430 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1431 struct rtentry *rt = (struct rtentry *)rn;
1432
1433 if (rt_delete_conditional(di->rnh, rt, di->prio,
1434 di->filter_f, di->filter_arg, &di->rc) != 0)
1435 return (0);
1436
1437 /*
1438 * Add deleted rtentries to the list to GC them
1439 * after dropping the lock.
1440 *
1441 * XXX: Delayed notifications not implemented
1442 * for nexthop updates.
1443 */
1444 if (di->rc.rc_cmd == RTM_DELETE) {
1445 /* Add to the list and return */
1446 rt->rt_chain = di->head;
1447 di->head = rt;
1448 #ifdef ROUTE_MPATH
1449 } else {
1450 /*
1451 * RTM_CHANGE to a different nexthop or nexthop group.
1452 * Free old multipath group.
1453 */
1454 nhop_free_any(di->rc.rc_nh_old);
1455 #endif
1456 }
1457
1458 return (0);
1459 }
1460
1461 /*
1462 * Iterates over a routing table specified by @fibnum and @family and
1463 * deletes elements marked by @filter_f.
1464 * @fibnum: rtable id
1465 * @family: AF_ address family
1466 * @filter_f: function returning non-zero value for items to delete
1467 * @arg: data to pass to the @filter_f function
1468 * @report: true if rtsock notification is needed.
1469 */
1470 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1471 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1472 bool report)
1473 {
1474 struct rib_head *rnh;
1475 struct rtentry *rt;
1476 struct nhop_object *nh;
1477 struct epoch_tracker et;
1478
1479 rnh = rt_tables_get_rnh(fibnum, family);
1480 if (rnh == NULL)
1481 return;
1482
1483 struct rt_delinfo di = {
1484 .rnh = rnh,
1485 .filter_f = filter_f,
1486 .filter_arg = filter_arg,
1487 .prio = NH_PRIORITY_NORMAL,
1488 };
1489
1490 NET_EPOCH_ENTER(et);
1491
1492 RIB_WLOCK(rnh);
1493 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1494 RIB_WUNLOCK(rnh);
1495
1496 /* We might have something to reclaim. */
1497 bzero(&di.rc, sizeof(di.rc));
1498 di.rc.rc_cmd = RTM_DELETE;
1499 while (di.head != NULL) {
1500 rt = di.head;
1501 di.head = rt->rt_chain;
1502 rt->rt_chain = NULL;
1503 nh = rt->rt_nhop;
1504
1505 di.rc.rc_rt = rt;
1506 di.rc.rc_nh_old = nh;
1507 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1508
1509 if (report) {
1510 #ifdef ROUTE_MPATH
1511 struct nhgrp_object *nhg;
1512 const struct weightened_nhop *wn;
1513 uint32_t num_nhops;
1514 if (NH_IS_NHGRP(nh)) {
1515 nhg = (struct nhgrp_object *)nh;
1516 wn = nhgrp_get_nhops(nhg, &num_nhops);
1517 for (int i = 0; i < num_nhops; i++)
1518 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1519 } else
1520 #endif
1521 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1522 }
1523 rt_free(rt);
1524 }
1525
1526 NET_EPOCH_EXIT(et);
1527 }
1528
1529 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1530 rt_delete_unconditional(struct radix_node *rn, void *arg)
1531 {
1532 struct rtentry *rt = RNTORT(rn);
1533 struct rib_head *rnh = (struct rib_head *)arg;
1534
1535 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1536 if (RNTORT(rn) == rt)
1537 rt_free(rt);
1538
1539 return (0);
1540 }
1541
1542 /*
1543 * Removes all routes from the routing table without executing notifications.
1544 * rtentres will be removed after the end of a current epoch.
1545 */
1546 static void
rib_flush_routes(struct rib_head * rnh)1547 rib_flush_routes(struct rib_head *rnh)
1548 {
1549 RIB_WLOCK(rnh);
1550 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1551 RIB_WUNLOCK(rnh);
1552 }
1553
1554 void
rib_flush_routes_family(int family)1555 rib_flush_routes_family(int family)
1556 {
1557 struct rib_head *rnh;
1558
1559 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1560 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1561 rib_flush_routes(rnh);
1562 }
1563 }
1564
/*
 * Returns a short human-readable name for address family @family,
 * or "unknown" for unrecognized values.
 */
const char *
rib_print_family(int family)
{
	if (family == AF_INET)
		return ("inet");
	if (family == AF_INET6)
		return ("inet6");
	if (family == AF_LINK)
		return ("link");
	return ("unknown");
}
1578
1579