xref: /freebsd/sys/netlink/route/rt.c (revision 15f0b8c3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2021 Ng Peng Nam Sean
5  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include "opt_route.h"
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/rmlock.h>
37 #include <sys/socket.h>
38 
39 #include <net/if.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <net/route/route_var.h>
44 #include <netlink/netlink.h>
45 #include <netlink/netlink_ctl.h>
46 #include <netlink/netlink_route.h>
47 #include <netlink/route/route_var.h>
48 
49 #define	DEBUG_MOD_NAME	nl_route
50 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
51 #include <netlink/netlink_debug.h>
52 _DECLARE_DEBUG(LOG_DEBUG);
53 
54 static unsigned char
55 get_rtm_type(const struct nhop_object *nh)
56 {
57 	int nh_flags = nh->nh_flags;
58 
59 	/* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
60 	if (nh_flags & NHF_BLACKHOLE)
61 		return (RTN_BLACKHOLE);
62 	else if (nh_flags & NHF_REJECT)
63 		return (RTN_PROHIBIT);
64 	return (RTN_UNICAST);
65 }
66 
67 static uint8_t
68 nl_get_rtm_protocol(const struct nhop_object *nh)
69 {
70 #ifdef ROUTE_MPATH
71 	if (NH_IS_NHGRP(nh)) {
72 		const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh;
73 		uint8_t origin = nhgrp_get_origin(nhg);
74 		if (origin != RTPROT_UNSPEC)
75 			return (origin);
76 		nh = nhg->nhops[0];
77 	}
78 #endif
79 	uint8_t origin = nhop_get_origin(nh);
80 	if (origin != RTPROT_UNSPEC)
81 		return (origin);
82 	/* TODO: remove guesswork once all kernel users fill in origin */
83 	int rt_flags = nhop_get_rtflags(nh);
84 	if (rt_flags & RTF_PROTO1)
85 		return (RTPROT_ZEBRA);
86 	if (rt_flags & RTF_STATIC)
87 		return (RTPROT_STATIC);
88 	return (RTPROT_KERNEL);
89 }
90 
91 static int
92 get_rtmsg_type_from_rtsock(int cmd)
93 {
94 	switch (cmd) {
95 	case RTM_ADD:
96 	case RTM_CHANGE:
97 	case RTM_GET:
98 		return NL_RTM_NEWROUTE;
99 	case RTM_DELETE:
100 		return NL_RTM_DELROUTE;
101 	}
102 
103 	return (0);
104 }
105 
106 /*
107  * fibnum heuristics
108  *
109  * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
110  * msg                rtm_table     RTA_TABLE            result
111  * RTM_GETROUTE/dump          0             -       RT_ALL_FIBS
112  * RTM_GETROUTE/dump          1             -                 1
113  * RTM_GETROUTE/get           0             -                 0
114  *
115  */
116 
117 static struct nhop_object *
118 rc_get_nhop(const struct rib_cmd_info *rc)
119 {
120 	return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new);
121 }
122 
123 static void
124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh)
125 {
126 	int upper_family;
127 
128 	switch (nhop_get_neigh_family(nh)) {
129 	case AF_LINK:
130 		/* onlink prefix, skip */
131 		break;
132 	case AF_INET:
133 		nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
134 		break;
135 	case AF_INET6:
136 		upper_family = nhop_get_upper_family(nh);
137 		if (upper_family == AF_INET6) {
138 			nlattr_add(nw, NL_RTA_GATEWAY, 16, &nh->gw6_sa.sin6_addr);
139 		} else if (upper_family == AF_INET) {
140 			/* IPv4 over IPv6 */
141 			char buf[20];
142 			struct rtvia *via = (struct rtvia *)&buf[0];
143 			via->rtvia_family = AF_INET6;
144 			memcpy(via->rtvia_addr, &nh->gw6_sa.sin6_addr, 16);
145 			nlattr_add(nw, NL_RTA_VIA, 17, via);
146 		}
147 		break;
148 	}
149 }
150 
151 static void
152 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh)
153 {
154 	int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t);
155 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
156 
157 	if (nla == NULL)
158 		return;
159 	nla->nla_type = NL_RTA_METRICS;
160 	nla->nla_len = nla_len;
161 	nla++;
162 	nla->nla_type = NL_RTAX_MTU;
163 	nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t);
164 	*((uint32_t *)(nla + 1)) = nh->nh_mtu;
165 }
166 
167 #ifdef ROUTE_MPATH
168 static void
169 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm)
170 {
171 	uint32_t uidx = nhgrp_get_uidx(nhg);
172 	uint32_t num_nhops;
173 	const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops);
174 	uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh);
175 
176 	if (uidx != 0)
177 		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
178 
179 	nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags);
180 	int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH);
181 	if (off == 0)
182 		return;
183 
184 	for (int i = 0; i < num_nhops; i++) {
185 		int nh_off = nlattr_save_offset(nw);
186 		struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop);
187 		if (rtnh == NULL)
188 			return;
189 		rtnh->rtnh_flags = 0;
190 		rtnh->rtnh_ifindex = wn[i].nh->nh_ifp->if_index;
191 		rtnh->rtnh_hops = wn[i].weight;
192 		dump_rc_nhop_gw(nw, wn[i].nh);
193 		uint32_t rtflags = nhop_get_rtflags(wn[i].nh);
194 		if (rtflags != base_rtflags)
195 			nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
196 		if (rtflags & RTF_FIXEDMTU)
197 			dump_rc_nhop_mtu(nw, wn[i].nh);
198 		rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop);
199 		/*
200 		 * nlattr_add() allocates 4-byte aligned storage, no need to aligh
201 		 * length here
202 		 * */
203 		rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off;
204 	}
205 	nlattr_set_len(nw, off);
206 }
207 #endif
208 
209 static void
210 dump_rc_nhop(struct nl_writer *nw, const struct nhop_object *nh, struct rtmsg *rtm)
211 {
212 #ifdef ROUTE_MPATH
213 	if (NH_IS_NHGRP(nh)) {
214 		dump_rc_nhg(nw, (const struct nhgrp_object *)nh, rtm);
215 		return;
216 	}
217 #endif
218 	uint32_t rtflags = nhop_get_rtflags(nh);
219 
220 	/*
221 	 * IPv4 over IPv6
222 	 *    ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
223 	 * IPv4 w/ gw
224 	 *    ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
225 	 * Direct route:
226 	 *    ('RTA_OIF', 2)
227 	 */
228 	if (nh->nh_flags & NHF_GATEWAY)
229 		dump_rc_nhop_gw(nw, nh);
230 
231 	uint32_t uidx = nhop_get_uidx(nh);
232 	if (uidx != 0)
233 		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
234 	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh));
235 	nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
236 
237 	if (rtflags & RTF_FIXEDMTU)
238 		dump_rc_nhop_mtu(nw, nh);
239 	uint32_t nh_expire = nhop_get_expire(nh);
240 	if (nh_expire > 0)
241 		nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime);
242 
243 	/* In any case, fill outgoing interface */
244 	nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index);
245 }
246 
247 /*
248  * Dumps output from a rib command into an rtmsg
249  */
250 
251 static int
252 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr,
253     const struct rtentry *rt, struct route_nhop_data *rnd,
254     struct nl_writer *nw)
255 {
256 	struct rtmsg *rtm;
257 	int error = 0;
258 
259 	NET_EPOCH_ASSERT();
260 
261 	if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg)))
262 		goto enomem;
263 
264 	int family = rt_get_family(rt);
265 	int rtm_off = nlattr_save_offset(nw);
266 	rtm = nlmsg_reserve_object(nw, struct rtmsg);
267 	rtm->rtm_family = family;
268 	rtm->rtm_dst_len = 0;
269 	rtm->rtm_src_len = 0;
270 	rtm->rtm_tos = 0;
271 	if (fibnum < 255)
272 		rtm->rtm_table = (unsigned char)fibnum;
273 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
274 	rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop);
275 	rtm->rtm_type = get_rtm_type(rnd->rnd_nhop);
276 
277 	nlattr_add_u32(nw, NL_RTA_TABLE, fibnum);
278 
279 	int plen = 0;
280 #if defined(INET) || defined(INET6)
281 	uint32_t scopeid;
282 #endif
283 	switch (family) {
284 #ifdef INET
285 	case AF_INET:
286 		{
287 			struct in_addr addr;
288 			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
289 			nlattr_add(nw, NL_RTA_DST, 4, &addr);
290 			break;
291 		}
292 #endif
293 #ifdef INET6
294 	case AF_INET6:
295 		{
296 			struct in6_addr addr;
297 			rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
298 			nlattr_add(nw, NL_RTA_DST, 16, &addr);
299 			break;
300 		}
301 #endif
302 	default:
303 		FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family);
304 		error = EAFNOSUPPORT;
305 		goto flush;
306 	}
307 
308 	rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg);
309 	if (plen > 0)
310 		rtm->rtm_dst_len = plen;
311 	dump_rc_nhop(nw, rnd->rnd_nhop, rtm);
312 
313 	if (nlmsg_end(nw))
314 		return (0);
315 enomem:
316 	error = ENOMEM;
317 flush:
318 	nlmsg_abort(nw);
319 	return (error);
320 }
321 
322 static int
323 family_to_group(int family)
324 {
325 	switch (family) {
326 	case AF_INET:
327 		return (RTNLGRP_IPV4_ROUTE);
328 	case AF_INET6:
329 		return (RTNLGRP_IPV6_ROUTE);
330 	}
331 	return (0);
332 }
333 
334 
335 static void
336 report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
337     struct nlpcb *nlp, struct nlmsghdr *hdr)
338 {
339 	struct nl_writer nw;
340 
341 	uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
342 	if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
343 		struct route_nhop_data rnd = {
344 			.rnd_nhop = rc_get_nhop(rc),
345 			.rnd_weight = rc->rc_nh_weight,
346 		};
347 		hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE);
348 		hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND);
349 		switch (rc->rc_cmd) {
350 		case RTM_ADD:
351 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
352 			hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
353 			break;
354 		case RTM_CHANGE:
355 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
356 			hdr->nlmsg_flags |= NLM_F_REPLACE;
357 			break;
358 		case RTM_DELETE:
359 			hdr->nlmsg_type = NL_RTM_DELROUTE;
360 			break;
361 		}
362 		dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw);
363 		nlmsg_flush(&nw);
364 	}
365 
366 	rtsock_callback_p->route_f(fibnum, rc);
367 }
368 
/* One parsed RTA_MULTIPATH entry (struct rtnexthop plus its attributes). */
struct rta_mpath_nh {
	struct sockaddr	*gw;		/* from NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet	*ifp;		/* resolved from rtnh_ifindex */
	uint8_t		rtnh_flags;	/* copy of rtnh_flags */
	uint8_t		rtnh_weight;	/* copy of rtnh_hops */
};
375 
376 #define	_IN(_field)	offsetof(struct rtnexthop, _field)
377 #define	_OUT(_field)	offsetof(struct rta_mpath_nh, _field)
378 const static struct nlattr_parser nla_p_rtnh[] = {
379 	{ .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip },
380 	{ .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia },
381 };
382 const static struct nlfield_parser nlf_p_rtnh[] = {
383 	{ .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 },
384 	{ .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 },
385 	{ .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz },
386 };
387 #undef _IN
388 #undef _OUT
389 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh);
390 
391 struct rta_mpath {
392 	int num_nhops;
393 	struct rta_mpath_nh nhops[0];
394 };
395 
396 static int
397 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
398 {
399 	int data_len = nla->nla_len - sizeof(struct nlattr);
400 	struct rtnexthop *rtnh;
401 
402 	int max_nhops = data_len / sizeof(struct rtnexthop);
403 
404 	struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh));
405 	mp->num_nhops = 0;
406 
407 	for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) {
408 		struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++];
409 
410 		int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser,
411 		    npt, mpnh);
412 		if (error != 0) {
413 			NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed",
414 			    mp->num_nhops - 1);
415 			return (error);
416 		}
417 
418 		int len = NL_ITEM_ALIGN(rtnh->rtnh_len);
419 		data_len -= len;
420 		rtnh = (struct rtnexthop *)((char *)rtnh + len);
421 	}
422 	if (data_len != 0 || mp->num_nhops == 0) {
423 		NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr");
424 		return (EINVAL);
425 	}
426 
427 	*((struct rta_mpath **)target) = mp;
428 	return (0);
429 }
430 
431 
/*
 * Result of parsing an RTM_{NEW,DEL,GET}ROUTE request with rtm_parser.
 * Fields that were not present in the message keep their initial value.
 */
struct nl_parsed_route {
	struct sockaddr		*rta_dst;	/* NL_RTA_DST */
	struct sockaddr		*rta_gw;	/* NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet		*rta_oif;	/* NL_RTA_OIF */
	struct rta_mpath	*rta_multipath;	/* NL_RTA_MULTIPATH */
	uint32_t		rta_table;	/* NL_RTA_TABLE */
	uint32_t		rta_rtflags;	/* NL_RTA_RTFLAGS */
	uint32_t		rta_nh_id;	/* NL_RTA_NH_ID */
	uint32_t		rtax_mtu;	/* NL_RTAX_MTU inside NL_RTA_METRICS */
	uint8_t			rtm_family;	/* rtmsg.rtm_family */
	uint8_t			rtm_dst_len;	/* rtmsg.rtm_dst_len */
	uint8_t			rtm_protocol;	/* rtmsg.rtm_protocol */
};
445 
446 #define	_IN(_field)	offsetof(struct rtmsg, _field)
447 #define	_OUT(_field)	offsetof(struct nl_parsed_route, _field)
448 static struct nlattr_parser nla_p_rtmetrics[] = {
449 	{ .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 },
450 };
451 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics);
452 
453 static const struct nlattr_parser nla_p_rtmsg[] = {
454 	{ .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip },
455 	{ .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp },
456 	{ .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip },
457 	{ .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested },
458 	{ .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath },
459 	{ .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 },
460 	{ .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 },
461 	{ .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia },
462 	{ .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 },
463 };
464 
465 static const struct nlfield_parser nlf_p_rtmsg[] = {
466 	{.off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 },
467 	{.off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 },
468 	{.off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 },
469 };
470 #undef _IN
471 #undef _OUT
472 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg);
473 
474 struct netlink_walkargs {
475 	struct nl_writer *nw;
476 	struct route_nhop_data rnd;
477 	struct nlmsghdr hdr;
478 	struct nlpcb *nlp;
479 	uint32_t fibnum;
480 	int family;
481 	int error;
482 	int count;
483 	int dumped;
484 	int dumped_tables;
485 };
486 
487 static int
488 dump_rtentry(struct rtentry *rt, void *_arg)
489 {
490 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
491 	int error;
492 
493 	wa->count++;
494 	if (wa->error != 0)
495 		return (0);
496 	wa->dumped++;
497 
498 	rt_get_rnd(rt, &wa->rnd);
499 
500 	error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw);
501 
502 	IF_DEBUG_LEVEL(LOG_DEBUG3) {
503 		char rtbuf[INET6_ADDRSTRLEN + 5];
504 		FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
505 		    "Dump %s, offset %u, error %d",
506 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
507 		    wa->nw->offset, error);
508 	}
509 	wa->error = error;
510 
511 	return (0);
512 }
513 
514 static void
515 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
516 {
517 	FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump");
518 	wa->count = 0;
519 	wa->dumped = 0;
520 
521 	rib_walk(fibnum, family, false, dump_rtentry, wa);
522 
523 	wa->dumped_tables++;
524 
525 	FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
526 	    wa->count, wa->dumped);
527 	NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
528 }
529 
530 static int
531 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family)
532 {
533 	wa->fibnum = fibnum;
534 
535 	if (family == AF_UNSPEC) {
536 		for (int i = 0; i < AF_MAX; i++) {
537 			if (rt_tables_get_rnh(fibnum, i) != 0) {
538 				wa->family = i;
539 				dump_rtable_one(wa, fibnum, i);
540 				if (wa->error != 0)
541 					break;
542 			}
543 		}
544 	} else {
545 		if (rt_tables_get_rnh(fibnum, family) != 0) {
546 			wa->family = family;
547 			dump_rtable_one(wa, fibnum, family);
548 		}
549 	}
550 
551 	return (wa->error);
552 }
553 
554 static int
555 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
556     struct nlmsghdr *hdr, struct nl_pstate *npt)
557 {
558 	RIB_RLOCK_TRACKER;
559 	struct rib_head *rnh;
560 	struct rtentry *rt;
561 	uint32_t fibnum = attrs->rta_table;
562 	sa_family_t family = attrs->rtm_family;
563 
564 	if (attrs->rta_dst == NULL) {
565 		NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied");
566 			return (EINVAL);
567 	}
568 
569 	FIB_LOG(LOG_DEBUG, fibnum, family, "getroute called");
570 
571 	rnh = rt_tables_get_rnh(fibnum, family);
572 	if (rnh == NULL)
573 		return (EAFNOSUPPORT);
574 
575 	RIB_RLOCK(rnh);
576 
577 	rt = (struct rtentry *)rnh->rnh_matchaddr(attrs->rta_dst, &rnh->head);
578 	if (rt == NULL) {
579 		RIB_RUNLOCK(rnh);
580 		return (ESRCH);
581 	}
582 
583 	struct route_nhop_data rnd;
584 	rt_get_rnd(rt, &rnd);
585 	rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0);
586 
587 	RIB_RUNLOCK(rnh);
588 
589 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
590 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
591 		FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
592 		    nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)),
593 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)));
594 	}
595 
596 	hdr->nlmsg_type = NL_RTM_NEWROUTE;
597 	dump_px(fibnum, hdr, rt, &rnd, npt->nw);
598 
599 	return (0);
600 }
601 
602 static int
603 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family,
604     struct nlmsghdr *hdr, struct nl_writer *nw)
605 {
606 	struct netlink_walkargs wa = {
607 		.nlp = nlp,
608 		.nw = nw,
609 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
610 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
611 		.hdr.nlmsg_type = NL_RTM_NEWROUTE,
612 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
613 	};
614 
615 	if (fibnum == RT_TABLE_UNSPEC) {
616 		for (int i = 0; i < V_rt_numfibs; i++) {
617 			dump_rtable_fib(&wa, fibnum, family);
618 			if (wa.error != 0)
619 				break;
620 		}
621 	} else
622 		dump_rtable_fib(&wa, fibnum, family);
623 
624 	if (wa.error == 0 && wa.dumped_tables == 0) {
625 		FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family");
626 		wa.error = ESRCH;
627 		// How do we propagate it?
628 	}
629 
630 	if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) {
631                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
632                 return (ENOMEM);
633         }
634 
635 	return (wa.error);
636 }
637 
638 static struct nhop_object *
639 finalize_nhop(struct nhop_object *nh, int *perror)
640 {
641 	/*
642 	 * The following MUST be filled:
643 	 *  nh_ifp, nh_ifa, nh_gw
644 	 */
645 	if (nh->gw_sa.sa_family == 0) {
646 		/*
647 		 * Empty gateway. Can be direct route with RTA_OIF set.
648 		 */
649 		if (nh->nh_ifp != NULL)
650 			nhop_set_direct_gw(nh, nh->nh_ifp);
651 		else {
652 			NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping");
653 			*perror = EINVAL;
654 			return (NULL);
655 		}
656 		/* Both nh_ifp and gateway are set */
657 	} else {
658 		/* Gateway is set up, we can derive ifp if not set */
659 		if (nh->nh_ifp == NULL) {
660 			struct ifaddr *ifa = ifa_ifwithnet(&nh->gw_sa, 1, nhop_get_fibnum(nh));
661 			if (ifa == NULL) {
662 				NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping");
663 				*perror = EINVAL;
664 				return (NULL);
665 			}
666 			nhop_set_transmit_ifp(nh, ifa->ifa_ifp);
667 		}
668 	}
669 	/* Both nh_ifp and gateway are set */
670 	if (nh->nh_ifa == NULL) {
671 		struct ifaddr *ifa = ifaof_ifpforaddr(&nh->gw_sa, nh->nh_ifp);
672 		if (ifa == NULL) {
673 			NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping");
674 			*perror = EINVAL;
675 			return (NULL);
676 		}
677 		nhop_set_src(nh, ifa);
678 	}
679 
680 	return (nhop_get_nhop(nh, perror));
681 }
682 
683 static int
684 get_pxflag(const struct nl_parsed_route *attrs)
685 {
686 	int pxflag = 0;
687 	switch (attrs->rtm_family) {
688 	case AF_INET:
689 		if (attrs->rtm_dst_len == 32)
690 			pxflag = NHF_HOST;
691 		else if (attrs->rtm_dst_len == 0)
692 			pxflag = NHF_DEFAULT;
693 		break;
694 	case AF_INET6:
695 		if (attrs->rtm_dst_len == 32)
696 			pxflag = NHF_HOST;
697 		else if (attrs->rtm_dst_len == 0)
698 			pxflag = NHF_DEFAULT;
699 		break;
700 	}
701 
702 	return (pxflag);
703 }
704 
705 static int
706 get_op_flags(int nlm_flags)
707 {
708 	int op_flags = 0;
709 
710 	op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0;
711 	op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0;
712 	op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0;
713 	op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0;
714 
715 	return (op_flags);
716 }
717 
718 #ifdef ROUTE_MPATH
719 static int
720 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh,
721     struct nl_pstate *npt, struct nhop_object **pnh)
722 {
723 	int error;
724 
725 	if (mpnh->gw == NULL)
726 		return (EINVAL);
727 
728 	struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
729 	if (nh == NULL)
730 		return (ENOMEM);
731 
732 	nhop_set_gw(nh, mpnh->gw, true);
733 	if (mpnh->ifp != NULL)
734 		nhop_set_transmit_ifp(nh, mpnh->ifp);
735 	nhop_set_rtflags(nh, attrs->rta_rtflags);
736 	if (attrs->rtm_protocol > RTPROT_STATIC)
737 		nhop_set_origin(nh, attrs->rtm_protocol);
738 
739 	*pnh = finalize_nhop(nh, &error);
740 
741 	return (error);
742 }
743 #endif
744 
745 static struct nhop_object *
746 create_nexthop_from_attrs(struct nl_parsed_route *attrs,
747     struct nl_pstate *npt, int *perror)
748 {
749 	struct nhop_object *nh = NULL;
750 	int error = 0;
751 
752 	if (attrs->rta_multipath != NULL) {
753 #ifdef ROUTE_MPATH
754 		/* Multipath w/o explicit nexthops */
755 		int num_nhops = attrs->rta_multipath->num_nhops;
756 		struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops);
757 
758 		for (int i = 0; i < num_nhops; i++) {
759 			struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i];
760 
761 			error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh);
762 			if (error != 0) {
763 				for (int j = 0; j < i; j++)
764 					nhop_free(wn[j].nh);
765 				break;
766 			}
767 			wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1;
768 		}
769 		if (error == 0) {
770 			struct rib_head *rh = nhop_get_rh(wn[0].nh);
771 			struct nhgrp_object *nhg;
772 
773 			nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family,
774 			    wn, num_nhops, perror);
775 			if (nhg != NULL) {
776 				if (attrs->rtm_protocol > RTPROT_STATIC)
777 					nhgrp_set_origin(nhg, attrs->rtm_protocol);
778 				nhg = nhgrp_get_nhgrp(nhg, perror);
779 			}
780 			for (int i = 0; i < num_nhops; i++)
781 				nhop_free(wn[i].nh);
782 			if (nhg != NULL)
783 				return ((struct nhop_object *)nhg);
784 			error = *perror;
785 		}
786 #else
787 		error = ENOTSUP;
788 #endif
789 		*perror = error;
790 	} else {
791 		nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
792 		if (nh == NULL) {
793 			*perror = ENOMEM;
794 			return (NULL);
795 		}
796 		if (attrs->rta_gw != NULL)
797 			nhop_set_gw(nh, attrs->rta_gw, true);
798 		if (attrs->rta_oif != NULL)
799 			nhop_set_transmit_ifp(nh, attrs->rta_oif);
800 		if (attrs->rtax_mtu != 0)
801 			nhop_set_mtu(nh, attrs->rtax_mtu, true);
802 		if (attrs->rta_rtflags & RTF_BROADCAST)
803 			nhop_set_broadcast(nh, true);
804 		if (attrs->rta_rtflags & RTF_BLACKHOLE)
805 			nhop_set_blackhole(nh, NHF_BLACKHOLE);
806 		if (attrs->rta_rtflags & RTF_REJECT)
807 			nhop_set_blackhole(nh, NHF_REJECT);
808 		nhop_set_rtflags(nh, attrs->rta_rtflags);
809 		if (attrs->rtm_protocol > RTPROT_STATIC)
810 			nhop_set_origin(nh, attrs->rtm_protocol);
811 		nh = finalize_nhop(nh, perror);
812 	}
813 
814 	return (nh);
815 }
816 
817 static int
818 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
819     struct nl_pstate *npt)
820 {
821 	struct rib_cmd_info rc = {};
822 	struct nhop_object *nh = NULL;
823 	int error;
824 
825 	struct nl_parsed_route attrs = {};
826 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
827 	if (error != 0)
828 		return (error);
829 
830 	/* Check if we have enough data */
831 	if (attrs.rta_dst == NULL) {
832 		NL_LOG(LOG_DEBUG, "missing RTA_DST");
833 		return (EINVAL);
834 	}
835 
836 	if (attrs.rta_nh_id != 0) {
837 		/* Referenced uindex */
838 		int pxflag = get_pxflag(&attrs);
839 		nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id,
840 		    pxflag, &error);
841 		if (error != 0)
842 			return (error);
843 	} else {
844 		nh = create_nexthop_from_attrs(&attrs, npt, &error);
845 		if (error != 0) {
846 			NL_LOG(LOG_DEBUG, "Error creating nexthop");
847 			return (error);
848 		}
849 	}
850 
851 	int weight = NH_IS_NHGRP(nh) ? 0 : RT_DEFAULT_WEIGHT;
852 	struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = weight };
853 	int op_flags = get_op_flags(hdr->nlmsg_flags);
854 
855 	error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len,
856 	    &rnd, op_flags, &rc);
857 	if (error == 0)
858 		report_operation(attrs.rta_table, &rc, nlp, hdr);
859 	return (error);
860 }
861 
862 static int
863 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
864 {
865 	struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data;
866 
867 	if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw))
868 		return (0);
869 
870 	if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp))
871 		return (0);
872 
873 	return (1);
874 }
875 
876 static int
877 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
878     struct nl_pstate *npt)
879 {
880 	struct rib_cmd_info rc;
881 	int error;
882 
883 	struct nl_parsed_route attrs = {};
884 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
885 	if (error != 0)
886 		return (error);
887 
888 	if (attrs.rta_dst == NULL) {
889 		NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set");
890 		return (ESRCH);
891 	}
892 
893 	error = rib_del_route_px(attrs.rta_table, attrs.rta_dst,
894 	    attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc);
895 	if (error == 0)
896 		report_operation(attrs.rta_table, &rc, nlp, hdr);
897 	return (error);
898 }
899 
900 static int
901 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
902 {
903 	int error;
904 
905 	struct nl_parsed_route attrs = {};
906 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
907 	if (error != 0)
908 		return (error);
909 
910 	if (hdr->nlmsg_flags & NLM_F_DUMP)
911 		error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw);
912 	else
913 		error = handle_rtm_getroute(nlp, &attrs, hdr, npt);
914 
915 	return (error);
916 }
917 
918 void
919 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
920 {
921 	int family, nlm_flags = 0;
922 
923 	struct nl_writer nw;
924 
925 	family = rt_get_family(rc->rc_rt);
926 
927 	/* XXX: check if there are active listeners first */
928 
929 	/* TODO: consider passing PID/type/seq */
930 	switch (rc->rc_cmd) {
931 	case RTM_ADD:
932 		nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
933 		break;
934 	case RTM_CHANGE:
935 		nlm_flags = NLM_F_REPLACE;
936 		break;
937 	case RTM_DELETE:
938 		nlm_flags = 0;
939 		break;
940 	}
941 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
942 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused;
943 		FIB_LOG(LOG_DEBUG2, fibnum, family,
944 		    "received event %s for %s / nlm_flags=%X",
945 		    rib_print_cmd(rc->rc_cmd),
946 		    rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
947 		    nlm_flags);
948 	}
949 
950 	struct nlmsghdr hdr = {
951 		.nlmsg_flags = nlm_flags,
952 		.nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd),
953 	};
954 
955 	struct route_nhop_data rnd = {
956 		.rnd_nhop = rc_get_nhop(rc),
957 		.rnd_weight = rc->rc_nh_weight,
958 	};
959 
960 	uint32_t group_id = family_to_group(family);
961 	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
962 		NL_LOG(LOG_DEBUG, "error allocating event buffer");
963 		return;
964 	}
965 
966 	dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw);
967 	nlmsg_flush(&nw);
968 }
969 
970 static const struct rtnl_cmd_handler cmd_handlers[] = {
971 	{
972 		.cmd = NL_RTM_GETROUTE,
973 		.name = "RTM_GETROUTE",
974 		.cb = &rtnl_handle_getroute,
975 	},
976 	{
977 		.cmd = NL_RTM_DELROUTE,
978 		.name = "RTM_DELROUTE",
979 		.cb = &rtnl_handle_delroute,
980 		.priv = PRIV_NET_ROUTE,
981 	},
982 	{
983 		.cmd = NL_RTM_NEWROUTE,
984 		.name = "RTM_NEWROUTE",
985 		.cb = &rtnl_handle_newroute,
986 		.priv = PRIV_NET_ROUTE,
987 	}
988 };
989 
990 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser};
991 
992 void
993 rtnl_routes_init(void)
994 {
995 	NL_VERIFY_PARSERS(all_parsers);
996 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
997 }
998