xref: /freebsd/sys/netlink/route/iface.c (revision be181ee2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 
40 #include <net/if.h>
41 #include <net/if_dl.h>
42 #include <net/if_media.h>
43 #include <net/if_var.h>
44 #include <net/if_clone.h>
45 #include <net/route.h>
46 #include <net/route/nhop.h>
47 #include <net/route/route_ctl.h>
48 #include <netlink/netlink.h>
49 #include <netlink/netlink_ctl.h>
50 #include <netlink/netlink_route.h>
51 #include <netlink/route/route_var.h>
52 
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 
55 #define	DEBUG_MOD_NAME	nl_iface
56 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
57 #include <netlink/netlink_debug.h>
58 _DECLARE_DEBUG(LOG_DEBUG);
59 
60 struct netlink_walkargs {
61 	struct nl_writer *nw;
62 	struct nlmsghdr hdr;
63 	struct nlpcb *so;
64 	uint32_t fibnum;
65 	int family;
66 	int error;
67 	int count;
68 	int dumped;
69 };
70 
71 static eventhandler_tag ifdetach_event, ifattach_event, ifaddr_event;
72 
73 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
74 
75 static struct sx rtnl_cloner_lock;
76 SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
77 
78 /*
79  * RTM_GETLINK request
80  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
81  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
82  *
83  * Reply:
84  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
85 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
86 
87 [
88 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
89 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
90 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
91 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
92 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
93 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
94  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
95 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
96 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
97 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
98 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
99 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
100 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
101 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
102 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
103 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
104 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
105 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
106 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
107  */
108 
/* Link state values reported through IFLA_OPERSTATE and IFLA_CARRIER. */
struct if_state {
	uint8_t	ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t	ifla_carrier;	/* 1 if carrier is present, 0 otherwise */
};
113 
114 static void
115 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
116 {
117 	struct ifmediareq ifmr = {};
118 	int error;
119 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
120 
121 	if (error != 0) {
122 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
123 		    if_name(ifp), error);
124 		return;
125 	}
126 
127 	switch (IFM_TYPE(ifmr.ifm_active)) {
128 	case IFM_ETHER:
129 		if (ifmr.ifm_status & IFM_ACTIVE) {
130 			pstate->ifla_carrier = 1;
131 			if (ifp->if_flags & IFF_MONITOR)
132 				pstate->ifla_operstate = IF_OPER_DORMANT;
133 			else
134 				pstate->ifla_operstate = IF_OPER_UP;
135 		} else
136 			pstate->ifla_operstate = IF_OPER_DOWN;
137 	}
138 }
139 
140 static bool
141 get_stats(struct nl_writer *nw, struct ifnet *ifp)
142 {
143 	struct rtnl_link_stats64 *stats;
144 
145 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
146 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
147 	if (nla == NULL)
148 		return (false);
149 	nla->nla_type = IFLA_STATS64;
150 	nla->nla_len = nla_len;
151 	stats = (struct rtnl_link_stats64 *)(nla + 1);
152 
153 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
154 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
155 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
156 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
157 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
158 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
159 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
160 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
161 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
162 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
163 
164 	return (true);
165 }
166 
167 static void
168 get_operstate(struct ifnet *ifp, struct if_state *pstate)
169 {
170 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
171 	pstate->ifla_carrier = 0; /* no carrier */
172 
173 	switch (ifp->if_type) {
174 	case IFT_ETHER:
175 		get_operstate_ether(ifp, pstate);
176 		break;
177 	case IFT_LOOP:
178 		if (ifp->if_flags & IFF_UP) {
179 			pstate->ifla_operstate = IF_OPER_UP;
180 			pstate->ifla_carrier = 1;
181 		} else
182 			pstate->ifla_operstate = IF_OPER_DOWN;
183 		break;
184 	}
185 }
186 
187 static unsigned
188 ifp_flags_to_netlink(const struct ifnet *ifp)
189 {
190         return (ifp->if_flags | ifp->if_drv_flags);
191 }
192 
193 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
194 static bool
195 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
196 {
197         uint32_t addr_len = 0;
198         const void *addr_data = NULL;
199         struct in6_addr addr6;
200 
201         if (sa == NULL)
202                 return (true);
203 
204         switch (sa->sa_family) {
205         case AF_INET:
206                 addr_len = sizeof(struct in_addr);
207                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
208                 break;
209         case AF_INET6:
210                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
211                 addr_len = sizeof(struct in6_addr);
212                 addr_data = &addr6;
213                 break;
214         case AF_LINK:
215                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
216                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
217                 break;
218         default:
219                 NL_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family);
220                 return (true);
221         }
222 
223         return (nlattr_add(nw, attr, addr_len, addr_data));
224 }
225 
226 /*
227  * Dumps interface state, properties and metrics.
228  * @nw: message writer
229  * @ifp: target interface
230  * @hdr: template header
231  *
232  * This function is called without epoch and MAY sleep.
233  */
234 static bool
235 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr)
236 {
237         struct ifinfomsg *ifinfo;
238 
239         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
240 
241 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
242 		goto enomem;
243 
244         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
245         ifinfo->ifi_family = AF_UNSPEC;
246         ifinfo->__ifi_pad = 0;
247         ifinfo->ifi_type = ifp->if_type;
248         ifinfo->ifi_index = ifp->if_index;
249         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
250         ifinfo->ifi_change = 0;
251 
252         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
253 
254 	struct if_state ifs = {};
255 	get_operstate(ifp, &ifs);
256 
257         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
258         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
259 
260 /*
261         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
262         nlattr_add_u8(nw, IFLA_LINKMODE, val);
263 */
264         if ((ifp->if_addr != NULL)) {
265                 dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr);
266         }
267 
268         if ((ifp->if_broadcastaddr != NULL)) {
269 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
270 		    ifp->if_broadcastaddr);
271         }
272 
273         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
274 /*
275         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
276         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
277         nlattr_add_u32(nw, IFLA_GROUP, 0);
278 */
279 	get_stats(nw, ifp);
280 
281 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
282         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
283 
284         if (nlmsg_end(nw))
285 		return (true);
286 
287 enomem:
288         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
289         nlmsg_abort(nw);
290         return (false);
291 }
292 
293 static bool
294 check_ifmsg(void *hdr, struct nl_pstate *npt)
295 {
296 	struct ifinfomsg *ifm = hdr;
297 
298 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
299 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
300 		nlmsg_report_err_msg(npt,
301 		    "strict checking: non-zero values in ifinfomsg header");
302 		return (false);
303 	}
304 
305 	return (true);
306 }
307 
308 #define	_IN(_field)	offsetof(struct ifinfomsg, _field)
309 #define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
310 static const struct nlfield_parser nlf_p_if[] = {
311 	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
312 	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
313 };
314 
315 static const struct nlattr_parser nla_p_linfo[] = {
316 	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
317 	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
318 };
319 NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
320 
321 static const struct nlattr_parser nla_p_if[] = {
322 	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
323 	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
324 	{ .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 },
325 	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
326 	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
327 	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
328 };
329 #undef _IN
330 #undef _OUT
331 NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
332 
333 static bool
334 match_iface(struct nl_parsed_link *attrs, struct ifnet *ifp)
335 {
336 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
337 		return (false);
338 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
339 		return (false);
340 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
341 		return (false);
342 	/* TODO: add group match */
343 
344 	return (true);
345 }
346 
347 /*
348  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
349  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
350  *   [
351  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
352  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
353  *   ]
354  */
355 static int
356 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
357 {
358 	struct epoch_tracker et;
359         struct ifnet *ifp;
360 	int error = 0;
361 
362 	struct nl_parsed_link attrs = {};
363 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
364 	if (error != 0)
365 		return (error);
366 
367 	struct netlink_walkargs wa = {
368 		.so = nlp,
369 		.nw = npt->nw,
370 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
371 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
372 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
373 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
374 	};
375 
376 	/* Fast track for an interface w/ explicit index match */
377 	if (attrs.ifi_index != 0) {
378 		NET_EPOCH_ENTER(et);
379 		ifp = ifnet_byindex_ref(attrs.ifi_index);
380 		NET_EPOCH_EXIT(et);
381 		NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index);
382 		if (ifp != NULL) {
383 			if (match_iface(&attrs, ifp)) {
384 				if (!dump_iface(wa.nw, ifp, &wa.hdr))
385 					error = ENOMEM;
386 			} else
387 				error = ESRCH;
388 			if_rele(ifp);
389 		} else
390 			error = ESRCH;
391 		return (error);
392 	}
393 
394 	/*
395 	 * Fetching some link properties require performing ioctl's that may be blocking.
396 	 * Address it by saving referenced pointers of the matching links,
397 	 * exiting from epoch and going through the list one-by-one.
398 	 */
399 
400 	NL_LOG(LOG_DEBUG2, "Start dump");
401 
402 	struct ifnet **match_array;
403 	int offset = 0, base_count = 16; /* start with 128 bytes */
404 	match_array = malloc(base_count * sizeof(void *), M_TEMP, M_NOWAIT);
405 
406 	NLP_LOG(LOG_DEBUG3, nlp, "MATCHING: index=%u type=%d name=%s",
407 	    attrs.ifi_index, attrs.ifi_type, attrs.ifla_ifname);
408 	NET_EPOCH_ENTER(et);
409         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
410 		wa.count++;
411 		if (match_iface(&attrs, ifp)) {
412 			if (offset < base_count) {
413 				if (!if_try_ref(ifp))
414 					continue;
415 				match_array[offset++] = ifp;
416 				continue;
417 			}
418 			/* Too many matches, need to reallocate */
419 			struct ifnet **new_array;
420 			int sz = base_count * sizeof(void *);
421 			base_count *= 2;
422 			new_array = malloc(sz * 2, M_TEMP, M_NOWAIT);
423 			if (new_array == NULL) {
424 				error = ENOMEM;
425 				break;
426 			}
427 			memcpy(new_array, match_array, sz);
428 			free(match_array, M_TEMP);
429 			match_array = new_array;
430                 }
431         }
432 	NET_EPOCH_EXIT(et);
433 
434 	NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset);
435 	for (int i = 0; error == 0 && i < offset; i++) {
436 		if (!dump_iface(wa.nw, match_array[i], &wa.hdr))
437 			error = ENOMEM;
438 	}
439 	for (int i = 0; i < offset; i++)
440 		if_rele(match_array[i]);
441 	free(match_array, M_TEMP);
442 
443 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
444 
445 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
446                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
447                 return (ENOMEM);
448         }
449 
450 	return (error);
451 }
452 
453 /*
454  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
455  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
456  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
457  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
458  *   [
459  *    {nla_len=16, nla_type=IFLA_LINKINFO},
460  *     [
461  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
462  *     ]
463  *    ]
464  */
465 
466 static int
467 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
468 {
469 	struct epoch_tracker et;
470         struct ifnet *ifp;
471 	int error;
472 
473 	struct nl_parsed_link attrs = {};
474 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
475 	if (error != 0)
476 		return (error);
477 
478 	NET_EPOCH_ENTER(et);
479 	ifp = ifnet_byindex_ref(attrs.ifi_index);
480 	NET_EPOCH_EXIT(et);
481 	if (ifp == NULL) {
482 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
483 		return (ENOENT);
484 	}
485 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
486 
487 	sx_xlock(&ifnet_detach_sxlock);
488 	error = if_clone_destroy(if_name(ifp));
489 	sx_xunlock(&ifnet_detach_sxlock);
490 
491 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
492 
493 	if_rele(ifp);
494 	return (error);
495 }
496 
497 static int
498 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
499 {
500 	struct nl_cloner *cloner;
501 	int error;
502 
503 	struct nl_parsed_link attrs = {};
504 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
505 	if (error != 0)
506 		return (error);
507 
508 	if (attrs.ifla_ifname == NULL || strlen(attrs.ifla_ifname) == 0) {
509 		/* Applications like ip(8) verify RTM_NEWLINK existance
510 		 * by calling it with empty arguments. Always return "innocent"
511 		 * error.
512 		 */
513 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
514 		return (EPERM);
515 	}
516 
517 	if (attrs.ifla_cloner == NULL || strlen(attrs.ifla_cloner) == 0) {
518 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
519 		return (EINVAL);
520 	}
521 
522 	sx_slock(&rtnl_cloner_lock);
523 	SLIST_FOREACH(cloner, &nl_cloners, next) {
524 		if (!strcmp(attrs.ifla_cloner, cloner->name)) {
525 			error = cloner->create_f(&attrs, nlp, npt);
526 			sx_sunlock(&rtnl_cloner_lock);
527 			return (error);
528 		}
529 	}
530 	sx_sunlock(&rtnl_cloner_lock);
531 
532 	/* TODO: load cloner module if not exists & privilege permits */
533 	NLMSG_REPORT_ERR_MSG(npt, "interface type %s not supported", attrs.ifla_cloner);
534 	return (ENOTSUP);
535 
536 	return (error);
537 }
538 
539 /*
540 
541 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
542  [
543         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
544         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
545         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
546         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
547         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
548 ---
549 
550 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
551  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
552    [
553     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
554    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
555    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
556 */
557 
558 static uint8_t
559 ifa_get_scope(const struct ifaddr *ifa)
560 {
561         const struct sockaddr *sa;
562         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
563 
564         sa = ifa->ifa_addr;
565         switch (sa->sa_family) {
566         case AF_INET:
567                 {
568                         struct in_addr addr;
569                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
570                         if (IN_LOOPBACK(addr.s_addr))
571                                 addr_scope = RT_SCOPE_HOST;
572                         else if (IN_LINKLOCAL(addr.s_addr))
573                                 addr_scope = RT_SCOPE_LINK;
574                         break;
575                 }
576         case AF_INET6:
577                 {
578                         const struct in6_addr *addr;
579                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
580                         if (IN6_IS_ADDR_LOOPBACK(addr))
581                                 addr_scope = RT_SCOPE_HOST;
582                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
583                                 addr_scope = RT_SCOPE_LINK;
584                         break;
585                 }
586         }
587 
588         return (addr_scope);
589 }
590 
591 static uint8_t
592 inet6_get_plen(const struct in6_addr *addr)
593 {
594 
595 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
596 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
597 }
598 
599 static uint8_t
600 get_sa_plen(const struct sockaddr *sa)
601 {
602         const struct in6_addr *paddr6;
603         const struct in_addr *paddr;
604 
605         switch (sa->sa_family) {
606         case AF_INET:
607                 if (sa == NULL)
608                         return (32);
609                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
610                 return bitcount32(paddr->s_addr);;
611         case AF_INET6:
612                 if (sa == NULL)
613                         return (128);
614                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
615                 return inet6_get_plen(paddr6);
616         }
617 
618         return (0);
619 }
620 
621 
622 /*
623  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
624            ('IFA_LOCAL', '12.0.0.1'),
625            ('IFA_LABEL', 'eth10'),
626            ('IFA_FLAGS', 128),
627            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
628  */
629 static bool
630 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
631     const struct nlmsghdr *hdr)
632 {
633         struct ifaddrmsg *ifamsg;
634         struct sockaddr *sa = ifa->ifa_addr;
635 
636         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
637             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
638 
639 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
640 		goto enomem;
641 
642         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
643         ifamsg->ifa_family = sa->sa_family;
644         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
645         ifamsg->ifa_flags = 0; // ifa_flags is useless
646         ifamsg->ifa_scope = ifa_get_scope(ifa);
647         ifamsg->ifa_index = ifp->if_index;
648 
649         struct sockaddr *dst_sa = ifa->ifa_dstaddr;
650         if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family))
651                 dst_sa = sa;
652         dump_sa(nw, IFA_ADDRESS, dst_sa);
653         dump_sa(nw, IFA_LOCAL, sa);
654         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
655 
656         uint32_t val = 0; // ifa->ifa_flags;
657         nlattr_add_u32(nw, IFA_FLAGS, val);
658 
659 	if (nlmsg_end(nw))
660 		return (true);
661 enomem:
662         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
663             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
664         nlmsg_abort(nw);
665         return (false);
666 }
667 
668 static int
669 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
670 {
671         struct ifaddr *ifa;
672         struct ifnet *ifp;
673 	int error = 0;
674 
675 	struct netlink_walkargs wa = {
676 		.so = nlp,
677 		.nw = npt->nw,
678 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
679 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
680 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
681 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
682 	};
683 
684 	NL_LOG(LOG_DEBUG2, "Start dump");
685 
686         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
687                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
688                         if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family)
689                                 continue;
690                         if (ifa->ifa_addr->sa_family == AF_LINK)
691                                 continue;
692 			wa.count++;
693                         if (!dump_iface_addr(wa.nw, ifp, ifa, &wa.hdr)) {
694                                 error = ENOMEM;
695                                 break;
696                         }
697 			wa.dumped++;
698                 }
699                 if (error != 0)
700                         break;
701         }
702 
703 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
704 
705 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
706                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
707                 return (ENOMEM);
708         }
709 
710 	return (error);
711 }
712 
713 static void
714 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
715 {
716 	struct nlmsghdr hdr = {};
717 	struct nl_writer nw = {};
718 	uint32_t group = 0;
719 
720 	switch (ifa->ifa_addr->sa_family) {
721 	case AF_INET:
722 		group = RTNLGRP_IPV4_IFADDR;
723 		break;
724 	case AF_INET6:
725 		group = RTNLGRP_IPV6_IFADDR;
726 		break;
727 	default:
728 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
729 		    ifa->ifa_addr->sa_family);
730 		return;
731 	}
732 
733 	if (!nl_has_listeners(NETLINK_ROUTE, group))
734 		return;
735 
736 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
737 		NL_LOG(LOG_DEBUG, "error allocating group writer");
738 		return;
739 	}
740 
741 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
742 
743 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
744 	nlmsg_flush(&nw);
745 }
746 
747 static void
748 rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
749 {
750 	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_NEWLINK };
751 	struct nl_writer nw = {};
752 
753 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
754 		return;
755 
756 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
757 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
758 		return;
759 	}
760 	dump_iface(&nw, ifp, &hdr);
761         nlmsg_flush(&nw);
762 }
763 
764 static void
765 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
766 {
767 	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_DELLINK };
768 	struct nl_writer nw = {};
769 
770 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
771 		return;
772 
773 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
774 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
775 		return;
776 	}
777 	dump_iface(&nw, ifp, &hdr);
778         nlmsg_flush(&nw);
779 }
780 
781 static const struct rtnl_cmd_handler cmd_handlers[] = {
782 	{
783 		.cmd = NL_RTM_GETLINK,
784 		.name = "RTM_GETLINK",
785 		.cb = &rtnl_handle_getlink,
786 		.flags = RTNL_F_NOEPOCH,
787 	},
788 	{
789 		.cmd = NL_RTM_DELLINK,
790 		.name = "RTM_DELLINK",
791 		.cb = &rtnl_handle_dellink,
792 		.priv = PRIV_NET_IFDESTROY,
793 		.flags = RTNL_F_NOEPOCH,
794 	},
795 	{
796 		.cmd = NL_RTM_NEWLINK,
797 		.name = "RTM_NEWLINK",
798 		.cb = &rtnl_handle_newlink,
799 		.priv = PRIV_NET_IFCREATE,
800 		.flags = RTNL_F_NOEPOCH,
801 	},
802 	{
803 		.cmd = NL_RTM_GETADDR,
804 		.name = "RTM_GETADDR",
805 		.cb = &rtnl_handle_getaddr,
806 	},
807 	{
808 		.cmd = NL_RTM_NEWADDR,
809 		.name = "RTM_NEWADDR",
810 		.cb = &rtnl_handle_getaddr,
811 	},
812 	{
813 		.cmd = NL_RTM_DELADDR,
814 		.name = "RTM_DELADDR",
815 		.cb = &rtnl_handle_getaddr,
816 	},
817 };
818 
819 static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser };
820 
821 void
822 rtnl_iface_add_cloner(struct nl_cloner *cloner)
823 {
824 	sx_xlock(&rtnl_cloner_lock);
825 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
826 	sx_xunlock(&rtnl_cloner_lock);
827 }
828 
829 void rtnl_iface_del_cloner(struct nl_cloner *cloner)
830 {
831 	sx_xlock(&rtnl_cloner_lock);
832 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
833 	sx_xunlock(&rtnl_cloner_lock);
834 }
835 
836 void
837 rtnl_ifaces_init(void)
838 {
839 	ifattach_event = EVENTHANDLER_REGISTER(
840 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
841 	    EVENTHANDLER_PRI_ANY);
842 	ifdetach_event = EVENTHANDLER_REGISTER(
843 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
844 	    EVENTHANDLER_PRI_ANY);
845 	ifaddr_event = EVENTHANDLER_REGISTER(
846 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
847 	    EVENTHANDLER_PRI_ANY);
848 	NL_VERIFY_PARSERS(all_parsers);
849 	rtnl_iface_drivers_register();
850 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
851 }
852 
853 void
854 rtnl_ifaces_destroy(void)
855 {
856 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
857 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
858 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
859 }
860