1 /*
2  * Code for encoding/decoding FPM messages that are in netlink format.
3  *
4  * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
5  * Copyright (C) 2012 by Open Source Routing.
6  * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
7  *
8  * This file is part of GNU Zebra.
9  *
10  * GNU Zebra is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License as published by the
12  * Free Software Foundation; either version 2, or (at your option) any
13  * later version.
14  *
15  * GNU Zebra is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License along
21  * with this program; see the file COPYING; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include <zebra.h>
26 
27 #ifdef HAVE_NETLINK
28 
29 #include "log.h"
30 #include "rib.h"
31 #include "vty.h"
32 #include "prefix.h"
33 
34 #include "zebra/zserv.h"
35 #include "zebra/zebra_router.h"
36 #include "zebra/zebra_dplane.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/kernel_netlink.h"
40 #include "zebra/rt_netlink.h"
41 #include "nexthop.h"
42 
43 #include "zebra/zebra_fpm_private.h"
44 #include "zebra/zebra_vxlan_private.h"
45 
46 /*
47  * addr_to_a
48  *
49  * Returns string representation of an address of the given AF.
50  */
addr_to_a(uint8_t af,void * addr)51 static inline const char *addr_to_a(uint8_t af, void *addr)
52 {
53 	if (!addr)
54 		return "<No address>";
55 
56 	switch (af) {
57 
58 	case AF_INET:
59 		return inet_ntoa(*((struct in_addr *)addr));
60 	case AF_INET6:
61 		return inet6_ntoa(*((struct in6_addr *)addr));
62 	default:
63 		return "<Addr in unknown AF>";
64 	}
65 }
66 
67 /*
68  * prefix_addr_to_a
69  *
70  * Convience wrapper that returns a human-readable string for the
71  * address in a prefix.
72  */
prefix_addr_to_a(struct prefix * prefix)73 static const char *prefix_addr_to_a(struct prefix *prefix)
74 {
75 	if (!prefix)
76 		return "<No address>";
77 
78 	return addr_to_a(prefix->family, &prefix->u.prefix);
79 }
80 
/*
 * af_addr_size
 *
 * Number of bytes occupied by an address of family 'af': 4 for AF_INET,
 * 16 for AF_INET6. Any other family trips an assertion; when assertions
 * are compiled out, 16 is returned.
 */
static size_t af_addr_size(uint8_t af)
{
	if (af == AF_INET)
		return 4;

	if (af == AF_INET6)
		return 16;

	/* Callers are only expected to pass IPv4 or IPv6. */
	assert(0);
	return 16;
}
99 
/*
 * Nexthop encapsulation types, sent to the FPM as the value of the
 * RTA_ENCAP_TYPE attribute.
 *
 * We plan to use the RTA_ENCAP_TYPE attribute for VxLAN encap as well.
 * Currently, values 0 to 8 for this attribute are used by
 * lwtunnel_encap_types, so we cannot use these values for VxLAN encap.
 */
enum fpm_nh_encap_type_t {
	/* No encapsulation; no encap attributes are emitted. */
	FPM_NH_ENCAP_NONE = 0,
	/* VxLAN encapsulation; deliberately outside the 0..8 range. */
	FPM_NH_ENCAP_VXLAN = 100,
	/* Sentinel; treated as "invalid" and never encoded. */
	FPM_NH_ENCAP_MAX,
};
110 
111 /*
112  * fpm_nh_encap_type_to_str
113  */
fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type)114 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type)
115 {
116 	switch (encap_type) {
117 	case FPM_NH_ENCAP_NONE:
118 		return "none";
119 
120 	case FPM_NH_ENCAP_VXLAN:
121 		return "VxLAN";
122 
123 	case FPM_NH_ENCAP_MAX:
124 		return "invalid";
125 	}
126 
127 	return "invalid";
128 }
129 
/* VxLAN-specific encapsulation parameters attached to a nexthop. */
struct vxlan_encap_info_t {
	/* VNI sent to the FPM inside the nested RTA_ENCAP attribute. */
	vni_t vni;
};
133 
/*
 * Attribute types used inside the nested RTA_ENCAP attribute when the
 * encap type is VxLAN.
 */
enum vxlan_encap_info_type_t {
	/* 32-bit VNI value. */
	VXLAN_VNI = 0,
};
137 
/*
 * Encapsulation information for one nexthop: the encap type plus the
 * type-specific parameters.
 */
struct fpm_nh_encap_info_t {
	/* Selects which member of the union below is meaningful. */
	enum fpm_nh_encap_type_t encap_type;
	union {
		/* Valid when encap_type is FPM_NH_ENCAP_VXLAN. */
		struct vxlan_encap_info_t vxlan_encap;
	};
};
144 
/*
 * netlink_nh_info
 *
 * Holds information about a single nexthop for netlink. These info
 * structures are transient and may contain pointers into rib
 * data structures for convenience.
 */
struct netlink_nh_info {
	/* Outgoing interface index; 0 means none. */
	uint32_t if_index;
	/* Gateway address, pointing into the source nexthop; NULL if none. */
	union g_addr *gateway;

	/*
	 * Information from the struct nexthop from which this nh was
	 * derived. For debug purposes only.
	 */
	/* Non-zero when the source nexthop has a recursive parent. */
	int recursive;
	/*
	 * Type of the source nexthop. Also consulted by the encoder to
	 * detect an IPv4 gateway on an IPv6 route.
	 */
	enum nexthop_types_t type;
	/* Encapsulation (e.g. VxLAN) to advertise for this nexthop. */
	struct fpm_nh_encap_info_t encap_info;
};
164 
/*
 * netlink_route_info
 *
 * A structure for holding information for a netlink route message.
 * Pointer members reference rib data rather than owning copies.
 */
struct netlink_route_info {
	/* Netlink message type (the RTM_NEWROUTE / RTM_DELROUTE command). */
	uint16_t nlmsg_type;
	/* Route type: RTN_UNICAST or one of the blackhole variants. */
	uint8_t rtm_type;
	/* Table id; values above 255 are encoded via RTA_TABLE. */
	uint32_t rtm_table;
	/* Originating protocol (RTPROT_*). */
	uint8_t rtm_protocol;
	/* Address family of the destination prefix. */
	uint8_t af;
	/* Destination prefix of the route. */
	struct prefix *prefix;
	/* Route metric; NULL when no route entry was supplied. */
	uint32_t *metric;
	/* Number of valid entries in nhs[]. */
	unsigned int num_nhs;

	/*
	 * Nexthop structures
	 */
	struct netlink_nh_info nhs[MULTIPATH_NUM];
	/* Preferred source address, taken from the first nexthop that has one. */
	union g_addr *pref_src;
};
186 
187 /*
188  * netlink_route_info_add_nh
189  *
190  * Add information about the given nexthop to the given route info
191  * structure.
192  *
193  * Returns true if a nexthop was added, false otherwise.
194  */
netlink_route_info_add_nh(struct netlink_route_info * ri,struct nexthop * nexthop,struct route_entry * re)195 static int netlink_route_info_add_nh(struct netlink_route_info *ri,
196 				     struct nexthop *nexthop,
197 				     struct route_entry *re)
198 {
199 	struct netlink_nh_info nhi;
200 	union g_addr *src;
201 	zebra_l3vni_t *zl3vni = NULL;
202 
203 	memset(&nhi, 0, sizeof(nhi));
204 	src = NULL;
205 
206 	if (ri->num_nhs >= (int)array_size(ri->nhs))
207 		return 0;
208 
209 	nhi.recursive = nexthop->rparent ? 1 : 0;
210 	nhi.type = nexthop->type;
211 	nhi.if_index = nexthop->ifindex;
212 
213 	if (nexthop->type == NEXTHOP_TYPE_IPV4
214 	    || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
215 		nhi.gateway = &nexthop->gate;
216 		if (nexthop->src.ipv4.s_addr != INADDR_ANY)
217 			src = &nexthop->src;
218 	}
219 
220 	if (nexthop->type == NEXTHOP_TYPE_IPV6
221 	    || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
222 		nhi.gateway = &nexthop->gate;
223 	}
224 
225 	if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
226 		if (nexthop->src.ipv4.s_addr != INADDR_ANY)
227 			src = &nexthop->src;
228 	}
229 
230 	if (!nhi.gateway && nhi.if_index == 0)
231 		return 0;
232 
233 	if (re && CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) {
234 		nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN;
235 
236 		zl3vni = zl3vni_from_vrf(nexthop->vrf_id);
237 		if (zl3vni && is_l3vni_oper_up(zl3vni)) {
238 
239 			/* Add VNI to VxLAN encap info */
240 			nhi.encap_info.vxlan_encap.vni = zl3vni->vni;
241 		}
242 	}
243 
244 	/*
245 	 * We have a valid nhi. Copy the structure over to the route_info.
246 	 */
247 	ri->nhs[ri->num_nhs] = nhi;
248 	ri->num_nhs++;
249 
250 	if (src && !ri->pref_src)
251 		ri->pref_src = src;
252 
253 	return 1;
254 }
255 
256 /*
257  * netlink_proto_from_route_type
258  */
netlink_proto_from_route_type(int type)259 static uint8_t netlink_proto_from_route_type(int type)
260 {
261 	switch (type) {
262 	case ZEBRA_ROUTE_KERNEL:
263 	case ZEBRA_ROUTE_CONNECT:
264 		return RTPROT_KERNEL;
265 
266 	default:
267 		return RTPROT_ZEBRA;
268 	}
269 }
270 
271 /*
272  * netlink_route_info_fill
273  *
274  * Fill out the route information object from the given route.
275  *
276  * Returns true on success and false on failure.
277  */
netlink_route_info_fill(struct netlink_route_info * ri,int cmd,rib_dest_t * dest,struct route_entry * re)278 static int netlink_route_info_fill(struct netlink_route_info *ri, int cmd,
279 				   rib_dest_t *dest, struct route_entry *re)
280 {
281 	struct nexthop *nexthop;
282 
283 	memset(ri, 0, sizeof(*ri));
284 
285 	ri->prefix = rib_dest_prefix(dest);
286 	ri->af = rib_dest_af(dest);
287 
288 	ri->nlmsg_type = cmd;
289 	ri->rtm_table = rib_table_info(rib_dest_table(dest))->table_id;
290 	ri->rtm_protocol = RTPROT_UNSPEC;
291 
292 	/*
293 	 * An RTM_DELROUTE need not be accompanied by any nexthops,
294 	 * particularly in our communication with the FPM.
295 	 */
296 	if (cmd == RTM_DELROUTE && !re)
297 		return 1;
298 
299 	if (!re) {
300 		zfpm_debug("%s: Expected non-NULL re pointer", __func__);
301 		return 0;
302 	}
303 
304 	ri->rtm_protocol = netlink_proto_from_route_type(re->type);
305 	ri->rtm_type = RTN_UNICAST;
306 	ri->metric = &re->metric;
307 
308 	for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) {
309 		if (ri->num_nhs >= zrouter.multipath_num)
310 			break;
311 
312 		if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
313 			continue;
314 
315 		if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
316 			switch (nexthop->bh_type) {
317 			case BLACKHOLE_ADMINPROHIB:
318 				ri->rtm_type = RTN_PROHIBIT;
319 				break;
320 			case BLACKHOLE_REJECT:
321 				ri->rtm_type = RTN_UNREACHABLE;
322 				break;
323 			case BLACKHOLE_NULL:
324 			default:
325 				ri->rtm_type = RTN_BLACKHOLE;
326 				break;
327 			}
328 		}
329 
330 		if ((cmd == RTM_NEWROUTE
331 		     && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
332 		    || (cmd == RTM_DELROUTE
333 			&& CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED))) {
334 			netlink_route_info_add_nh(ri, nexthop, re);
335 		}
336 	}
337 
338 	if (ri->num_nhs == 0) {
339 		switch (ri->rtm_type) {
340 		case RTN_PROHIBIT:
341 		case RTN_UNREACHABLE:
342 		case RTN_BLACKHOLE:
343 			break;
344 		default:
345 			/* If there is no useful nexthop then return. */
346 			zfpm_debug(
347 				"netlink_encode_route(): No useful nexthop.");
348 			return 0;
349 		}
350 	}
351 
352 	return 1;
353 }
354 
355 /*
356  * netlink_route_info_encode
357  *
358  * Returns the number of bytes written to the buffer. 0 or a negative
359  * value indicates an error.
360  */
netlink_route_info_encode(struct netlink_route_info * ri,char * in_buf,size_t in_buf_len)361 static int netlink_route_info_encode(struct netlink_route_info *ri,
362 				     char *in_buf, size_t in_buf_len)
363 {
364 	size_t bytelen;
365 	unsigned int nexthop_num = 0;
366 	size_t buf_offset;
367 	struct netlink_nh_info *nhi;
368 	enum fpm_nh_encap_type_t encap;
369 	struct rtattr *nest, *inner_nest;
370 	struct rtnexthop *rtnh;
371 	struct vxlan_encap_info_t *vxlan;
372 	struct in6_addr ipv6;
373 
374 	struct {
375 		struct nlmsghdr n;
376 		struct rtmsg r;
377 		char buf[1];
378 	} * req;
379 
380 	req = (void *)in_buf;
381 
382 	buf_offset = ((char *)req->buf) - ((char *)req);
383 
384 	if (in_buf_len < buf_offset) {
385 		assert(0);
386 		return 0;
387 	}
388 
389 	memset(req, 0, buf_offset);
390 
391 	bytelen = af_addr_size(ri->af);
392 
393 	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
394 	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
395 	req->n.nlmsg_type = ri->nlmsg_type;
396 	req->r.rtm_family = ri->af;
397 
398 	/*
399 	 * rtm_table field is a uchar field which can accomodate table_id less
400 	 * than 256.
401 	 * To support table id greater than 255, if the table_id is greater than
402 	 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
403 	 * with 32 bit value as the table_id.
404 	 */
405 	if (ri->rtm_table < 256)
406 		req->r.rtm_table = ri->rtm_table;
407 	else {
408 		req->r.rtm_table = RT_TABLE_UNSPEC;
409 		nl_attr_put32(&req->n, in_buf_len, RTA_TABLE, ri->rtm_table);
410 	}
411 
412 	req->r.rtm_dst_len = ri->prefix->prefixlen;
413 	req->r.rtm_protocol = ri->rtm_protocol;
414 	req->r.rtm_scope = RT_SCOPE_UNIVERSE;
415 
416 	nl_attr_put(&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix,
417 		    bytelen);
418 
419 	req->r.rtm_type = ri->rtm_type;
420 
421 	/* Metric. */
422 	if (ri->metric)
423 		nl_attr_put32(&req->n, in_buf_len, RTA_PRIORITY, *ri->metric);
424 
425 	if (ri->num_nhs == 0)
426 		goto done;
427 
428 	if (ri->num_nhs == 1) {
429 		nhi = &ri->nhs[0];
430 
431 		if (nhi->gateway) {
432 			if (nhi->type == NEXTHOP_TYPE_IPV4_IFINDEX
433 			    && ri->af == AF_INET6) {
434 				ipv4_to_ipv4_mapped_ipv6(&ipv6,
435 							 nhi->gateway->ipv4);
436 				nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
437 					    &ipv6, bytelen);
438 			} else
439 				nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
440 					    nhi->gateway, bytelen);
441 		}
442 
443 		if (nhi->if_index) {
444 			nl_attr_put32(&req->n, in_buf_len, RTA_OIF,
445 				      nhi->if_index);
446 		}
447 
448 		encap = nhi->encap_info.encap_type;
449 		switch (encap) {
450 		case FPM_NH_ENCAP_NONE:
451 		case FPM_NH_ENCAP_MAX:
452 			break;
453 		case FPM_NH_ENCAP_VXLAN:
454 			nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
455 				      encap);
456 			vxlan = &nhi->encap_info.vxlan_encap;
457 			nest = nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
458 			nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
459 				      vxlan->vni);
460 			nl_attr_nest_end(&req->n, nest);
461 			break;
462 		}
463 
464 		goto done;
465 	}
466 
467 	/*
468 	 * Multipath case.
469 	 */
470 	nest = nl_attr_nest(&req->n, in_buf_len, RTA_MULTIPATH);
471 
472 	for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) {
473 		rtnh = nl_attr_rtnh(&req->n, in_buf_len);
474 		nhi = &ri->nhs[nexthop_num];
475 
476 		if (nhi->gateway)
477 			nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
478 				    nhi->gateway, bytelen);
479 
480 		if (nhi->if_index) {
481 			rtnh->rtnh_ifindex = nhi->if_index;
482 		}
483 
484 		encap = nhi->encap_info.encap_type;
485 		switch (encap) {
486 		case FPM_NH_ENCAP_NONE:
487 		case FPM_NH_ENCAP_MAX:
488 			break;
489 		case FPM_NH_ENCAP_VXLAN:
490 			nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
491 				      encap);
492 			vxlan = &nhi->encap_info.vxlan_encap;
493 			inner_nest =
494 				nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
495 			nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
496 				      vxlan->vni);
497 			nl_attr_nest_end(&req->n, inner_nest);
498 			break;
499 		}
500 
501 		nl_attr_rtnh_end(&req->n, rtnh);
502 	}
503 
504 	nl_attr_nest_end(&req->n, nest);
505 	assert(nest->rta_len > RTA_LENGTH(0));
506 
507 done:
508 
509 	if (ri->pref_src) {
510 		nl_attr_put(&req->n, in_buf_len, RTA_PREFSRC, &ri->pref_src,
511 			    bytelen);
512 	}
513 
514 	assert(req->n.nlmsg_len < in_buf_len);
515 	return req->n.nlmsg_len;
516 }
517 
518 /*
519  * zfpm_log_route_info
520  *
521  * Helper function to log the information in a route_info structure.
522  */
zfpm_log_route_info(struct netlink_route_info * ri,const char * label)523 static void zfpm_log_route_info(struct netlink_route_info *ri,
524 				const char *label)
525 {
526 	struct netlink_nh_info *nhi;
527 	unsigned int i;
528 
529 	zfpm_debug("%s : %s %s/%d, Proto: %s, Metric: %u", label,
530 		   nl_msg_type_to_str(ri->nlmsg_type),
531 		   prefix_addr_to_a(ri->prefix), ri->prefix->prefixlen,
532 		   nl_rtproto_to_str(ri->rtm_protocol),
533 		   ri->metric ? *ri->metric : 0);
534 
535 	for (i = 0; i < ri->num_nhs; i++) {
536 		nhi = &ri->nhs[i];
537 		zfpm_debug("  Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
538 			   nhi->if_index, addr_to_a(ri->af, nhi->gateway),
539 			   nhi->recursive ? "yes" : "no",
540 			   nexthop_type_to_str(nhi->type),
541 			   fpm_nh_encap_type_to_str(nhi->encap_info.encap_type)
542 			   );
543 	}
544 }
545 
546 /*
547  * zfpm_netlink_encode_route
548  *
549  * Create a netlink message corresponding to the given route in the
550  * given buffer space.
551  *
552  * Returns the number of bytes written to the buffer. 0 or a negative
553  * value indicates an error.
554  */
zfpm_netlink_encode_route(int cmd,rib_dest_t * dest,struct route_entry * re,char * in_buf,size_t in_buf_len)555 int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re,
556 			      char *in_buf, size_t in_buf_len)
557 {
558 	struct netlink_route_info ri_space, *ri;
559 
560 	ri = &ri_space;
561 
562 	if (!netlink_route_info_fill(ri, cmd, dest, re))
563 		return 0;
564 
565 	zfpm_log_route_info(ri, __func__);
566 
567 	return netlink_route_info_encode(ri, in_buf, in_buf_len);
568 }
569 
570 /*
571  * zfpm_netlink_encode_mac
572  *
573  * Create a netlink message corresponding to the given MAC.
574  *
575  * Returns the number of bytes written to the buffer. 0 or a negative
576  * value indicates an error.
577  */
zfpm_netlink_encode_mac(struct fpm_mac_info_t * mac,char * in_buf,size_t in_buf_len)578 int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
579 			    size_t in_buf_len)
580 {
581 	char buf1[ETHER_ADDR_STRLEN];
582 	size_t buf_offset;
583 
584 	struct macmsg {
585 		struct nlmsghdr hdr;
586 		struct ndmsg ndm;
587 		char buf[0];
588 	} *req;
589 	req = (void *)in_buf;
590 
591 	buf_offset = offsetof(struct macmsg, buf);
592 	if (in_buf_len < buf_offset)
593 		return 0;
594 	memset(req, 0, buf_offset);
595 
596 	/* Construct nlmsg header */
597 	req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
598 	req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ?
599 				RTM_DELNEIGH : RTM_NEWNEIGH;
600 	req->hdr.nlmsg_flags = NLM_F_REQUEST;
601 	if (req->hdr.nlmsg_type == RTM_NEWNEIGH)
602 		req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
603 
604 	/* Construct ndmsg */
605 	req->ndm.ndm_family = AF_BRIDGE;
606 	req->ndm.ndm_ifindex = mac->vxlan_if;
607 
608 	req->ndm.ndm_state = NUD_REACHABLE;
609 	req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
610 	if (CHECK_FLAG(mac->zebra_flags,
611 		(ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)))
612 		req->ndm.ndm_state |= NUD_NOARP;
613 	else
614 		req->ndm.ndm_flags |= NTF_EXT_LEARNED;
615 
616 	/* Add attributes */
617 	nl_attr_put(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6);
618 	nl_attr_put(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4);
619 	nl_attr_put32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if);
620 	nl_attr_put32(&req->hdr, in_buf_len, NDA_VNI, mac->vni);
621 
622 	assert(req->hdr.nlmsg_len < in_buf_len);
623 
624 	zfpm_debug("Tx %s family %s ifindex %u MAC %s DEST %s",
625 		   nl_msg_type_to_str(req->hdr.nlmsg_type),
626 		   nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex,
627 		   prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)),
628 		   inet_ntoa(mac->r_vtep_ip));
629 
630 	return req->hdr.nlmsg_len;
631 }
632 
633 #endif /* HAVE_NETLINK */
634