1 /*
2  * Zebra Policy Based Routing (PBR) interaction with the kernel using
3  * netlink.
4  * Copyright (C) 2018  Cumulus Networks, Inc.
5  *
6  * This file is part of FRR.
7  *
8  * FRR is free software; you can redistribute it and/or modify it
9  * under the terms of the GNU General Public License as published by the
10  * Free Software Foundation; either version 2, or (at your option) any
11  * later version.
12  *
13  * FRR is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FRR; see the file COPYING.  If not, write to the Free
20  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  * 02111-1307, USA.
22  */
23 
24 #include <zebra.h>
25 
26 #ifdef HAVE_NETLINK
27 
28 #include "if.h"
29 #include "prefix.h"
30 #include "vrf.h"
31 
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
36 #include "zebra/rt.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
45 
46 /* definitions */
47 
48 /* static function declarations */
49 
50 /* Private functions */
51 
52 
53 /*
54  * netlink_rule_msg_encode
55  *
56  * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
57  *
58  * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
59  * or the number of bytes written to buf.
60  */
61 static ssize_t
netlink_rule_msg_encode(int cmd,const struct zebra_dplane_ctx * ctx,uint32_t filter_bm,uint32_t priority,uint32_t table,const struct prefix * src_ip,const struct prefix * dst_ip,uint32_t fwmark,uint8_t dsfield,void * buf,size_t buflen)62 netlink_rule_msg_encode(int cmd, const struct zebra_dplane_ctx *ctx,
63 			uint32_t filter_bm, uint32_t priority, uint32_t table,
64 			const struct prefix *src_ip,
65 			const struct prefix *dst_ip, uint32_t fwmark,
66 			uint8_t dsfield, void *buf, size_t buflen)
67 {
68 	uint8_t protocol = RTPROT_ZEBRA;
69 	int family;
70 	int bytelen;
71 	struct {
72 		struct nlmsghdr n;
73 		struct fib_rule_hdr frh;
74 		char buf[];
75 	} *req = buf;
76 
77 	const char *ifname = dplane_ctx_rule_get_ifname(ctx);
78 	char buf1[PREFIX_STRLEN];
79 	char buf2[PREFIX_STRLEN];
80 
81 	if (buflen < sizeof(*req))
82 		return 0;
83 	memset(req, 0, sizeof(*req));
84 	family = PREFIX_FAMILY(src_ip);
85 	bytelen = (family == AF_INET ? 4 : 16);
86 
87 	req->n.nlmsg_type = cmd;
88 	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
89 	req->n.nlmsg_flags = NLM_F_REQUEST;
90 
91 	req->frh.family = family;
92 	req->frh.action = FR_ACT_TO_TBL;
93 
94 	if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol,
95 			 sizeof(protocol)))
96 		return 0;
97 
98 	/* rule's pref # */
99 	if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority))
100 		return 0;
101 
102 	/* interface on which applied */
103 	if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname,
104 			 strlen(ifname) + 1))
105 		return 0;
106 
107 	/* source IP, if specified */
108 	if (filter_bm & PBR_FILTER_SRC_IP) {
109 		req->frh.src_len = src_ip->prefixlen;
110 		if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix,
111 				 bytelen))
112 			return 0;
113 	}
114 
115 	/* destination IP, if specified */
116 	if (filter_bm & PBR_FILTER_DST_IP) {
117 		req->frh.dst_len = dst_ip->prefixlen;
118 		if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix,
119 				 bytelen))
120 			return 0;
121 	}
122 
123 	/* fwmark, if specified */
124 	if (filter_bm & PBR_FILTER_FWMARK) {
125 		if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark))
126 			return 0;
127 	}
128 
129 	/* dsfield, if specified */
130 	if (filter_bm & PBR_FILTER_DSFIELD)
131 		req->frh.tos = dsfield;
132 
133 	/* Route table to use to forward, if filter criteria matches. */
134 	if (table < 256)
135 		req->frh.table = table;
136 	else {
137 		req->frh.table = RT_TABLE_UNSPEC;
138 		if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table))
139 			return 0;
140 	}
141 
142 	if (IS_ZEBRA_DEBUG_KERNEL)
143 		zlog_debug(
144 			"Tx %s family %s IF %s Pref %u Fwmark %u Src %s Dst %s Table %u",
145 			nl_msg_type_to_str(cmd), nl_family_to_str(family),
146 			ifname, priority, fwmark,
147 			prefix2str(src_ip, buf1, sizeof(buf1)),
148 			prefix2str(dst_ip, buf2, sizeof(buf2)), table);
149 
150 	return NLMSG_ALIGN(req->n.nlmsg_len);
151 }
152 
netlink_rule_msg_encoder(struct zebra_dplane_ctx * ctx,void * buf,size_t buflen)153 static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
154 					size_t buflen)
155 {
156 	int cmd = RTM_NEWRULE;
157 
158 	if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE)
159 		cmd = RTM_DELRULE;
160 
161 	return netlink_rule_msg_encode(
162 		cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx),
163 		dplane_ctx_rule_get_priority(ctx),
164 		dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx),
165 		dplane_ctx_rule_get_dst_ip(ctx),
166 		dplane_ctx_rule_get_fwmark(ctx),
167 		dplane_ctx_rule_get_dsfield(ctx), buf, buflen);
168 }
169 
netlink_oldrule_msg_encoder(struct zebra_dplane_ctx * ctx,void * buf,size_t buflen)170 static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
171 					   void *buf, size_t buflen)
172 {
173 	return netlink_rule_msg_encode(
174 		RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx),
175 		dplane_ctx_rule_get_old_priority(ctx),
176 		dplane_ctx_rule_get_old_table(ctx),
177 		dplane_ctx_rule_get_old_src_ip(ctx),
178 		dplane_ctx_rule_get_old_dst_ip(ctx),
179 		dplane_ctx_rule_get_old_fwmark(ctx),
180 		dplane_ctx_rule_get_old_dsfield(ctx), buf, buflen);
181 }
182 
183 /* Public functions */
184 
185 enum netlink_msg_status
netlink_put_rule_update_msg(struct nl_batch * bth,struct zebra_dplane_ctx * ctx)186 netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
187 {
188 	enum dplane_op_e op;
189 	enum netlink_msg_status ret;
190 
191 	op = dplane_ctx_get_op(ctx);
192 	if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE
193 	      || op == DPLANE_OP_RULE_DELETE)) {
194 		flog_err(
195 			EC_ZEBRA_PBR_RULE_UPDATE,
196 			"Context received for kernel rule update with incorrect OP code (%u)",
197 			op);
198 		return FRR_NETLINK_ERROR;
199 	}
200 
201 	ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false);
202 
203 	/**
204 	 * Delete the old one.
205 	 *
206 	 * Don't care about this result right?
207 	 */
208 	if (op == DPLANE_OP_RULE_UPDATE)
209 		netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder,
210 				      true);
211 
212 	return ret;
213 }
214 
215 /*
216  * Handle netlink notification informing a rule add or delete.
217  * Handling of an ADD is TBD.
218  * DELs are notified up, if other attributes indicate it may be a
219  * notification of interest. The expectation is that if this corresponds
220  * to a PBR rule added by FRR, it will be readded.
221  *
222  * If startup and we see a rule we created, delete it as its leftover
223  * from a previous instance and should have been removed on shutdown.
224  *
225  */
netlink_rule_change(struct nlmsghdr * h,ns_id_t ns_id,int startup)226 int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
227 {
228 	struct zebra_ns *zns;
229 	struct fib_rule_hdr *frh;
230 	struct rtattr *tb[FRA_MAX + 1];
231 	int len;
232 	char *ifname;
233 	struct zebra_pbr_rule rule = {};
234 	char buf1[PREFIX_STRLEN];
235 	char buf2[PREFIX_STRLEN];
236 	uint8_t proto = 0;
237 
238 	/* Basic validation followed by extracting attributes. */
239 	if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
240 		return 0;
241 
242 	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
243 	if (len < 0) {
244 		zlog_err(
245 			"%s: Message received from netlink is of a broken size: %d %zu",
246 			__func__, h->nlmsg_len,
247 			(size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr)));
248 		return -1;
249 	}
250 
251 	frh = NLMSG_DATA(h);
252 
253 	if (frh->family != AF_INET && frh->family != AF_INET6) {
254 		if (frh->family == RTNL_FAMILY_IPMR
255 		    || frh->family == RTNL_FAMILY_IP6MR) {
256 			if (IS_ZEBRA_DEBUG_KERNEL)
257 				zlog_debug(
258 					"Received rule netlink that we are ignoring for family %u, rule change: %u",
259 					frh->family, h->nlmsg_type);
260 			return 0;
261 		}
262 		flog_warn(
263 			EC_ZEBRA_NETLINK_INVALID_AF,
264 			"Invalid address family: %u received from kernel rule change: %u",
265 			frh->family, h->nlmsg_type);
266 		return 0;
267 	}
268 	if (frh->action != FR_ACT_TO_TBL)
269 		return 0;
270 
271 	memset(tb, 0, sizeof(tb));
272 	netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
273 
274 	if (tb[FRA_PRIORITY])
275 		rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]);
276 
277 	if (tb[FRA_SRC]) {
278 		if (frh->family == AF_INET)
279 			memcpy(&rule.rule.filter.src_ip.u.prefix4,
280 			       RTA_DATA(tb[FRA_SRC]), 4);
281 		else
282 			memcpy(&rule.rule.filter.src_ip.u.prefix6,
283 			       RTA_DATA(tb[FRA_SRC]), 16);
284 		rule.rule.filter.src_ip.prefixlen = frh->src_len;
285 		rule.rule.filter.src_ip.family = frh->family;
286 		rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;
287 	}
288 
289 	if (tb[FRA_DST]) {
290 		if (frh->family == AF_INET)
291 			memcpy(&rule.rule.filter.dst_ip.u.prefix4,
292 			       RTA_DATA(tb[FRA_DST]), 4);
293 		else
294 			memcpy(&rule.rule.filter.dst_ip.u.prefix6,
295 			       RTA_DATA(tb[FRA_DST]), 16);
296 		rule.rule.filter.dst_ip.prefixlen = frh->dst_len;
297 		rule.rule.filter.dst_ip.family = frh->family;
298 		rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP;
299 	}
300 
301 	if (tb[FRA_TABLE])
302 		rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]);
303 	else
304 		rule.rule.action.table = frh->table;
305 
306 	/* TBD: We don't care about rules not specifying an IIF. */
307 	if (tb[FRA_IFNAME] == NULL)
308 		return 0;
309 
310 	if (tb[FRA_PROTOCOL])
311 		proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]);
312 
313 	ifname = (char *)RTA_DATA(tb[FRA_IFNAME]);
314 	strlcpy(rule.ifname, ifname, sizeof(rule.ifname));
315 
316 	if (h->nlmsg_type == RTM_NEWRULE) {
317 		/*
318 		 * If we see a rule at startup we created, delete it now.
319 		 * It should have been flushed on a previous shutdown.
320 		 */
321 		if (startup && proto == RTPROT_ZEBRA) {
322 			enum zebra_dplane_result ret;
323 
324 			ret = dplane_pbr_rule_delete(&rule);
325 
326 			zlog_debug(
327 				"%s: %s leftover rule: family %s IF %s Pref %u Src %s Dst %s Table %u",
328 				__func__,
329 				((ret == ZEBRA_DPLANE_REQUEST_FAILURE)
330 					 ? "Failed to remove"
331 					 : "Removed"),
332 				nl_family_to_str(frh->family), rule.ifname,
333 				rule.rule.priority,
334 				prefix2str(&rule.rule.filter.src_ip, buf1,
335 					   sizeof(buf1)),
336 				prefix2str(&rule.rule.filter.dst_ip, buf2,
337 					   sizeof(buf2)),
338 				rule.rule.action.table);
339 		}
340 
341 		/* TBD */
342 		return 0;
343 	}
344 
345 	zns = zebra_ns_lookup(ns_id);
346 
347 	/* If we don't know the interface, we don't care. */
348 	if (!if_lookup_by_name_per_ns(zns, ifname))
349 		return 0;
350 
351 	if (IS_ZEBRA_DEBUG_KERNEL)
352 		zlog_debug(
353 			"Rx %s family %s IF %s Pref %u Src %s Dst %s Table %u",
354 			nl_msg_type_to_str(h->nlmsg_type),
355 			nl_family_to_str(frh->family), rule.ifname,
356 			rule.rule.priority,
357 			prefix2str(&rule.rule.filter.src_ip, buf1,
358 				   sizeof(buf1)),
359 			prefix2str(&rule.rule.filter.dst_ip, buf2,
360 				   sizeof(buf2)),
361 			rule.rule.action.table);
362 
363 	return kernel_pbr_rule_del(&rule);
364 }
365 
366 /*
367  * Request rules from the kernel
368  */
netlink_request_rules(struct zebra_ns * zns,int family,int type)369 static int netlink_request_rules(struct zebra_ns *zns, int family, int type)
370 {
371 	struct {
372 		struct nlmsghdr n;
373 		struct fib_rule_hdr frh;
374 		char buf[NL_PKT_BUF_SIZE];
375 	} req;
376 
377 	memset(&req, 0, sizeof(req));
378 	req.n.nlmsg_type = type;
379 	req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
380 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
381 	req.frh.family = family;
382 
383 	return netlink_request(&zns->netlink_cmd, &req);
384 }
385 
386 /*
387  * Get to know existing PBR rules in the kernel - typically called at startup.
388  */
netlink_rules_read(struct zebra_ns * zns)389 int netlink_rules_read(struct zebra_ns *zns)
390 {
391 	int ret;
392 	struct zebra_dplane_info dp_info;
393 
394 	zebra_dplane_info_from_zns(&dp_info, zns, true);
395 
396 	ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE);
397 	if (ret < 0)
398 		return ret;
399 
400 	ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
401 				 &dp_info, 0, 1);
402 	if (ret < 0)
403 		return ret;
404 
405 	ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE);
406 	if (ret < 0)
407 		return ret;
408 
409 	ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
410 				 &dp_info, 0, 1);
411 	return ret;
412 }
413 
414 #endif /* HAVE_NETLINK */
415