1 /*
2 * Zebra Policy Based Routing (PBR) interaction with the kernel using
3 * netlink.
4 * Copyright (C) 2018 Cumulus Networks, Inc.
5 *
6 * This file is part of FRR.
7 *
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24 #include <zebra.h>
25
26 #ifdef HAVE_NETLINK
27
28 #include "if.h"
29 #include "prefix.h"
30 #include "vrf.h"
31
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
36 #include "zebra/rt.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
45
46 /* definitions */
47
48 /* static function declarations */
49
50 /* Private functions */
51
52
53 /*
54 * netlink_rule_msg_encode
55 *
56 * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
57 *
58 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
59 * or the number of bytes written to buf.
60 */
61 static ssize_t
netlink_rule_msg_encode(int cmd,const struct zebra_dplane_ctx * ctx,uint32_t filter_bm,uint32_t priority,uint32_t table,const struct prefix * src_ip,const struct prefix * dst_ip,uint32_t fwmark,uint8_t dsfield,void * buf,size_t buflen)62 netlink_rule_msg_encode(int cmd, const struct zebra_dplane_ctx *ctx,
63 uint32_t filter_bm, uint32_t priority, uint32_t table,
64 const struct prefix *src_ip,
65 const struct prefix *dst_ip, uint32_t fwmark,
66 uint8_t dsfield, void *buf, size_t buflen)
67 {
68 uint8_t protocol = RTPROT_ZEBRA;
69 int family;
70 int bytelen;
71 struct {
72 struct nlmsghdr n;
73 struct fib_rule_hdr frh;
74 char buf[];
75 } *req = buf;
76
77 const char *ifname = dplane_ctx_rule_get_ifname(ctx);
78 char buf1[PREFIX_STRLEN];
79 char buf2[PREFIX_STRLEN];
80
81 if (buflen < sizeof(*req))
82 return 0;
83 memset(req, 0, sizeof(*req));
84 family = PREFIX_FAMILY(src_ip);
85 bytelen = (family == AF_INET ? 4 : 16);
86
87 req->n.nlmsg_type = cmd;
88 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
89 req->n.nlmsg_flags = NLM_F_REQUEST;
90
91 req->frh.family = family;
92 req->frh.action = FR_ACT_TO_TBL;
93
94 if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol,
95 sizeof(protocol)))
96 return 0;
97
98 /* rule's pref # */
99 if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority))
100 return 0;
101
102 /* interface on which applied */
103 if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname,
104 strlen(ifname) + 1))
105 return 0;
106
107 /* source IP, if specified */
108 if (filter_bm & PBR_FILTER_SRC_IP) {
109 req->frh.src_len = src_ip->prefixlen;
110 if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix,
111 bytelen))
112 return 0;
113 }
114
115 /* destination IP, if specified */
116 if (filter_bm & PBR_FILTER_DST_IP) {
117 req->frh.dst_len = dst_ip->prefixlen;
118 if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix,
119 bytelen))
120 return 0;
121 }
122
123 /* fwmark, if specified */
124 if (filter_bm & PBR_FILTER_FWMARK) {
125 if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark))
126 return 0;
127 }
128
129 /* dsfield, if specified */
130 if (filter_bm & PBR_FILTER_DSFIELD)
131 req->frh.tos = dsfield;
132
133 /* Route table to use to forward, if filter criteria matches. */
134 if (table < 256)
135 req->frh.table = table;
136 else {
137 req->frh.table = RT_TABLE_UNSPEC;
138 if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table))
139 return 0;
140 }
141
142 if (IS_ZEBRA_DEBUG_KERNEL)
143 zlog_debug(
144 "Tx %s family %s IF %s Pref %u Fwmark %u Src %s Dst %s Table %u",
145 nl_msg_type_to_str(cmd), nl_family_to_str(family),
146 ifname, priority, fwmark,
147 prefix2str(src_ip, buf1, sizeof(buf1)),
148 prefix2str(dst_ip, buf2, sizeof(buf2)), table);
149
150 return NLMSG_ALIGN(req->n.nlmsg_len);
151 }
152
netlink_rule_msg_encoder(struct zebra_dplane_ctx * ctx,void * buf,size_t buflen)153 static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
154 size_t buflen)
155 {
156 int cmd = RTM_NEWRULE;
157
158 if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE)
159 cmd = RTM_DELRULE;
160
161 return netlink_rule_msg_encode(
162 cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx),
163 dplane_ctx_rule_get_priority(ctx),
164 dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx),
165 dplane_ctx_rule_get_dst_ip(ctx),
166 dplane_ctx_rule_get_fwmark(ctx),
167 dplane_ctx_rule_get_dsfield(ctx), buf, buflen);
168 }
169
netlink_oldrule_msg_encoder(struct zebra_dplane_ctx * ctx,void * buf,size_t buflen)170 static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
171 void *buf, size_t buflen)
172 {
173 return netlink_rule_msg_encode(
174 RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx),
175 dplane_ctx_rule_get_old_priority(ctx),
176 dplane_ctx_rule_get_old_table(ctx),
177 dplane_ctx_rule_get_old_src_ip(ctx),
178 dplane_ctx_rule_get_old_dst_ip(ctx),
179 dplane_ctx_rule_get_old_fwmark(ctx),
180 dplane_ctx_rule_get_old_dsfield(ctx), buf, buflen);
181 }
182
183 /* Public functions */
184
185 enum netlink_msg_status
netlink_put_rule_update_msg(struct nl_batch * bth,struct zebra_dplane_ctx * ctx)186 netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
187 {
188 enum dplane_op_e op;
189 enum netlink_msg_status ret;
190
191 op = dplane_ctx_get_op(ctx);
192 if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE
193 || op == DPLANE_OP_RULE_DELETE)) {
194 flog_err(
195 EC_ZEBRA_PBR_RULE_UPDATE,
196 "Context received for kernel rule update with incorrect OP code (%u)",
197 op);
198 return FRR_NETLINK_ERROR;
199 }
200
201 ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false);
202
203 /**
204 * Delete the old one.
205 *
206 * Don't care about this result right?
207 */
208 if (op == DPLANE_OP_RULE_UPDATE)
209 netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder,
210 true);
211
212 return ret;
213 }
214
215 /*
216 * Handle netlink notification informing a rule add or delete.
217 * Handling of an ADD is TBD.
218 * DELs are notified up, if other attributes indicate it may be a
219 * notification of interest. The expectation is that if this corresponds
220 * to a PBR rule added by FRR, it will be readded.
221 *
222 * If startup and we see a rule we created, delete it as its leftover
223 * from a previous instance and should have been removed on shutdown.
224 *
225 */
netlink_rule_change(struct nlmsghdr * h,ns_id_t ns_id,int startup)226 int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
227 {
228 struct zebra_ns *zns;
229 struct fib_rule_hdr *frh;
230 struct rtattr *tb[FRA_MAX + 1];
231 int len;
232 char *ifname;
233 struct zebra_pbr_rule rule = {};
234 char buf1[PREFIX_STRLEN];
235 char buf2[PREFIX_STRLEN];
236 uint8_t proto = 0;
237
238 /* Basic validation followed by extracting attributes. */
239 if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
240 return 0;
241
242 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
243 if (len < 0) {
244 zlog_err(
245 "%s: Message received from netlink is of a broken size: %d %zu",
246 __func__, h->nlmsg_len,
247 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr)));
248 return -1;
249 }
250
251 frh = NLMSG_DATA(h);
252
253 if (frh->family != AF_INET && frh->family != AF_INET6) {
254 if (frh->family == RTNL_FAMILY_IPMR
255 || frh->family == RTNL_FAMILY_IP6MR) {
256 if (IS_ZEBRA_DEBUG_KERNEL)
257 zlog_debug(
258 "Received rule netlink that we are ignoring for family %u, rule change: %u",
259 frh->family, h->nlmsg_type);
260 return 0;
261 }
262 flog_warn(
263 EC_ZEBRA_NETLINK_INVALID_AF,
264 "Invalid address family: %u received from kernel rule change: %u",
265 frh->family, h->nlmsg_type);
266 return 0;
267 }
268 if (frh->action != FR_ACT_TO_TBL)
269 return 0;
270
271 memset(tb, 0, sizeof(tb));
272 netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
273
274 if (tb[FRA_PRIORITY])
275 rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]);
276
277 if (tb[FRA_SRC]) {
278 if (frh->family == AF_INET)
279 memcpy(&rule.rule.filter.src_ip.u.prefix4,
280 RTA_DATA(tb[FRA_SRC]), 4);
281 else
282 memcpy(&rule.rule.filter.src_ip.u.prefix6,
283 RTA_DATA(tb[FRA_SRC]), 16);
284 rule.rule.filter.src_ip.prefixlen = frh->src_len;
285 rule.rule.filter.src_ip.family = frh->family;
286 rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;
287 }
288
289 if (tb[FRA_DST]) {
290 if (frh->family == AF_INET)
291 memcpy(&rule.rule.filter.dst_ip.u.prefix4,
292 RTA_DATA(tb[FRA_DST]), 4);
293 else
294 memcpy(&rule.rule.filter.dst_ip.u.prefix6,
295 RTA_DATA(tb[FRA_DST]), 16);
296 rule.rule.filter.dst_ip.prefixlen = frh->dst_len;
297 rule.rule.filter.dst_ip.family = frh->family;
298 rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP;
299 }
300
301 if (tb[FRA_TABLE])
302 rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]);
303 else
304 rule.rule.action.table = frh->table;
305
306 /* TBD: We don't care about rules not specifying an IIF. */
307 if (tb[FRA_IFNAME] == NULL)
308 return 0;
309
310 if (tb[FRA_PROTOCOL])
311 proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]);
312
313 ifname = (char *)RTA_DATA(tb[FRA_IFNAME]);
314 strlcpy(rule.ifname, ifname, sizeof(rule.ifname));
315
316 if (h->nlmsg_type == RTM_NEWRULE) {
317 /*
318 * If we see a rule at startup we created, delete it now.
319 * It should have been flushed on a previous shutdown.
320 */
321 if (startup && proto == RTPROT_ZEBRA) {
322 enum zebra_dplane_result ret;
323
324 ret = dplane_pbr_rule_delete(&rule);
325
326 zlog_debug(
327 "%s: %s leftover rule: family %s IF %s Pref %u Src %s Dst %s Table %u",
328 __func__,
329 ((ret == ZEBRA_DPLANE_REQUEST_FAILURE)
330 ? "Failed to remove"
331 : "Removed"),
332 nl_family_to_str(frh->family), rule.ifname,
333 rule.rule.priority,
334 prefix2str(&rule.rule.filter.src_ip, buf1,
335 sizeof(buf1)),
336 prefix2str(&rule.rule.filter.dst_ip, buf2,
337 sizeof(buf2)),
338 rule.rule.action.table);
339 }
340
341 /* TBD */
342 return 0;
343 }
344
345 zns = zebra_ns_lookup(ns_id);
346
347 /* If we don't know the interface, we don't care. */
348 if (!if_lookup_by_name_per_ns(zns, ifname))
349 return 0;
350
351 if (IS_ZEBRA_DEBUG_KERNEL)
352 zlog_debug(
353 "Rx %s family %s IF %s Pref %u Src %s Dst %s Table %u",
354 nl_msg_type_to_str(h->nlmsg_type),
355 nl_family_to_str(frh->family), rule.ifname,
356 rule.rule.priority,
357 prefix2str(&rule.rule.filter.src_ip, buf1,
358 sizeof(buf1)),
359 prefix2str(&rule.rule.filter.dst_ip, buf2,
360 sizeof(buf2)),
361 rule.rule.action.table);
362
363 return kernel_pbr_rule_del(&rule);
364 }
365
366 /*
367 * Request rules from the kernel
368 */
netlink_request_rules(struct zebra_ns * zns,int family,int type)369 static int netlink_request_rules(struct zebra_ns *zns, int family, int type)
370 {
371 struct {
372 struct nlmsghdr n;
373 struct fib_rule_hdr frh;
374 char buf[NL_PKT_BUF_SIZE];
375 } req;
376
377 memset(&req, 0, sizeof(req));
378 req.n.nlmsg_type = type;
379 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
380 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
381 req.frh.family = family;
382
383 return netlink_request(&zns->netlink_cmd, &req);
384 }
385
386 /*
387 * Get to know existing PBR rules in the kernel - typically called at startup.
388 */
netlink_rules_read(struct zebra_ns * zns)389 int netlink_rules_read(struct zebra_ns *zns)
390 {
391 int ret;
392 struct zebra_dplane_info dp_info;
393
394 zebra_dplane_info_from_zns(&dp_info, zns, true);
395
396 ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE);
397 if (ret < 0)
398 return ret;
399
400 ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
401 &dp_info, 0, 1);
402 if (ret < 0)
403 return ret;
404
405 ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE);
406 if (ret < 0)
407 return ret;
408
409 ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
410 &dp_info, 0, 1);
411 return ret;
412 }
413
414 #endif /* HAVE_NETLINK */
415