xref: /linux/net/netfilter/nf_conntrack_bpf.c (revision d642ef71)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Unstable Conntrack Helpers for XDP and TC-BPF hook
3  *
4  * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5  * allowed to break compatibility for these functions since the interface they
6  * are exposed through to BPF programs is explicitly unstable.
7  */
8 
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/mutex.h>
14 #include <linux/types.h>
15 #include <linux/btf_ids.h>
16 #include <linux/net_namespace.h>
17 #include <net/xdp.h>
18 #include <net/netfilter/nf_conntrack_bpf.h>
19 #include <net/netfilter/nf_conntrack_core.h>
20 
21 /* bpf_ct_opts - Options for CT lookup helpers
22  *
23  * Members:
24  * @netns_id   - Specify the network namespace for lookup
25  *		 Values:
26  *		   BPF_F_CURRENT_NETNS (-1)
27  *		     Use namespace associated with ctx (xdp_md, __sk_buff)
28  *		   [0, S32_MAX]
29  *		     Network Namespace ID
30  * @error      - Out parameter, set for any errors encountered
31  *		 Values:
32  *		   -EINVAL - Passed NULL for bpf_tuple pointer
33  *		   -EINVAL - opts->reserved is not 0
34  *		   -EINVAL - netns_id is less than -1
35  *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
36  *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
37  *		   -ENONET - No network namespace found for netns_id
38  *		   -ENOENT - Conntrack lookup could not find entry for tuple
39  *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
40  *				   or sizeof(tuple->ipv6)
41  * @l4proto    - Layer 4 protocol
42  *		 Values:
43  *		   IPPROTO_TCP, IPPROTO_UDP
44  * @dir:       - connection tracking tuple direction.
45  * @reserved   - Reserved member, will be reused for more options in future
46  *		 Values:
47  *		   0
48  */
49 struct bpf_ct_opts {
50 	s32 netns_id;
51 	s32 error;
52 	u8 l4proto;
53 	u8 dir;
54 	u8 reserved[2];
55 };
56 
57 enum {
58 	NF_BPF_CT_OPTS_SZ = 12,
59 };
60 
61 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
62 				 u32 tuple_len, u8 protonum, u8 dir,
63 				 struct nf_conntrack_tuple *tuple)
64 {
65 	union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
66 	union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
67 	union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
68 						  : &tuple->src.u;
69 	union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
70 						  : (void *)&tuple->dst.u;
71 
72 	if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
73 		return -EPROTO;
74 
75 	memset(tuple, 0, sizeof(*tuple));
76 
77 	switch (tuple_len) {
78 	case sizeof(bpf_tuple->ipv4):
79 		tuple->src.l3num = AF_INET;
80 		src->ip = bpf_tuple->ipv4.saddr;
81 		sport->tcp.port = bpf_tuple->ipv4.sport;
82 		dst->ip = bpf_tuple->ipv4.daddr;
83 		dport->tcp.port = bpf_tuple->ipv4.dport;
84 		break;
85 	case sizeof(bpf_tuple->ipv6):
86 		tuple->src.l3num = AF_INET6;
87 		memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
88 		sport->tcp.port = bpf_tuple->ipv6.sport;
89 		memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
90 		dport->tcp.port = bpf_tuple->ipv6.dport;
91 		break;
92 	default:
93 		return -EAFNOSUPPORT;
94 	}
95 	tuple->dst.protonum = protonum;
96 	tuple->dst.dir = dir;
97 
98 	return 0;
99 }
100 
101 static struct nf_conn *
102 __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
103 			u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
104 			u32 timeout)
105 {
106 	struct nf_conntrack_tuple otuple, rtuple;
107 	struct nf_conn *ct;
108 	int err;
109 
110 	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
111 	    opts_len != NF_BPF_CT_OPTS_SZ)
112 		return ERR_PTR(-EINVAL);
113 
114 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
115 		return ERR_PTR(-EINVAL);
116 
117 	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
118 				    IP_CT_DIR_ORIGINAL, &otuple);
119 	if (err < 0)
120 		return ERR_PTR(err);
121 
122 	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
123 				    IP_CT_DIR_REPLY, &rtuple);
124 	if (err < 0)
125 		return ERR_PTR(err);
126 
127 	if (opts->netns_id >= 0) {
128 		net = get_net_ns_by_id(net, opts->netns_id);
129 		if (unlikely(!net))
130 			return ERR_PTR(-ENONET);
131 	}
132 
133 	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
134 				GFP_ATOMIC);
135 	if (IS_ERR(ct))
136 		goto out;
137 
138 	memset(&ct->proto, 0, sizeof(ct->proto));
139 	__nf_ct_set_timeout(ct, timeout * HZ);
140 
141 out:
142 	if (opts->netns_id >= 0)
143 		put_net(net);
144 
145 	return ct;
146 }
147 
148 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
149 					  struct bpf_sock_tuple *bpf_tuple,
150 					  u32 tuple_len, struct bpf_ct_opts *opts,
151 					  u32 opts_len)
152 {
153 	struct nf_conntrack_tuple_hash *hash;
154 	struct nf_conntrack_tuple tuple;
155 	struct nf_conn *ct;
156 	int err;
157 
158 	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
159 	    opts_len != NF_BPF_CT_OPTS_SZ)
160 		return ERR_PTR(-EINVAL);
161 	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
162 		return ERR_PTR(-EPROTO);
163 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
164 		return ERR_PTR(-EINVAL);
165 
166 	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
167 				    IP_CT_DIR_ORIGINAL, &tuple);
168 	if (err < 0)
169 		return ERR_PTR(err);
170 
171 	if (opts->netns_id >= 0) {
172 		net = get_net_ns_by_id(net, opts->netns_id);
173 		if (unlikely(!net))
174 			return ERR_PTR(-ENONET);
175 	}
176 
177 	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
178 	if (opts->netns_id >= 0)
179 		put_net(net);
180 	if (!hash)
181 		return ERR_PTR(-ENOENT);
182 
183 	ct = nf_ct_tuplehash_to_ctrack(hash);
184 	opts->dir = NF_CT_DIRECTION(hash);
185 
186 	return ct;
187 }
188 
189 BTF_ID_LIST(btf_nf_conn_ids)
190 BTF_ID(struct, nf_conn)
191 BTF_ID(struct, nf_conn___init)
192 
193 /* Check writes into `struct nf_conn` */
194 static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
195 					   const struct bpf_reg_state *reg,
196 					   int off, int size)
197 {
198 	const struct btf_type *ncit, *nct, *t;
199 	size_t end;
200 
201 	ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
202 	nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
203 	t = btf_type_by_id(reg->btf, reg->btf_id);
204 	if (t != nct && t != ncit) {
205 		bpf_log(log, "only read is supported\n");
206 		return -EACCES;
207 	}
208 
209 	/* `struct nf_conn` and `struct nf_conn___init` have the same layout
210 	 * so we are safe to simply merge offset checks here
211 	 */
212 	switch (off) {
213 #if defined(CONFIG_NF_CONNTRACK_MARK)
214 	case offsetof(struct nf_conn, mark):
215 		end = offsetofend(struct nf_conn, mark);
216 		break;
217 #endif
218 	default:
219 		bpf_log(log, "no write support to nf_conn at off %d\n", off);
220 		return -EACCES;
221 	}
222 
223 	if (off + size > end) {
224 		bpf_log(log,
225 			"write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
226 			off, size, end);
227 		return -EACCES;
228 	}
229 
230 	return 0;
231 }
232 
233 __bpf_kfunc_start_defs();
234 
235 /* bpf_xdp_ct_alloc - Allocate a new CT entry
236  *
237  * Parameters:
238  * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
239  *		    Cannot be NULL
240  * @bpf_tuple	- Pointer to memory representing the tuple to look up
241  *		    Cannot be NULL
242  * @tuple__sz	- Length of the tuple structure
243  *		    Must be one of sizeof(bpf_tuple->ipv4) or
244  *		    sizeof(bpf_tuple->ipv6)
245  * @opts	- Additional options for allocation (documented above)
246  *		    Cannot be NULL
247  * @opts__sz	- Length of the bpf_ct_opts structure
248  *		    Must be NF_BPF_CT_OPTS_SZ (12)
249  */
250 __bpf_kfunc struct nf_conn___init *
251 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
252 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
253 {
254 	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
255 	struct nf_conn *nfct;
256 
257 	nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
258 				       opts, opts__sz, 10);
259 	if (IS_ERR(nfct)) {
260 		if (opts)
261 			opts->error = PTR_ERR(nfct);
262 		return NULL;
263 	}
264 
265 	return (struct nf_conn___init *)nfct;
266 }
267 
268 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
269  *		       reference to it
270  *
271  * Parameters:
272  * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
273  *		    Cannot be NULL
274  * @bpf_tuple	- Pointer to memory representing the tuple to look up
275  *		    Cannot be NULL
276  * @tuple__sz	- Length of the tuple structure
277  *		    Must be one of sizeof(bpf_tuple->ipv4) or
278  *		    sizeof(bpf_tuple->ipv6)
279  * @opts	- Additional options for lookup (documented above)
280  *		    Cannot be NULL
281  * @opts__sz	- Length of the bpf_ct_opts structure
282  *		    Must be NF_BPF_CT_OPTS_SZ (12)
283  */
284 __bpf_kfunc struct nf_conn *
285 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
286 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
287 {
288 	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
289 	struct net *caller_net;
290 	struct nf_conn *nfct;
291 
292 	caller_net = dev_net(ctx->rxq->dev);
293 	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
294 	if (IS_ERR(nfct)) {
295 		if (opts)
296 			opts->error = PTR_ERR(nfct);
297 		return NULL;
298 	}
299 	return nfct;
300 }
301 
302 /* bpf_skb_ct_alloc - Allocate a new CT entry
303  *
304  * Parameters:
305  * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
306  *		    Cannot be NULL
307  * @bpf_tuple	- Pointer to memory representing the tuple to look up
308  *		    Cannot be NULL
309  * @tuple__sz	- Length of the tuple structure
310  *		    Must be one of sizeof(bpf_tuple->ipv4) or
311  *		    sizeof(bpf_tuple->ipv6)
312  * @opts	- Additional options for allocation (documented above)
313  *		    Cannot be NULL
314  * @opts__sz	- Length of the bpf_ct_opts structure
315  *		    Must be NF_BPF_CT_OPTS_SZ (12)
316  */
317 __bpf_kfunc struct nf_conn___init *
318 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
319 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
320 {
321 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
322 	struct nf_conn *nfct;
323 	struct net *net;
324 
325 	net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
326 	nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
327 	if (IS_ERR(nfct)) {
328 		if (opts)
329 			opts->error = PTR_ERR(nfct);
330 		return NULL;
331 	}
332 
333 	return (struct nf_conn___init *)nfct;
334 }
335 
336 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
337  *		       reference to it
338  *
339  * Parameters:
340  * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
341  *		    Cannot be NULL
342  * @bpf_tuple	- Pointer to memory representing the tuple to look up
343  *		    Cannot be NULL
344  * @tuple__sz	- Length of the tuple structure
345  *		    Must be one of sizeof(bpf_tuple->ipv4) or
346  *		    sizeof(bpf_tuple->ipv6)
347  * @opts	- Additional options for lookup (documented above)
348  *		    Cannot be NULL
349  * @opts__sz	- Length of the bpf_ct_opts structure
350  *		    Must be NF_BPF_CT_OPTS_SZ (12)
351  */
352 __bpf_kfunc struct nf_conn *
353 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
354 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
355 {
356 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
357 	struct net *caller_net;
358 	struct nf_conn *nfct;
359 
360 	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
361 	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
362 	if (IS_ERR(nfct)) {
363 		if (opts)
364 			opts->error = PTR_ERR(nfct);
365 		return NULL;
366 	}
367 	return nfct;
368 }
369 
370 /* bpf_ct_insert_entry - Add the provided entry into a CT map
371  *
372  * This must be invoked for referenced PTR_TO_BTF_ID.
373  *
374  * @nfct	 - Pointer to referenced nf_conn___init object, obtained
375  *		   using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
376  */
377 __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
378 {
379 	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
380 	int err;
381 
382 	if (!nf_ct_is_confirmed(nfct))
383 		nfct->timeout += nfct_time_stamp;
384 	nfct->status |= IPS_CONFIRMED;
385 	err = nf_conntrack_hash_check_insert(nfct);
386 	if (err < 0) {
387 		nf_conntrack_free(nfct);
388 		return NULL;
389 	}
390 	return nfct;
391 }
392 
393 /* bpf_ct_release - Release acquired nf_conn object
394  *
395  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
396  * the program if any references remain in the program in all of the explored
397  * states.
398  *
399  * Parameters:
400  * @nf_conn	 - Pointer to referenced nf_conn object, obtained using
401  *		   bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
402  */
403 __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
404 {
405 	nf_ct_put(nfct);
406 }
407 
408 /* bpf_ct_set_timeout - Set timeout of allocated nf_conn
409  *
410  * Sets the default timeout of newly allocated nf_conn before insertion.
411  * This helper must be invoked for refcounted pointer to nf_conn___init.
412  *
413  * Parameters:
414  * @nfct	 - Pointer to referenced nf_conn object, obtained using
415  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
416  * @timeout      - Timeout in msecs.
417  */
418 __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
419 {
420 	__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
421 }
422 
423 /* bpf_ct_change_timeout - Change timeout of inserted nf_conn
424  *
425  * Change timeout associated of the inserted or looked up nf_conn.
426  * This helper must be invoked for refcounted pointer to nf_conn.
427  *
428  * Parameters:
429  * @nfct	 - Pointer to referenced nf_conn object, obtained using
430  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
431  * @timeout      - New timeout in msecs.
432  */
433 __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
434 {
435 	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
436 }
437 
438 /* bpf_ct_set_status - Set status field of allocated nf_conn
439  *
440  * Set the status field of the newly allocated nf_conn before insertion.
441  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
442  *
443  * Parameters:
444  * @nfct	 - Pointer to referenced nf_conn object, obtained using
445  *		   bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
446  * @status       - New status value.
447  */
448 __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
449 {
450 	return nf_ct_change_status_common((struct nf_conn *)nfct, status);
451 }
452 
453 /* bpf_ct_change_status - Change status of inserted nf_conn
454  *
455  * Change the status field of the provided connection tracking entry.
456  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
457  *
458  * Parameters:
459  * @nfct	 - Pointer to referenced nf_conn object, obtained using
460  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
461  * @status       - New status value.
462  */
463 __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
464 {
465 	return nf_ct_change_status_common(nfct, status);
466 }
467 
468 __bpf_kfunc_end_defs();
469 
470 BTF_SET8_START(nf_ct_kfunc_set)
471 BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
472 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
473 BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
474 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
475 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
476 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
477 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
478 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
479 BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
480 BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
481 BTF_SET8_END(nf_ct_kfunc_set)
482 
483 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
484 	.owner = THIS_MODULE,
485 	.set   = &nf_ct_kfunc_set,
486 };
487 
488 int register_nf_conntrack_bpf(void)
489 {
490 	int ret;
491 
492 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
493 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
494 	if (!ret) {
495 		mutex_lock(&nf_conn_btf_access_lock);
496 		nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
497 		mutex_unlock(&nf_conn_btf_access_lock);
498 	}
499 
500 	return ret;
501 }
502 
503 void cleanup_nf_conntrack_bpf(void)
504 {
505 	mutex_lock(&nf_conn_btf_access_lock);
506 	nfct_btf_struct_access = NULL;
507 	mutex_unlock(&nf_conn_btf_access_lock);
508 }
509