1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 
4 #include <stddef.h>
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <linux/tcp.h>
10 #include <linux/socket.h>
11 #include <linux/bpf.h>
12 #include <linux/types.h>
13 #include <bpf/bpf_helpers.h>
14 #include <bpf/bpf_endian.h>
15 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
16 #include "test_tcp_hdr_options.h"
17 
18 #ifndef sizeof_field
19 #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
20 #endif
21 
/* Test knobs set by the userspace test before attaching:
 * test_kind selects the TCP option kind to write/search (TCPOPT_EXP uses
 * the experimental-option format carrying test_magic; any other kind uses
 * the regular option format).
 */
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
/* Snapshot of bpf_sock_ops_cb_flags taken at passive establishment,
 * read back by userspace to check cb-flag inheritance from the listener.
 */
__u32 inherit_cb_flags = 0;

/* Options written / received on the passive (server) side */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out	= {};

struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in	= {};

/* Options written / received on the active (client) side */
struct bpf_test_option active_syn_out	= {};
struct bpf_test_option active_fin_out	= {};

struct bpf_test_option active_estab_in	= {};
struct bpf_test_option active_fin_in	= {};
37 
/* Per-socket storage tracking this connection's test state
 * (active/passive side, syncookie, fastopen, resend_syn).
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");
44 
skops_want_cookie(const struct bpf_sock_ops * skops)45 static bool skops_want_cookie(const struct bpf_sock_ops *skops)
46 {
47 	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
48 }
49 
skops_current_mss(const struct bpf_sock_ops * skops)50 static bool skops_current_mss(const struct bpf_sock_ops *skops)
51 {
52 	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
53 }
54 
/* Total on-the-wire length needed to encode a bpf_test_option with the
 * given flags, including the kind/len (and magic for TCPOPT_EXP) prefix.
 * Returns 0 when no flags are set, i.e. nothing to write.
 */
static __u8 option_total_len(__u8 flags)
{
	__u8 nr_bytes = 1; /* the flags byte itself */
	__u8 bit;

	if (!flags)
		return 0;

	/* Every flag except RESEND carries one extra data byte */
	for (bit = OPTION_RESEND + 1; bit < __NR_OPTION_FLAGS; bit++)
		if (TEST_OPTION_FLAGS(flags, bit))
			nr_bytes++;

	if (test_kind == TCPOPT_EXP)
		return nr_bytes + TCP_BPF_EXPOPT_BASE_LEN;

	return nr_bytes + 2; /* +1 kind, +1 kind-len */
}
71 
/* Serialize test_opt into data: the flags byte first, then one byte
 * per set data-carrying flag, in a fixed order matching the parser.
 */
static void write_test_option(const struct bpf_test_option *test_opt,
			      __u8 *data)
{
	__u8 *p = data;

	*p++ = test_opt->flags;
	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
		*p++ = test_opt->max_delack_ms;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
		*p++ = test_opt->rand;
}
84 
/* Write test_opt into the outgoing TCP header in the format selected by
 * test_kind: experimental (kind TCPOPT_EXP + 16-bit magic) or a regular
 * option.  Returns CG_OK, or reports the error via RET_CG_ERR() when
 * bpf_store_hdr_opt() fails.
 */
static int store_option(struct bpf_sock_ops *skops,
			const struct bpf_test_option *test_opt)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} write_opt;
	int err;

	if (test_kind == TCPOPT_EXP) {
		write_opt.exprm.kind = TCPOPT_EXP;
		write_opt.exprm.len = option_total_len(test_opt->flags);
		write_opt.exprm.magic = __bpf_htons(test_magic);
		/* Zero the data area before packing the used bytes */
		write_opt.exprm.data32 = 0;
		write_test_option(test_opt, write_opt.exprm.data);
		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
					sizeof(write_opt.exprm), 0);
	} else {
		write_opt.regular.kind = test_kind;
		write_opt.regular.len = option_total_len(test_opt->flags);
		write_opt.regular.data32 = 0;
		write_test_option(test_opt, write_opt.regular.data);
		err = bpf_store_hdr_opt(skops, &write_opt.regular,
					sizeof(write_opt.regular), 0);
	}

	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}
116 
parse_test_option(struct bpf_test_option * opt,const __u8 * start)117 static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
118 {
119 	opt->flags = *start++;
120 
121 	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
122 		opt->max_delack_ms = *start++;
123 
124 	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
125 		opt->rand = *start++;
126 
127 	return 0;
128 }
129 
/* Search the skb's TCP header (or the saved SYN when from_syn is true)
 * for our test option and parse it into test_opt.  Returns 0 on success
 * or a negative errno from bpf_load_hdr_opt() (-ENOMSG: not present).
 */
static int load_option(struct bpf_sock_ops *skops,
		       struct bpf_test_option *test_opt, bool from_syn)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} search_opt;
	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;

	if (test_kind == TCPOPT_EXP) {
		/* Experimental option: search by kind + magic.  len 4
		 * covers kind, len and the 2-byte magic for the search.
		 */
		search_opt.exprm.kind = TCPOPT_EXP;
		search_opt.exprm.len = 4;
		search_opt.exprm.magic = __bpf_htons(test_magic);
		search_opt.exprm.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
				       sizeof(search_opt.exprm), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.exprm.data);
	} else {
		/* Regular option: search by kind only (len 0) */
		search_opt.regular.kind = test_kind;
		search_opt.regular.len = 0;
		search_opt.regular.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
				       sizeof(search_opt.regular), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.regular.data);
	}
}
160 
synack_opt_len(struct bpf_sock_ops * skops)161 static int synack_opt_len(struct bpf_sock_ops *skops)
162 {
163 	struct bpf_test_option test_opt = {};
164 	__u8 optlen;
165 	int err;
166 
167 	if (!passive_synack_out.flags)
168 		return CG_OK;
169 
170 	err = load_option(skops, &test_opt, true);
171 
172 	/* bpf_test_option is not found */
173 	if (err == -ENOMSG)
174 		return CG_OK;
175 
176 	if (err)
177 		RET_CG_ERR(err);
178 
179 	optlen = option_total_len(passive_synack_out.flags);
180 	if (optlen) {
181 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
182 		if (err)
183 			RET_CG_ERR(err);
184 	}
185 
186 	return CG_OK;
187 }
188 
write_synack_opt(struct bpf_sock_ops * skops)189 static int write_synack_opt(struct bpf_sock_ops *skops)
190 {
191 	struct bpf_test_option opt;
192 
193 	if (!passive_synack_out.flags)
194 		/* We should not even be called since no header
195 		 * space has been reserved.
196 		 */
197 		RET_CG_ERR(0);
198 
199 	opt = passive_synack_out;
200 	if (skops_want_cookie(skops))
201 		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
202 
203 	return store_option(skops, &opt);
204 }
205 
syn_opt_len(struct bpf_sock_ops * skops)206 static int syn_opt_len(struct bpf_sock_ops *skops)
207 {
208 	__u8 optlen;
209 	int err;
210 
211 	if (!active_syn_out.flags)
212 		return CG_OK;
213 
214 	optlen = option_total_len(active_syn_out.flags);
215 	if (optlen) {
216 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
217 		if (err)
218 			RET_CG_ERR(err);
219 	}
220 
221 	return CG_OK;
222 }
223 
write_syn_opt(struct bpf_sock_ops * skops)224 static int write_syn_opt(struct bpf_sock_ops *skops)
225 {
226 	if (!active_syn_out.flags)
227 		RET_CG_ERR(0);
228 
229 	return store_option(skops, &active_syn_out);
230 }
231 
fin_opt_len(struct bpf_sock_ops * skops)232 static int fin_opt_len(struct bpf_sock_ops *skops)
233 {
234 	struct bpf_test_option *opt;
235 	struct hdr_stg *hdr_stg;
236 	__u8 optlen;
237 	int err;
238 
239 	if (!skops->sk)
240 		RET_CG_ERR(0);
241 
242 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
243 	if (!hdr_stg)
244 		RET_CG_ERR(0);
245 
246 	if (hdr_stg->active)
247 		opt = &active_fin_out;
248 	else
249 		opt = &passive_fin_out;
250 
251 	optlen = option_total_len(opt->flags);
252 	if (optlen) {
253 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
254 		if (err)
255 			RET_CG_ERR(err);
256 	}
257 
258 	return CG_OK;
259 }
260 
write_fin_opt(struct bpf_sock_ops * skops)261 static int write_fin_opt(struct bpf_sock_ops *skops)
262 {
263 	struct bpf_test_option *opt;
264 	struct hdr_stg *hdr_stg;
265 
266 	if (!skops->sk)
267 		RET_CG_ERR(0);
268 
269 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
270 	if (!hdr_stg)
271 		RET_CG_ERR(0);
272 
273 	if (hdr_stg->active)
274 		opt = &active_fin_out;
275 	else
276 		opt = &passive_fin_out;
277 
278 	if (!opt->flags)
279 		RET_CG_ERR(0);
280 
281 	return store_option(skops, opt);
282 }
283 
resend_in_ack(struct bpf_sock_ops * skops)284 static int resend_in_ack(struct bpf_sock_ops *skops)
285 {
286 	struct hdr_stg *hdr_stg;
287 
288 	if (!skops->sk)
289 		return -1;
290 
291 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
292 	if (!hdr_stg)
293 		return -1;
294 
295 	return !!hdr_stg->resend_syn;
296 }
297 
nodata_opt_len(struct bpf_sock_ops * skops)298 static int nodata_opt_len(struct bpf_sock_ops *skops)
299 {
300 	int resend;
301 
302 	resend = resend_in_ack(skops);
303 	if (resend < 0)
304 		RET_CG_ERR(0);
305 
306 	if (resend)
307 		return syn_opt_len(skops);
308 
309 	return CG_OK;
310 }
311 
write_nodata_opt(struct bpf_sock_ops * skops)312 static int write_nodata_opt(struct bpf_sock_ops *skops)
313 {
314 	int resend;
315 
316 	resend = resend_in_ack(skops);
317 	if (resend < 0)
318 		RET_CG_ERR(0);
319 
320 	if (resend)
321 		return write_syn_opt(skops);
322 
323 	return CG_OK;
324 }
325 
/* HDR_OPT_LEN_CB for a data packet.  Behaves like the nodata case;
 * kept separate mostly as an example of dispatching on skops->skb_len.
 */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	/* Same as the nodata version.  Mostly to show
	 * an example usage on skops->skb_len.
	 */
	return nodata_opt_len(skops);
}
333 
/* WRITE_HDR_OPT_CB for a data packet: same handling as pure ACK */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}
338 
current_mss_opt_len(struct bpf_sock_ops * skops)339 static int current_mss_opt_len(struct bpf_sock_ops *skops)
340 {
341 	/* Reserve maximum that may be needed */
342 	int err;
343 
344 	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
345 	if (err)
346 		RET_CG_ERR(err);
347 
348 	return CG_OK;
349 }
350 
handle_hdr_opt_len(struct bpf_sock_ops * skops)351 static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
352 {
353 	__u8 tcp_flags = skops_tcp_flags(skops);
354 
355 	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
356 		return synack_opt_len(skops);
357 
358 	if (tcp_flags & TCPHDR_SYN)
359 		return syn_opt_len(skops);
360 
361 	if (tcp_flags & TCPHDR_FIN)
362 		return fin_opt_len(skops);
363 
364 	if (skops_current_mss(skops))
365 		/* The kernel is calculating the MSS */
366 		return current_mss_opt_len(skops);
367 
368 	if (skops->skb_len)
369 		return data_opt_len(skops);
370 
371 	return nodata_opt_len(skops);
372 }
373 
/* BPF_SOCK_OPS_WRITE_HDR_OPT_CB entry: write the previously reserved
 * option bytes, dispatching on the outgoing packet's TCP flags.
 */
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);
	struct tcphdr *th;

	/* SYNACK must be tested before plain SYN: it has both bits set */
	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return write_synack_opt(skops);

	if (tcp_flags & TCPHDR_SYN)
		return write_syn_opt(skops);

	if (tcp_flags & TCPHDR_FIN)
		return write_fin_opt(skops);

	/* Bounds check required by the BPF verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	/* Data packet iff the skb is longer than the TCP header itself */
	if (skops->skb_len > tcp_hdrlen(th))
		return write_data_opt(skops);

	return write_nodata_opt(skops);
}
397 
/* Cap the socket's max delayed-ack timeout (TCP_BPF_DELACK_MAX takes
 * microseconds; our option carries milliseconds).
 */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us;

	max_delack_us = (__u32)max_delack_ms * 1000;
	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}
405 
/* Raise the socket's minimum RTO to cover the peer's advertised max
 * delayed-ack (TCP_BPF_RTO_MIN takes microseconds).
 */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us;

	min_rto_us = (__u32)peer_max_delack_ms * 1000;
	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN,
			      &min_rto_us, sizeof(min_rto_us));
}
413 
/* BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: the active (connecting) side
 * finished the handshake.  Record the options received in the SYNACK,
 * create the sk storage, and apply delayed-ack/RTO tunings.
 */
static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	/* Load from the SYNACK (current skb), not from a saved SYN */
	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	/* Peer set OPTION_RESEND: it was in syncookie mode and asks us
	 * to resend our syn options in the ACK.
	 */
	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has been received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);

	/* Honour the max delayed-ack we advertised in our SYN */
	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	/* Keep our RTO above the peer's advertised max delayed-ack */
	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
464 
/* BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: the passive (listening) side
 * finished the handshake.  Load the peer's options (from the saved SYN,
 * or from the ACK when in syncookie mode), create the sk storage, and
 * apply delayed-ack/RTO tunings.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	/* Exported so userspace can check the cb flags inherited from
	 * the listener.
	 */
	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	/* Bounds check required by the BPF verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	/* Honour the max delayed-ack we advertised in our SYNACK */
	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	/* Keep our RTO above the peer's advertised max delayed-ack */
	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
530 
/* BPF_SOCK_OPS_PARSE_HDR_OPT_CB: a post-3WHS packet has been received.
 * Use it to confirm previously written options reached the peer (clear
 * the cb flags turned on for resend_syn/fastopen), and record the
 * FIN-carried options when present.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	/* Bounds check required by the BPF verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written option includes:
		 *     - Active side: resend_syn in ACK during syncookie
		 *      or
		 *     - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		/* Record the option carried by the peer's FIN so that
		 * userspace can verify it.
		 */
		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
596 
597 SEC("sockops/estab")
estab(struct bpf_sock_ops * skops)598 int estab(struct bpf_sock_ops *skops)
599 {
600 	int true_val = 1;
601 
602 	switch (skops->op) {
603 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
604 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
605 			       &true_val, sizeof(true_val));
606 		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
607 		break;
608 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
609 		set_hdr_cb_flags(skops, 0);
610 		break;
611 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
612 		return handle_parse_hdr(skops);
613 	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
614 		return handle_hdr_opt_len(skops);
615 	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
616 		return handle_write_hdr_opt(skops);
617 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
618 		return handle_passive_estab(skops);
619 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
620 		return handle_active_estab(skops);
621 	}
622 
623 	return CG_OK;
624 }
625 
/* GPL license declaration, required to call GPL-only BPF helpers */
char _license[] SEC("license") = "GPL";
627