1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 
4 #include <stddef.h>
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <linux/ipv6.h>
10 #include <linux/tcp.h>
11 #include <linux/socket.h>
12 #include <linux/bpf.h>
13 #include <linux/types.h>
14 #include <bpf/bpf_helpers.h>
15 #include <bpf/bpf_endian.h>
16 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
17 #include "test_tcp_hdr_options.h"
18 
/* Shared state for the callbacks below.  The _n / _h suffixes distinguish
 * the network-byte-order and host-byte-order copies of the same port
 * (passive_lport_n is computed as __bpf_htons(passive_lport_h)).
 */

/* Expected value of the last 16-bit word of both the source and destination
 * IPv6 address found in the saved SYN.
 * NOTE(review): presumably overridden by the userspace side of the test
 * before attaching -- confirm against the loader.
 */
__u16 last_addr16_n = __bpf_htons(1);
/* Active side's port: _n is taken from the TCP source port of the outgoing
 * SYN, _h from skops->local_port (both recorded in write_active_opt()).
 */
__u16 active_lport_n = 0;
__u16 active_lport_h = 0;
/* Listener's port, recorded in the BPF_SOCK_OPS_TCP_LISTEN_CB callback. */
__u16 passive_lport_n = 0;
__u16 passive_lport_h = 0;

/* options received at passive side */
/* nr_pure_ack, nr_data and nr_fin are bumped per parsed packet in
 * check_active_hdr_in(); nr_syn is bumped once per passive establishment in
 * handle_passive_estab().
 */
unsigned int nr_pure_ack = 0;
unsigned int nr_data = 0;
unsigned int nr_syn = 0;
unsigned int nr_fin = 0;
30 
/* Check the header received from the active side.
 *
 * Exercises bpf_load_hdr_opt() with a series of valid and deliberately
 * invalid search requests and verifies each return code, then (for the SYN
 * case only) verifies the saved SYN headers returned by bpf_getsockopt().
 *
 * @skops:     sock_ops context of the current callback.
 * @check_syn: when true, BPF_LOAD_HDR_OPT_TCP_SYN is passed as the load flag
 *             and the TCP_BPF_SYN_IP / TCP_BPF_SYN checks at the end are run;
 *             when false, the flag is 0 and the function returns right after
 *             the option checks.
 *
 * Returns CG_OK when every expectation holds.  Any mismatch goes through
 * RET_CG_ERR() (defined outside this file; presumably records the error and
 * returns CG_ERR -- confirm in test_tcp_hdr_options.h).
 */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
{
	/* One scratch buffer reused for every helper call below; each member
	 * is just a different view of the same bytes.
	 */
	union {
		struct tcphdr th;
		struct ipv6hdr ip6;
		struct tcp_exprm_opt exprm_opt;
		struct tcp_opt reg_opt;
		__u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
	} hdr = {};
	__u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
	struct tcphdr *pth;
	int ret;

	/* 0xB9 is the regular option kind stored by write_active_opt() */
	hdr.reg_opt.kind = 0xB9;

	/* The option is 4 bytes long instead of 2 bytes */
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	/* Test searching magic with regular kind */
	hdr.reg_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* With len reset to 0 the search is expected to succeed and fill in
	 * the full option: kind 0xB9, len 4, payload 0xfa 0xce (the bytes
	 * written by write_active_opt()).
	 */
	hdr.reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
	    hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* Test searching experimental option with invalid kind length */
	hdr.exprm_opt.kind = TCPOPT_EXP;
	hdr.exprm_opt.len = 5;
	hdr.exprm_opt.magic = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* Test searching experimental option with 0 magic value */
	hdr.exprm_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -ENOMSG)
		RET_CG_ERR(ret);

	/* Search with the magic that write_active_opt() stored; this one is
	 * expected to be found and returned unchanged.
	 */
	hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != 4 || hdr.exprm_opt.len != 4 ||
	    hdr.exprm_opt.kind != TCPOPT_EXP ||
	    hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	/* Everything below inspects the saved SYN only */
	if (!check_syn)
		return CG_OK;

	/* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
	 *
	 * Test loading from tp->saved_syn for other sk_state.
	 */
	/* A buffer of only sizeof(struct ipv6hdr) is expected to yield
	 * -ENOSPC, yet the address check right below relies on the IPv6
	 * header bytes having been copied into hdr.ip6 anyway.
	 */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
			     sizeof(hdr.ip6));
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
	    hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
		RET_CG_ERR(0);

	/* Retry with the whole 100-byte buffer; this must not fail */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	/* The TCP header is read from right behind the IPv6 header */
	pth = (struct tcphdr *)(&hdr.ip6 + 1);
	if (pth->dest != passive_lport_n || pth->source != active_lport_n)
		RET_CG_ERR(0);

	/* TCP_BPF_SYN: the buffer is now read as starting with the TCP header */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
		RET_CG_ERR(0);

	return CG_OK;
}
123 
/* Validate the header options of the active side's SYN.
 *
 * Thin wrapper: runs __check_active_hdr_in() with check_syn enabled and
 * passes its result back unchanged.
 */
static int check_active_syn_in(struct bpf_sock_ops *skops)
{
	const bool check_syn = true;

	return __check_active_hdr_in(skops, check_syn);
}
128 
129 static int check_active_hdr_in(struct bpf_sock_ops *skops)
130 {
131 	struct tcphdr *th;
132 
133 	if (__check_active_hdr_in(skops, false) == CG_ERR)
134 		return CG_ERR;
135 
136 	th = skops->skb_data;
137 	if (th + 1 > skops->skb_data_end)
138 		RET_CG_ERR(0);
139 
140 	if (tcp_hdrlen(th) < skops->skb_len)
141 		nr_data++;
142 
143 	if (th->fin)
144 		nr_fin++;
145 
146 	if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
147 		nr_pure_ack++;
148 
149 	return CG_OK;
150 }
151 
152 static int active_opt_len(struct bpf_sock_ops *skops)
153 {
154 	int err;
155 
156 	/* Reserve more than enough to allow the -EEXIST test in
157 	 * the write_active_opt().
158 	 */
159 	err = bpf_reserve_hdr_opt(skops, 12, 0);
160 	if (err)
161 		RET_CG_ERR(err);
162 
163 	return CG_OK;
164 }
165 
/* Write the active side's header options and verify the store/load helpers.
 *
 * Stores one experimental option (kind TCPOPT_EXP, magic 0xeB9F) and one
 * regular option (kind 0xB9, payload 0xfa 0xce), checks that storing either
 * one a second time fails with -EEXIST, and reads both back with
 * bpf_load_hdr_opt().  For a SYN it additionally records the active port
 * and checks the window-scale option already present in the packet.
 *
 * Returns CG_OK when every expectation holds; any mismatch goes through
 * RET_CG_ERR().
 */
static int write_active_opt(struct bpf_sock_ops *skops)
{
	struct tcp_exprm_opt exprm_opt = {};
	struct tcp_opt win_scale_opt = {};
	struct tcp_opt reg_opt = {};
	struct tcphdr *th;
	int err, ret;

	exprm_opt.kind = TCPOPT_EXP;
	exprm_opt.len = 4;
	exprm_opt.magic = __bpf_htons(0xeB9F);

	reg_opt.kind = 0xB9;
	reg_opt.len = 4;
	reg_opt.data[0] = 0xfa;
	reg_opt.data[1] = 0xce;

	/* Only the kind is set: this struct is used as a search key below */
	win_scale_opt.kind = TCPOPT_WINDOW;

	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err)
		RET_CG_ERR(err);

	/* Store the same exprm option */
	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Same first-store / duplicate-store check for the regular option */
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err)
		RET_CG_ERR(err);
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Check the option has been written and can be searched */
	ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
	    exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	/* len is cleared before searching by kind; the load is expected to
	 * write the real length (4) back into it.
	 */
	reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
	    reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* Bounds-check the fixed TCP header before reading its flags */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Remember the active port in both representations; the
		 * other callbacks compare against these globals.
		 */
		active_lport_h = skops->local_port;
		active_lport_n = th->source;

		/* Search the win scale option written by kernel
		 * in the SYN packet.
		 */
		ret = bpf_load_hdr_opt(skops, &win_scale_opt,
				       sizeof(win_scale_opt), 0);
		if (ret != 3 || win_scale_opt.len != 3 ||
		    win_scale_opt.kind != TCPOPT_WINDOW)
			RET_CG_ERR(ret);

		/* Write the win scale option that kernel
		 * has already written.
		 */
		err = bpf_store_hdr_opt(skops, &win_scale_opt,
					sizeof(win_scale_opt), 0);
		if (err != -EEXIST)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
241 
242 static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
243 {
244 	__u8 tcp_flags = skops_tcp_flags(skops);
245 
246 	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
247 		/* Check the SYN from bpf_sock_ops_kern->syn_skb */
248 		return check_active_syn_in(skops);
249 
250 	/* Passive side should have cleared the write hdr cb by now */
251 	if (skops->local_port == passive_lport_h)
252 		RET_CG_ERR(0);
253 
254 	return active_opt_len(skops);
255 }
256 
257 static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
258 {
259 	if (skops->local_port == passive_lport_h)
260 		RET_CG_ERR(0);
261 
262 	return write_active_opt(skops);
263 }
264 
265 static int handle_parse_hdr(struct bpf_sock_ops *skops)
266 {
267 	/* Passive side is not writing any non-standard/unknown
268 	 * option, so the active side should never be called.
269 	 */
270 	if (skops->local_port == active_lport_h)
271 		RET_CG_ERR(0);
272 
273 	return check_active_hdr_in(skops);
274 }
275 
276 static int handle_passive_estab(struct bpf_sock_ops *skops)
277 {
278 	int err;
279 
280 	/* No more write hdr cb */
281 	bpf_sock_ops_cb_flags_set(skops,
282 				  skops->bpf_sock_ops_cb_flags &
283 				  ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
284 
285 	/* Recheck the SYN but check the tp->saved_syn this time */
286 	err = check_active_syn_in(skops);
287 	if (err == CG_ERR)
288 		return err;
289 
290 	nr_syn++;
291 
292 	/* The ack has header option written by the active side also */
293 	return check_active_hdr_in(skops);
294 }
295 
296 SEC("sockops")
297 int misc_estab(struct bpf_sock_ops *skops)
298 {
299 	int true_val = 1;
300 
301 	switch (skops->op) {
302 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
303 		passive_lport_h = skops->local_port;
304 		passive_lport_n = __bpf_htons(passive_lport_h);
305 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
306 			       &true_val, sizeof(true_val));
307 		set_hdr_cb_flags(skops, 0);
308 		break;
309 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
310 		set_hdr_cb_flags(skops, 0);
311 		break;
312 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
313 		return handle_parse_hdr(skops);
314 	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
315 		return handle_hdr_opt_len(skops);
316 	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
317 		return handle_write_hdr_opt(skops);
318 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
319 		return handle_passive_estab(skops);
320 	}
321 
322 	return CG_OK;
323 }
324 
/* License string emitted via SEC("license").
 * NOTE(review): presumably what the BPF loader/kernel inspects to allow
 * GPL-only helpers -- confirm against the libbpf documentation.
 */
char _license[] SEC("license") = "GPL";
326