xref: /linux/samples/bpf/sockex3_kern.c (revision 9a6b55ac)
1 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include "bpf_helpers.h"
9 #include "bpf_legacy.h"
10 #include <uapi/linux/in.h>
11 #include <uapi/linux/if.h>
12 #include <uapi/linux/if_ether.h>
13 #include <uapi/linux/ip.h>
14 #include <uapi/linux/ipv6.h>
15 #include <uapi/linux/if_tunnel.h>
16 #include <uapi/linux/mpls.h>
/* Masks applied to the IPv4 frag_off field by ip_is_fragment():
 * IP_MF selects the "more fragments" flag, IP_OFFSET the fragment offset bits.
 */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
19 
/* Starts the definition of a program named bpf_func_<F> and places it in the
 * ELF section "socket/" followed by the stringified argument.
 * NOTE(review): __stringify() presumably expands F first, so PROG(PARSE_IP)
 * would land in "socket/3" - confirm against the __stringify definition.
 */
#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
21 
/* Program array used as the tail-call jump table: parse_eth_proto() indexes
 * it with the PARSE_* constants. Holds up to 8 entries.
 */
struct bpf_map_def SEC("maps") jmp_table = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 8,
};
28 
/* jmp_table indices of the per-protocol parser programs declared with PROG() */
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
33 
34 /* protocol dispatch routine.
35  * It tail-calls next BPF program depending on eth proto
36  * Note, we could have used:
37  * bpf_tail_call(skb, &jmp_table, proto);
38  * but it would need large prog_array
39  */
40 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
41 {
42 	switch (proto) {
43 	case ETH_P_8021Q:
44 	case ETH_P_8021AD:
45 		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
46 		break;
47 	case ETH_P_MPLS_UC:
48 	case ETH_P_MPLS_MC:
49 		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
50 		break;
51 	case ETH_P_IP:
52 		bpf_tail_call(skb, &jmp_table, PARSE_IP);
53 		break;
54 	case ETH_P_IPV6:
55 		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
56 		break;
57 	}
58 }
59 
/* VLAN tag header as read by the PARSE_VLAN program: the tag control
 * information followed by the EtherType of the encapsulated payload.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};
64 
/* Flow description filled in by the parser programs and used as the key of
 * hash_map (see update_stats()).
 */
struct flow_key_record {
	/* source address: loaded IPv4 saddr, or ipv6_addr_hash() of the IPv6 saddr */
	__be32 src;
	/* destination address, filled the same way as src */
	__be32 dst;
	union {
		/* 32-bit word loaded from the start of the TCP/UDP header */
		__be32 ports;
		/* the same word viewed as two 16-bit halves */
		__be16 port16[2];
	};
	/* protocol number handed to parse_ip_proto() */
	__u32 ip_proto;
};
74 
75 static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
76 {
77 	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
78 		& (IP_MF | IP_OFFSET);
79 }
80 
81 static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
82 {
83 	__u64 w0 = load_word(ctx, off);
84 	__u64 w1 = load_word(ctx, off + 4);
85 	__u64 w2 = load_word(ctx, off + 8);
86 	__u64 w3 = load_word(ctx, off + 12);
87 
88 	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
89 }
90 
/* Scratch state stored in percpu_map: the flow key being assembled while a
 * packet is handed from one parser program to the next.
 */
struct globals {
	struct flow_key_record flow;
};
94 
/* Array map with 32 struct globals slots; this_cpu_globals() indexes it with
 * the current CPU id.
 */
struct bpf_map_def SEC("maps") percpu_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct globals),
	.max_entries = 32,
};
101 
102 /* user poor man's per_cpu until native support is ready */
103 static struct globals *this_cpu_globals(void)
104 {
105 	u32 key = bpf_get_smp_processor_id();
106 
107 	return bpf_map_lookup_elem(&percpu_map, &key);
108 }
109 
/* Some simple per-flow stats for user space consumption; this is the value
 * type of hash_map.
 */
struct pair {
	/* number of packets accounted to the flow */
	__u64 packets;
	/* sum of skb->len over those packets */
	__u64 bytes;
};
115 
/* Hash map from flow key (struct flow_key_record) to its packet/byte counters
 * (struct pair); holds up to 1024 flows and is maintained by update_stats().
 */
struct bpf_map_def SEC("maps") hash_map = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(struct flow_key_record),
	.value_size = sizeof(struct pair),
	.max_entries = 1024,
};
122 
123 static void update_stats(struct __sk_buff *skb, struct globals *g)
124 {
125 	struct flow_key_record key = g->flow;
126 	struct pair *value;
127 
128 	value = bpf_map_lookup_elem(&hash_map, &key);
129 	if (value) {
130 		__sync_fetch_and_add(&value->packets, 1);
131 		__sync_fetch_and_add(&value->bytes, skb->len);
132 	} else {
133 		struct pair val = {1, skb->len};
134 
135 		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
136 	}
137 }
138 
139 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
140 					   struct globals *g, __u32 ip_proto)
141 {
142 	__u32 nhoff = skb->cb[0];
143 	int poff;
144 
145 	switch (ip_proto) {
146 	case IPPROTO_GRE: {
147 		struct gre_hdr {
148 			__be16 flags;
149 			__be16 proto;
150 		};
151 
152 		__u32 gre_flags = load_half(skb,
153 					    nhoff + offsetof(struct gre_hdr, flags));
154 		__u32 gre_proto = load_half(skb,
155 					    nhoff + offsetof(struct gre_hdr, proto));
156 
157 		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
158 			break;
159 
160 		nhoff += 4;
161 		if (gre_flags & GRE_CSUM)
162 			nhoff += 4;
163 		if (gre_flags & GRE_KEY)
164 			nhoff += 4;
165 		if (gre_flags & GRE_SEQ)
166 			nhoff += 4;
167 
168 		skb->cb[0] = nhoff;
169 		parse_eth_proto(skb, gre_proto);
170 		break;
171 	}
172 	case IPPROTO_IPIP:
173 		parse_eth_proto(skb, ETH_P_IP);
174 		break;
175 	case IPPROTO_IPV6:
176 		parse_eth_proto(skb, ETH_P_IPV6);
177 		break;
178 	case IPPROTO_TCP:
179 	case IPPROTO_UDP:
180 		g->flow.ports = load_word(skb, nhoff);
181 	case IPPROTO_ICMP:
182 		g->flow.ip_proto = ip_proto;
183 		update_stats(skb, g);
184 		break;
185 	default:
186 		break;
187 	}
188 }
189 
190 PROG(PARSE_IP)(struct __sk_buff *skb)
191 {
192 	struct globals *g = this_cpu_globals();
193 	__u32 nhoff, verlen, ip_proto;
194 
195 	if (!g)
196 		return 0;
197 
198 	nhoff = skb->cb[0];
199 
200 	if (unlikely(ip_is_fragment(skb, nhoff)))
201 		return 0;
202 
203 	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
204 
205 	if (ip_proto != IPPROTO_GRE) {
206 		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
207 		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
208 	}
209 
210 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
211 	nhoff += (verlen & 0xF) << 2;
212 
213 	skb->cb[0] = nhoff;
214 	parse_ip_proto(skb, g, ip_proto);
215 	return 0;
216 }
217 
218 PROG(PARSE_IPV6)(struct __sk_buff *skb)
219 {
220 	struct globals *g = this_cpu_globals();
221 	__u32 nhoff, ip_proto;
222 
223 	if (!g)
224 		return 0;
225 
226 	nhoff = skb->cb[0];
227 
228 	ip_proto = load_byte(skb,
229 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
230 	g->flow.src = ipv6_addr_hash(skb,
231 				     nhoff + offsetof(struct ipv6hdr, saddr));
232 	g->flow.dst = ipv6_addr_hash(skb,
233 				     nhoff + offsetof(struct ipv6hdr, daddr));
234 	nhoff += sizeof(struct ipv6hdr);
235 
236 	skb->cb[0] = nhoff;
237 	parse_ip_proto(skb, g, ip_proto);
238 	return 0;
239 }
240 
241 PROG(PARSE_VLAN)(struct __sk_buff *skb)
242 {
243 	__u32 nhoff, proto;
244 
245 	nhoff = skb->cb[0];
246 
247 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
248 						h_vlan_encapsulated_proto));
249 	nhoff += sizeof(struct vlan_hdr);
250 	skb->cb[0] = nhoff;
251 
252 	parse_eth_proto(skb, proto);
253 
254 	return 0;
255 }
256 
257 PROG(PARSE_MPLS)(struct __sk_buff *skb)
258 {
259 	__u32 nhoff, label;
260 
261 	nhoff = skb->cb[0];
262 
263 	label = load_word(skb, nhoff);
264 	nhoff += sizeof(struct mpls_label);
265 	skb->cb[0] = nhoff;
266 
267 	if (label & MPLS_LS_S_MASK) {
268 		__u8 verlen = load_byte(skb, nhoff);
269 		if ((verlen & 0xF0) == 4)
270 			parse_eth_proto(skb, ETH_P_IP);
271 		else
272 			parse_eth_proto(skb, ETH_P_IPV6);
273 	} else {
274 		parse_eth_proto(skb, ETH_P_MPLS_UC);
275 	}
276 
277 	return 0;
278 }
279 
280 SEC("socket/0")
281 int main_prog(struct __sk_buff *skb)
282 {
283 	__u32 nhoff = ETH_HLEN;
284 	__u32 proto = load_half(skb, 12);
285 
286 	skb->cb[0] = nhoff;
287 	parse_eth_proto(skb, proto);
288 	return 0;
289 }
290 
/* License string emitted into the "license" ELF section of the object. */
char _license[] SEC("license") = "GPL";
292