#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct work_struct nf_flow_offload_work;
static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
static LIST_HEAD(flow_offload_pending_list);

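/*
 * One deferred offload request (add, delete or stats refresh) for a single
 * flow. Requests are queued on flow_offload_pending_list and processed
 * asynchronously by nf_flow_offload_work.
 */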
struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	int			priority;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
};

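/*
 * Match key/mask layout handed to drivers. It mirrors the flow dissector
 * keys used by the flower classifier, so the same key/mask pair can be
 * passed through TC_SETUP_CLSFLOWER block callbacks.
 */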
struct nf_flow_key {
	struct flow_dissector_key_meta			meta;
	struct flow_dissector_key_control		control;
	struct flow_dissector_key_basic			basic;
	union {
		struct flow_dissector_key_ipv4_addrs	ipv4;
		struct flow_dissector_key_ipv6_addrs	ipv6;
	};
	struct flow_dissector_key_tcp			tcp;
	struct flow_dissector_key_ports			tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

struct nf_flow_match {
	struct flow_dissector	dissector;
	struct nf_flow_key	key;
	struct nf_flow_key	mask;
};

struct nf_flow_rule {
	struct nf_flow_match	match;
	struct flow_rule	*rule;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

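/*
 * Populate the dissector offsets and the key/mask pair from a flow tuple:
 * ingress interface, L3 addresses, L4 protocol and ports. Only TCP and UDP
 * over IPv4/IPv6 can be expressed; anything else is rejected.
 */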
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

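/* Fill in one packet mangle action: rewrite a 32-bit word at @offset. */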
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

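/*
 * Rewrite the Ethernet source address to the MAC of the other direction's
 * ingress device, i.e. the device this direction leaves through. Mangle
 * actions operate on 32-bit words, so the 6-byte address is split across
 * two entries at offsets 4 and 8 of the Ethernet header.
 */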
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	struct net_device *dev;
	u32 mask, val;
	u16 val16;

	dev = dev_get_by_index(net, tuple->iifidx);
	if (!dev)
		return -ENOENT;

	mask = ~0xffff0000;
	memcpy(&val16, dev->dev_addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, dev->dev_addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);
	dev_put(dev);

	return 0;
}

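/*
 * Rewrite the Ethernet destination address to the next hop's MAC, looked up
 * via the neighbour entry attached to the cached route. Fails if the
 * neighbour is not yet resolved (!NUD_VALID).
 */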
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
	n = dst_neigh_lookup(dst_cache, daddr);
	if (!n)
		return -ENOENT;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(ha, n->ha);
	read_unlock_bh(&n->lock);

	if (!(nud_state & NUD_VALID)) {
		neigh_release(n);
		return -ENOENT;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);
	neigh_release(n);

	return 0;
}

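/*
 * IPv4 NAT is expressed as a single 32-bit mangle on the IP header: the
 * original direction rewrites the source address, the reply direction the
 * destination address (and vice versa for DNAT below).
 */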
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

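/*
 * An IPv6 address does not fit in one mangle action, so emit four 32-bit
 * mangles covering the full 128-bit address.
 */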
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i;

	/* One mangle action per 32-bit word of the IPv6 address. */
	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i * sizeof(u32), &addr[i], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

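/*
 * Port rewrites also go through 32-bit mangles on the L4 header, so the
 * 16-bit port is shifted into the half of the word that holds the source
 * or destination port, with the other half masked out.
 */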
static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

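/* Ask the hardware to recompute the IPv4 and L4 checksums after NAT. */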
static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

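/* Final action: forward the packet out through the route's output device. */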
static void flow_offload_redirect(const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	struct rtable *rt;

	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = rt->dst.dev;
	dev_hold(rt->dst.dev);
}

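/*
 * Build the full action list for one direction of a flow: Ethernet header
 * rewrite, optional SNAT/DNAT mangles (plus checksum fixup for IPv4), and a
 * final redirect to the output device.
 */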
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

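/*
 * Upper bound on the number of action entries a single flow rule may need;
 * used to size the flow_rule allocation below.
 */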
#define NF_FLOW_RULE_ACTION_MAX	16

static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload *flow = offload->flow;
	const struct flow_offload_tuple *tuple;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	err = nf_flow_rule_match(&flow_rule->match, tuple);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

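/*
 * Send one classifier command (replace, destroy or stats) for a single
 * direction of a flow to every callback registered on the flowtable's
 * block. Returns the number of callbacks that accepted the command.
 */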
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);

	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	struct nf_flowtable *flowtable = offload->flowtable;
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;

	nf_flow_offload_init(&cls_flow, proto, offload->priority,
			     FLOW_CLS_STATS,
			     &offload->flow->tuplehash[dir].tuple, &extack);

	list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
		block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
	memcpy(stats, &cls_flow.stats, sizeof(*stats));
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + NF_FLOW_TIMEOUT);
}

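/*
 * Work handler: atomically take over the pending list, then run each queued
 * request and free it.
 */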
static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload, *next;
	LIST_HEAD(offload_pending_list);

	spin_lock_bh(&flow_offload_pending_list_lock);
	list_replace_init(&flow_offload_pending_list, &offload_pending_list);
	spin_unlock_bh(&flow_offload_pending_list_lock);

	list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
		switch (offload->cmd) {
		case FLOW_CLS_REPLACE:
			flow_offload_work_add(offload);
			break;
		case FLOW_CLS_DESTROY:
			flow_offload_work_del(offload);
			break;
		case FLOW_CLS_STATS:
			flow_offload_work_stats(offload);
			break;
		default:
			WARN_ON_ONCE(1);
		}
		list_del(&offload->list);
		kfree(offload);
	}
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	spin_lock_bh(&flow_offload_pending_list_lock);
	list_add_tail(&offload->list, &flow_offload_pending_list);
	spin_unlock_bh(&flow_offload_pending_list_lock);

	schedule_work(&nf_flow_offload_work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload)
		return NULL;

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if (delta >= (9 * NF_FLOW_TIMEOUT) / 10)
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable))
		flush_work(&nf_flow_offload_work);
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

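/*
 * Prepare a flow_block_offload request and hand it to the device via
 * ndo_setup_tc(TC_SETUP_FT) to bind or unbind the flowtable's block.
 */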
static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (!dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	memset(bo, 0, sizeof(*bo));
	bo->net		= dev_net(dev);
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	INIT_LIST_HEAD(&bo->cb_list);

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
	INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);

	return 0;
}

void nf_flow_table_offload_exit(void)
{
	struct flow_offload_work *offload, *next;
	LIST_HEAD(offload_pending_list);

	cancel_work_sync(&nf_flow_offload_work);

	list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
		list_del(&offload->list);
		kfree(offload);
	}
}