// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2019 Solarflare Communications Inc.
 * Copyright 2020-2022 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/tc_act/tc_ct.h>
#include "tc.h"
#include "tc_bindings.h"
#include "tc_encap_actions.h"
#include "tc_conntrack.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"

enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
	if (netif_is_vxlan(net_dev))
		return EFX_ENCAP_TYPE_VXLAN;
	if (netif_is_geneve(net_dev))
		return EFX_ENCAP_TYPE_GENEVE;

	return EFX_ENCAP_TYPE_NONE;
}

#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
#define EFX_EFV_PF	NULL
/* Look up the representor information (efv) for a device.
 * May return NULL for the PF (us), or an error pointer for a device that
 * isn't supported as a TC offload endpoint
 */
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
					 struct net_device *dev)
{
	struct efx_rep *efv;

	if (!dev)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it us (the PF)? */
	if (dev == efx->net_dev)
		return EFX_EFV_PF;
	/* Is it an efx vfrep at all? */
	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it ours?  We don't support TC rules that include another
	 * EF100's netdevices (not even on another port of the same NIC).
	 */
	efv = netdev_priv(dev);
	if (efv->parent != efx)
		return ERR_PTR(-EOPNOTSUPP);
	return efv;
}

/* Convert a device (PF or VF) into an internal m-port ID */
static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_uplink(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

/* Convert a device (PF or VF) into an external m-port ID (wire or VF) */
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_wire(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

static const struct rhashtable_params efx_tc_mac_ht_params = {
	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
};

static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};

static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};

static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
};

static const struct rhashtable_params efx_tc_recirc_ht_params = {
	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
};

static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
							     unsigned char h_addr[ETH_ALEN],
							     struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped, *old;
	int rc;

	ped = kzalloc(sizeof(*ped), GFP_USER);
	if (!ped)
		return ERR_PTR(-ENOMEM);
	memcpy(ped->h_addr, h_addr, ETH_ALEN);
	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
						&ped->linkage,
						efx_tc_mac_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(ped);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found, ref taken */
		return old;
	}

	rc = efx_mae_allocate_pedit_mac(efx, ped);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
		goto out_remove;
	}

	/* ref and return */
	refcount_set(&ped->ref, 1);
	return ped;
out_remove:
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	kfree(ped);
	return ERR_PTR(rc);
}

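/* The get/put pair around this hash table follows a pattern used
 * throughout this file: rhashtable_lookup_get_insert_fast() either
 * installs our new entry or hands back an existing one, whose refcount
 * we then try to take.  A usage sketch (error handling elided; see
 * efx_tc_complete_mac_mangle() for the real caller):
 *
 *	ped = efx_tc_flower_get_mac(efx, mac_bytes, extack);
 *	if (IS_ERR(ped))
 *		return PTR_ERR(ped);
 *	act->dst_mac = ped;
 *	...
 *	efx_tc_flower_put_mac(efx, act->dst_mac);	(on teardown)
 *
 * where mac_bytes stands in for the six reassembled MAC address bytes.
 */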
static void efx_tc_flower_put_mac(struct efx_nic *efx,
				  struct efx_tc_mac_pedit_action *ped)
{
	if (!refcount_dec_and_test(&ped->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	efx_mae_free_pedit_mac(efx, ped);
	kfree(ped);
}

static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths calling this on the 'cursor' action set pass
	 * in_hw=false, because if the alloc had succeeded we'd have put it
	 * in acts.list and would not still have it in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count) {
		spin_lock_bh(&act->count->cnt->lock);
		if (!list_empty(&act->count_user))
			list_del(&act->count_user);
		spin_unlock_bh(&act->count->cnt->lock);
		efx_tc_flower_put_counter_index(efx, act->count);
	}
	if (act->encap_md) {
		list_del(&act->encap_user);
		efx_tc_flower_release_encap_md(efx, act->encap_md);
	}
	if (act->src_mac)
		efx_tc_flower_put_mac(efx, act->src_mac);
	if (act->dst_mac)
		efx_tc_flower_put_mac(efx, act->dst_mac);
	kfree(act);
}

static void efx_tc_free_action_set_list(struct efx_nic *efx,
					struct efx_tc_action_set_list *acts,
					bool in_hw)
{
	struct efx_tc_action_set *act, *next;

	/* Failure paths set in_hw=false, because usually the acts didn't get
	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
	 * has a separate efx_mae_free_action_set_list() before calling us.
	 */
	if (in_hw)
		efx_mae_free_action_set_list(efx, acts);
	/* Any act that's on the list will be in_hw even if the list isn't */
	list_for_each_entry_safe(act, next, &acts->list, list)
		efx_tc_free_action_set(efx, act, true);
	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}

/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)

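/* For illustration, MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto)
 * below expands to (modulo whitespace):
 *
 *	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 *		struct flow_match_basic fm;
 *
 *		flow_rule_match_basic(rule, &fm);
 *		match->value.eth_proto = fm.key->n_proto;
 *		match->mask.eth_proto = fm.mask->n_proto;
 *	}
 */
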
static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key.  If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
	}
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L3/L4 flower keys %#llx require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}

	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L4 flower keys %#llx require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (fm.mask->flags) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Flower enc keys require enc_control (keys: %#llx)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
		struct flow_match_ct fm;

		flow_rule_match_ct(rule, &fm);
		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported ct_state match %#x",
					       fm.mask->ct_state);
			return -EOPNOTSUPP;
		}
		match->value.ct_mark = fm.key->ct_mark;
		match->mask.ct_mark = fm.mask->ct_mark;
		match->value.ct_zone = fm.key->ct_zone;
		match->mask.ct_zone = fm.mask->ct_zone;

		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	int rc;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */

	if (encap->type == EFX_TC_EM_DIRECT) {
		rc = efx_mae_unregister_encap_match(efx, encap);
		if (rc)
			/* Display message but carry on and remove entry from our
			 * SW tables, because there's not much we can do about it.
			 */
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, rc);
	}
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	if (encap->pseudo)
		efx_tc_flower_release_encap_match(efx, encap->pseudo);
	kfree(encap);
}

static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
					    struct efx_tc_match *match,
					    enum efx_encap_type type,
					    enum efx_tc_em_pseudo_type em_type,
					    u8 child_ip_tos_mask,
					    __be16 child_udp_sport_mask,
					    struct netlink_ext_ack *extack)
{
	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
	bool ipv6 = false;
	int rc;

	/* We require that the socket-defining fields (IP addrs and UDP dest
	 * port) are present and exact-match.  Other fields may only be used
	 * if the field-set (and any masks) are the same for all encap
	 * matches on the same <sip,dip,dport> tuple; this is enforced by
	 * pseudo encap matches.
	 */
	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#ifdef CONFIG_IPV6
		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on both IPv4 and IPv6, don't understand");
			return -EOPNOTSUPP;
		}
	} else {
		ipv6 = true;
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#endif
	}
	if (!IS_ALL_ONES(match->mask.enc_dport)) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
		struct efx_tc_match pmatch = *match;

		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
			return -EOPNOTSUPP;
		}
		pmatch.value.enc_ip_tos = 0;
		pmatch.mask.enc_ip_tos = 0;
		pmatch.value.enc_sport = 0;
		pmatch.mask.enc_sport = 0;
		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
						      EFX_TC_EM_PSEUDO_MASK,
						      match->mask.enc_ip_tos,
						      match->mask.enc_sport,
						      extack);
		if (rc)
			return rc;
		pseudo = pmatch.encap;
	}
	if (match->mask.enc_ip_ttl) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
		rc = -EOPNOTSUPP;
		goto fail_pseudo;
	}

	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
					    match->mask.enc_sport, extack);
	if (rc)
		goto fail_pseudo;

	encap = kzalloc(sizeof(*encap), GFP_USER);
	if (!encap) {
		rc = -ENOMEM;
		goto fail_pseudo;
	}
	encap->src_ip = match->value.enc_src_ip;
	encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
	encap->src_ip6 = match->value.enc_src_ip6;
	encap->dst_ip6 = match->value.enc_dst_ip6;
#endif
	encap->udp_dport = match->value.enc_dport;
	encap->tun_type = type;
	encap->ip_tos = match->value.enc_ip_tos;
	encap->ip_tos_mask = match->mask.enc_ip_tos;
	encap->child_ip_tos_mask = child_ip_tos_mask;
	encap->udp_sport = match->value.enc_sport;
	encap->udp_sport_mask = match->mask.enc_sport;
	encap->child_udp_sport_mask = child_udp_sport_mask;
	encap->type = em_type;
	encap->pseudo = pseudo;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
						&encap->linkage,
						efx_tc_encap_match_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (pseudo) /* don't need our new pseudo either */
			efx_tc_flower_release_encap_match(efx, pseudo);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return PTR_ERR(old);
		/* check old and new em_types are compatible */
		switch (old->type) {
		case EFX_TC_EM_DIRECT:
			/* old EM is in hardware, so mustn't overlap with a
			 * pseudo, but may be shared with another direct EM
			 */
			if (em_type == EFX_TC_EM_DIRECT)
				break;
			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
			return -EEXIST;
		case EFX_TC_EM_PSEUDO_MASK:
			/* old EM is protecting a ToS- or src port-qualified
			 * filter, so may only be shared with another pseudo
			 * for the same ToS and src port masks.
			 */
			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "%s encap match conflicts with existing pseudo(MASK) entry",
						       em_type ? "Pseudo" : "Direct");
				return -EEXIST;
			}
			if (child_ip_tos_mask != old->child_ip_tos_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x",
						       child_ip_tos_mask,
						       old->child_ip_tos_mask);
				return -EEXIST;
			}
			if (child_udp_sport_mask != old->child_udp_sport_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x",
						       child_udp_sport_mask,
						       old->child_udp_sport_mask);
				return -EEXIST;
			}
			break;
		case EFX_TC_EM_PSEUDO_OR:
			/* old EM corresponds to an OR that has to be unique
			 * (it must not overlap with any other OR, whether
			 * direct-EM or pseudo).
			 */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(OR) entry",
					       em_type ? "Pseudo" : "Direct");
			return -EEXIST;
		default: /* Unrecognised pseudo-type.  Just say no */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(%d) entry",
					       em_type ? "Pseudo" : "Direct",
					       old->type);
			return -EEXIST;
		}
		/* check old and new tun_types are compatible */
		if (old->tun_type != type) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Egress encap match with conflicting tun_type %u != %u",
					       old->tun_type, type);
			return -EEXIST;
		}
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found */
		encap = old;
	} else {
		if (em_type == EFX_TC_EM_DIRECT) {
			rc = efx_mae_register_encap_match(efx, encap);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
				goto fail;
			}
		}
		refcount_set(&encap->ref, 1);
	}
	match->encap = encap;
	return 0;
fail:
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
fail_pseudo:
	if (pseudo)
		efx_tc_flower_release_encap_match(efx, pseudo);
	return rc;
}

static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
						     u32 chain_index,
						     struct net_device *net_dev)
{
	struct efx_tc_recirc_id *rid, *old;
	int rc;

	rid = kzalloc(sizeof(*rid), GFP_USER);
	if (!rid)
		return ERR_PTR(-ENOMEM);
	rid->chain_index = chain_index;
	/* We don't take a reference here, because it's implied - if there's
	 * a rule on the net_dev that's been offloaded to us, then the net_dev
	 * can't go away until the rule has been deoffloaded.
	 */
	rid->net_dev = net_dev;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
						&rid->linkage,
						efx_tc_recirc_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(rid);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		rid = old;
	} else {
		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
		if (rc < 0) {
			rhashtable_remove_fast(&efx->tc->recirc_ht,
					       &rid->linkage,
					       efx_tc_recirc_ht_params);
			kfree(rid);
			return ERR_PTR(rc);
		}
		rid->fw_id = rc;
		refcount_set(&rid->ref, 1);
	}
	return rid;
}

static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
{
	if (!refcount_dec_and_test(&rid->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
			       efx_tc_recirc_ht_params);
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);
	if (rule->match.rid)
		efx_tc_put_recirc_id(efx, rule->match.rid);
	if (rule->match.encap)
		efx_tc_flower_release_encap_match(efx, rule->match.encap);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
{
	switch (typ) {
	case EFX_ENCAP_TYPE_NONE:
		return "none";
	case EFX_ENCAP_TYPE_VXLAN:
		return "vxlan";
	case EFX_ENCAP_TYPE_GENEVE:
		return "geneve";
	default:
		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
		return "unknown";
	}
}

/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_DEC_TTL,
	EFX_TC_AO_PEDIT_MAC_ADDRS,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_ENCAP,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		if (act->decap)
			return false;
		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
		 * can wait until much later
		 */
		if (act->dst_mac || act->src_mac)
			return false;

		/* Decrementing ttl must not happen before DECAP */
		if (act->do_ttl_dec)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_PEDIT_MAC_ADDRS:
	case EFX_TC_AO_ENCAP:
		if (act->encap_md)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		return !act->deliver;
	case EFX_TC_AO_DEC_TTL:
		if (act->encap_md)
			return false;
		return !act->do_ttl_dec;
	default:
		/* Bad caller.  Whatever they wanted to do, say they can't. */
		WARN_ON_ONCE(1);
		return false;
	}
}
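
/* A sketch of how callers use the above check (not the literal caller
 * code; see the action parsing in efx_tc_flower_replace() et al.):
 *
 *	if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
 *		NL_SET_ERR_MSG_MOD(extack, "vlan push violates action order");
 *		return -EOPNOTSUPP;
 *	}
 *	act->vlan_push++;
 */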

/**
 * DOC: TC conntrack sequences
 *
 * The MAE hardware can handle at most two rounds of action rule matching,
 * consequently we support conntrack through the notion of a "left-hand side
 * rule".  This is a rule which typically contains only the actions "ct" and
 * "goto chain N", and corresponds to one or more "right-hand side rules" in
 * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
 * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
 * (the hardware equivalent of chain_index), while LHS rules may go in either
 * the Action Rule or the Outer Rule table, the latter being preferred for
 * performance reasons, and set both DO_CT and a recirc_id in their response.
 *
 * Besides the RHS rules, there are often also similar rules matching on
 * +trk+new which perform the ct(commit) action.  These are not offloaded.
 */
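
/* As an illustrative example (device names and chain numbers arbitrary),
 * an offloadable LHS/RHS pair might be installed with:
 *
 *	tc filter add dev $PF ingress chain 0 protocol ip flower \
 *		ip_proto tcp action ct action goto chain 1
 *	tc filter add dev $PF ingress chain 1 protocol ip flower \
 *		ct_state +trk+est action mirred egress redirect dev $REP
 */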

static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
				    struct efx_tc_match *match)
{
	const struct flow_action_entry *fa;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			return true;
		case FLOW_ACTION_CT:
			/* If rule is -trk, or doesn't mention trk at all, then
			 * a CT action implies a conntrack lookup (hence it's an
			 * LHS rule).  If rule is +trk, then a CT action could
			 * just be ct(nat) or even ct(commit) (though the latter
			 * can't be offloaded).
			 */
			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
				return true;
			break;
		default:
			break;
		}
	}
	return false;
}

/* A foreign LHS rule has matches on enc_ keys at the TC layer (including an
 * implied match on enc_ip_proto UDP).  Translate these into non-enc_ keys,
 * so that we can use the same MAE machinery as local LHS rules (and so that
 * the lhs_rules entries have uniform semantics).  It may seem odd to do it
 * this way round, given that the corresponding fields in the MAE MCDIs are
 * all ENC_, but (a) we don't have enc_L2 or enc_ip_proto in struct
 * efx_tc_match_fields and (b) semantically an LHS rule doesn't have inner
 * fields so it's just matching on *the* header rather than the outer header.
 * Make sure that the non-enc_ keys were not already being matched on, as that
 * would imply a rule that needed a triple lookup.  (Hardware can do that,
 * with OR-AR-CT-AR, but it halves packet rate so we avoid it where possible;
 * see efx_tc_flower_flhs_needs_ar().)
 */
static int efx_tc_flower_translate_flhs_match(struct efx_tc_match *match)
{
	int rc = 0;

#define COPY_MASK_AND_VALUE(_key, _ekey)	({	\
	if (match->mask._key) {				\
		rc = -EOPNOTSUPP;			\
	} else {					\
		match->mask._key = match->mask._ekey;	\
		match->mask._ekey = 0;			\
		match->value._key = match->value._ekey;	\
		match->value._ekey = 0;			\
	}						\
	rc;						\
})
#define COPY_FROM_ENC(_key)	COPY_MASK_AND_VALUE(_key, enc_##_key)
	if (match->mask.ip_proto)
		return -EOPNOTSUPP;
	match->mask.ip_proto = ~0;
	match->value.ip_proto = IPPROTO_UDP;
	if (COPY_FROM_ENC(src_ip) || COPY_FROM_ENC(dst_ip))
		return rc;
#ifdef CONFIG_IPV6
	if (!ipv6_addr_any(&match->mask.src_ip6))
		return -EOPNOTSUPP;
	/* Copy values as well as masks, mirroring COPY_MASK_AND_VALUE() */
	match->mask.src_ip6 = match->mask.enc_src_ip6;
	match->value.src_ip6 = match->value.enc_src_ip6;
	memset(&match->mask.enc_src_ip6, 0, sizeof(struct in6_addr));
	memset(&match->value.enc_src_ip6, 0, sizeof(struct in6_addr));
	if (!ipv6_addr_any(&match->mask.dst_ip6))
		return -EOPNOTSUPP;
	match->mask.dst_ip6 = match->mask.enc_dst_ip6;
	match->value.dst_ip6 = match->value.enc_dst_ip6;
	memset(&match->mask.enc_dst_ip6, 0, sizeof(struct in6_addr));
	memset(&match->value.enc_dst_ip6, 0, sizeof(struct in6_addr));
#endif
	if (COPY_FROM_ENC(ip_tos) || COPY_FROM_ENC(ip_ttl))
		return rc;
	/* should really copy enc_ip_frag but we don't have that in
	 * parse_match yet
	 */
	if (COPY_MASK_AND_VALUE(l4_sport, enc_sport) ||
	    COPY_MASK_AND_VALUE(l4_dport, enc_dport))
		return rc;
	return 0;
#undef COPY_FROM_ENC
#undef COPY_MASK_AND_VALUE
}
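
/* Worked example for the translation above (addresses and port arbitrary):
 * a foreign LHS match on
 *	enc_src_ip 10.0.0.1, enc_dst_ip 10.0.0.2, enc_dst_port 4789
 * becomes
 *	ip_proto udp, src_ip 10.0.0.1, dst_ip 10.0.0.2, l4_dport 4789
 * which is what the Outer Rule lookup actually matches on.
 */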

/* If a foreign LHS rule wants to match on keys that are only available after
 * encap header identification and parsing, then it can't be done in the Outer
 * Rule lookup, because that lookup determines the encap type used to parse
 * beyond the outer headers.  Thus, such rules must use the OR-AR-CT-AR lookup
 * sequence, with an EM (struct efx_tc_encap_match) in the OR step.
 * Return true iff the passed match requires this.
 */
static bool efx_tc_flower_flhs_needs_ar(struct efx_tc_match *match)
{
	/* matches on inner-header keys can't be done in OR */
	return match->mask.eth_proto ||
	       match->mask.vlan_tci[0] || match->mask.vlan_tci[1] ||
	       match->mask.vlan_proto[0] || match->mask.vlan_proto[1] ||
	       memchr_inv(match->mask.eth_saddr, 0, ETH_ALEN) ||
	       memchr_inv(match->mask.eth_daddr, 0, ETH_ALEN) ||
	       match->mask.ip_proto ||
	       match->mask.ip_tos || match->mask.ip_ttl ||
	       match->mask.src_ip || match->mask.dst_ip ||
#ifdef CONFIG_IPV6
	       !ipv6_addr_any(&match->mask.src_ip6) ||
	       !ipv6_addr_any(&match->mask.dst_ip6) ||
#endif
	       match->mask.ip_frag || match->mask.ip_firstfrag ||
	       match->mask.l4_sport || match->mask.l4_dport ||
	       match->mask.tcp_flags ||
	/* nor can VNI */
	       match->mask.enc_keyid;
}
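
/* For example (illustrative only), a decap rule on a vxlan device that
 * also matches on the inner destination MAC:
 *
 *	tc filter add dev vxlan0 ingress protocol ip flower \
 *		dst_mac 02:00:00:00:00:01 action ct action goto chain 1
 *
 * matches an inner-header key (eth_daddr), so by the check above it must
 * take the OR-AR-CT-AR path; without the dst_mac match it could be
 * handled entirely in the Outer Rule table.
 */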

static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
					    struct flow_cls_offload *tc,
					    struct flow_rule *fr,
					    struct net_device *net_dev,
					    struct efx_tc_lhs_rule *rule)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_action *act = &rule->lhs_act;
	const struct flow_action_entry *fa;
	enum efx_tc_counter_type ctype;
	bool pipe = true;
	int i;

	ctype = rule->is_ar ? EFX_TC_COUNTER_TYPE_AR : EFX_TC_COUNTER_TYPE_OR;

	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_ct_zone *ct_zone;
		struct efx_tc_recirc_id *rid;

		if (!pipe) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			return -EINVAL;
		}
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			if (!fa->chain_index) {
				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
				return -EOPNOTSUPP;
			}
			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
						   net_dev);
			if (IS_ERR(rid)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
				return PTR_ERR(rid);
			}
			act->rid = rid;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *cnt;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					return -EOPNOTSUPP;
				}
				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
								      ctype);
				if (IS_ERR(cnt)) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					return PTR_ERR(cnt);
				}
				WARN_ON(act->count); /* can't happen */
				act->count = cnt;
			}
			pipe = false;
			break;
		case FLOW_ACTION_CT:
			if (act->zone) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
					     TCA_CT_ACT_FORCE)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_NAT |
					     TCA_CT_ACT_NAT_SRC |
					     TCA_CT_ACT_NAT_DST)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule",
						       fa->ct.action);
				return -EOPNOTSUPP;
			}
			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
							  fa->ct.flow_table);
			if (IS_ERR(ct_zone)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
				return PTR_ERR(ct_zone);
			}
			act->zone = ct_zone;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule",
					       fa->id);
			return -EOPNOTSUPP;
		}
	}

	if (pipe) {
		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
		return -EOPNOTSUPP;
	}
	return 0;
}

static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
					      struct efx_tc_lhs_action *act)
{
	if (act->rid)
		efx_tc_put_recirc_id(efx, act->rid);
	if (act->zone)
		efx_tc_ct_unregister_zone(efx, act->zone);
	if (act->count)
		efx_tc_flower_put_counter_index(efx, act->count);
}

/**
 * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
 *
 * @dst_mac_32:	dst_mac[0:3] has been populated
 * @dst_mac_16:	dst_mac[4:5] has been populated
 * @src_mac_16:	src_mac[0:1] has been populated
 * @src_mac_32:	src_mac[2:5] has been populated
 * @dst_mac:	h_dest field of ethhdr
 * @src_mac:	h_source field of ethhdr
 *
 * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
 * necessarily equate to whole fields of the packet header, this
 * structure is used to hold the cumulative effect of the partial
 * field pedits that have been processed so far.
 */
struct efx_tc_mangler_state {
	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
	u8 src_mac_16:1; /* eth->h_source[0:1] */
	u8 src_mac_32:1; /* eth->h_source[2:5] */
	unsigned char dst_mac[ETH_ALEN];
	unsigned char src_mac[ETH_ALEN];
};
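
/* For example, a TC pedit that rewrites the whole destination MAC arrives
 * as two mangles: a 32-bit write at offset 0 (mask 0) filling dst_mac[0:3],
 * then a 16-bit write at offset 4 (mask 0xffff0000) filling dst_mac[4:5].
 * Only once both flag bits are set does efx_tc_complete_mac_mangle() below
 * fold the accumulated bytes into a single MAE pedit entry.
 */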

/**
 * efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
 * @efx:	NIC we're installing a flow rule on
 * @act:	action set (cursor) to update
 * @mung:	accumulated partial mangles
 * @extack:	netlink extended ack for reporting errors
 *
 * Check @mung to find any combinations of partial mangles that can be
 * combined into a complete packet field edit, add that edit to @act,
 * and consume the partial mangles from @mung.
 */
static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
				      struct efx_tc_action_set *act,
				      struct efx_tc_mangler_state *mung,
				      struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped;

	if (mung->dst_mac_32 && mung->dst_mac_16) {
		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated dst_mac */
		if (act->dst_mac)
			efx_tc_flower_put_mac(efx, act->dst_mac);

		act->dst_mac = ped;

		/* consume the incomplete state */
		mung->dst_mac_32 = 0;
		mung->dst_mac_16 = 0;
	}
	if (mung->src_mac_16 && mung->src_mac_32) {
		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated src_mac */
		if (act->src_mac)
			efx_tc_flower_put_mac(efx, act->src_mac);

		act->src_mac = ped;

		/* consume the incomplete state */
		mung->src_mac_32 = 0;
		mung->src_mac_16 = 0;
	}
	return 0;
}

static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
			    const struct flow_action_entry *fa,
			    struct netlink_ext_ack *extack)
{
	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the ttl.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
				break;

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the hoplimit.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val >> 24) != U8_MAX)
				break;

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	default:
		break;
	}

	NL_SET_ERR_MSG_FMT_MOD(extack,
			       "ttl add action type %x %x %x/%x is not supported",
			       fa->mangle.htype, fa->mangle.offset,
			       fa->mangle.val, fa->mangle.mask);
	return -EOPNOTSUPP;
}

/**
 * efx_tc_mangle() - handle a single 32-bit (or less) pedit
 * @efx:	NIC we're installing a flow rule on
 * @act:	action set (cursor) to update
 * @fa:		FLOW_ACTION_MANGLE action metadata
 * @mung:	accumulator for partial mangles
 * @extack:	netlink extended ack for reporting errors
 * @match:	original match used along with the mangle action
 *
 * Identify the fields written by a FLOW_ACTION_MANGLE, and record
 * the partial mangle state in @mung.  If this mangle completes an
 * earlier partial mangle, consume and apply to @act by calling
 * efx_tc_complete_mac_mangle().
 */
static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
			 const struct flow_action_entry *fa,
			 struct efx_tc_mangler_state *mung,
			 struct netlink_ext_ack *extack,
			 struct efx_tc_match *match)
{
	__le32 mac32;
	__le16 mac16;
	u8 tr_ttl;

	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Pedit mangle mac action violates action order");
			return -EOPNOTSUPP;
		}
		switch (fa->mangle.offset) {
		case 0:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth.dst32 mangle is not supported",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			/* Ethernet address is little-endian */
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
			mung->dst_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 4:
			if (fa->mangle.mask == 0xffff) {
				mac16 = cpu_to_le16(fa->mangle.val >> 16);
				memcpy(mung->src_mac, &mac16, sizeof(mac16));
				mung->src_mac_16 = 1;
			} else if (fa->mangle.mask == 0xffff0000) {
				mac16 = cpu_to_le16((u16)fa->mangle.val);
				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
				mung->dst_mac_16 = 1;
			} else {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth+4 mangle is not high or low 16b",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 8:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth.src32 mangle is not supported",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
			mung->src_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported",
					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* we currently only support pedit IP4 when it applies
			 * to TTL and then only when it can be achieved with a
			 * decrement ttl action
			 */

			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) out of range, only support mangle action on ipv4.ttl",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "only support mangle ttl when we have an exact match, current mask (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "decrement ttl past 0 is not supported");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "multiple dec ttl is not supported");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "only support mangle on the ttl field (offset is %u)",
					       fa->mangle.offset);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* we currently only support pedit IP6 when it applies
			 * to the hoplimit and then only when it can be achieved
			 * with a decrement hoplimit action
			 */

			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
						       fa->mangle.mask);

				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "only support hop_limit when we have an exact match, current mask (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "decrementing hop_limit past 0 is not supported");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "multiple dec ttl is not supported");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val >> 24) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "only support mangle on the hop_limit field");
			return -EOPNOTSUPP;
		}
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
				       fa->mangle.htype);
		return -EOPNOTSUPP;
	}
	return 0;
}

/**
 * efx_tc_incomplete_mangle() - check for leftover partial pedits
 * @mung:	accumulator for partial mangles
 * @extack:	netlink extended ack for reporting errors
 *
 * Since the MAE can only overwrite whole fields, any partial
 * field mangle left over on reaching packet delivery (mirred or
 * end of TC actions) cannot be offloaded.  Check for any such
 * and reject them with -%EOPNOTSUPP.
 */
static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
				    struct netlink_ext_ack *extack)
{
	if (mung->dst_mac_32 || mung->dst_mac_16) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
		return -EOPNOTSUPP;
	}
	if (mung->src_mac_16 || mung->src_mac_32) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
		return -EOPNOTSUPP;
	}
	return 0;
}

static int efx_tc_flower_replace_foreign_lhs_ar(struct efx_nic *efx,
						struct flow_cls_offload *tc,
						struct flow_rule *fr,
						struct efx_tc_match *match,
						struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	enum efx_encap_type type;
	int rc;

	type = efx_tc_indr_netdev_type(net_dev);
	if (type == EFX_ENCAP_TYPE_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device");
		return -EOPNOTSUPP;
	}

	rc = efx_mae_check_encap_type_supported(efx, type);
	if (rc) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Firmware reports no support for %s encap match",
				       efx_tc_encap_type_name(type));
		return rc;
	}
	/* This is an Action Rule, so it needs a separate Encap Match in the
	 * Outer Rule table.  Insert that now.
	 */
	rc = efx_tc_flower_record_encap_match(efx, match, type,
					      EFX_TC_EM_DIRECT, 0, 0, extack);
	if (rc)
		return rc;

	match->mask.recirc_id = 0xff;
	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		rc = -EOPNOTSUPP;
		goto release_encap_match;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;
	/* We must inhibit match on TCP SYN/FIN/RST, so that SW can see
	 * the packet and update the conntrack table.
	 * Outer Rules will do that with CT_TCP_FLAGS_INHIBIT, but Action
	 * Rules don't have that; instead they support matching on
	 * TCP_SYN_FIN_RST (aka TCP_INTERESTING_FLAGS), so use that.
	 * This is only strictly needed if there will be a DO_CT action,
	 * which we don't know yet, but typically there will be and it's
	 * simpler not to bother checking here.
	 */
	match->mask.tcp_syn_fin_rst = true;

	rc = efx_mae_match_check_caps(efx, &match->mask, extack);
	if (rc)
		goto release_encap_match;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release_encap_match;
	}
	rule->cookie = tc->cookie;
	rule->is_ar = true;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;
	rule->lhs_act.tun_type = type;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
release_encap_match:
	if (match->encap)
		efx_tc_flower_release_encap_match(efx, match->encap);
	return rc;
}

static int efx_tc_flower_replace_foreign_lhs(struct efx_nic *efx,
					     struct flow_cls_offload *tc,
					     struct flow_rule *fr,
					     struct efx_tc_match *match,
					     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	enum efx_encap_type type;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (!efx_tc_match_is_encap(&match->mask)) {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign LHS filter without encap match\n");
		return -EOPNOTSUPP;
	}

	if (efx_tc_flower_flhs_needs_ar(match))
		return efx_tc_flower_replace_foreign_lhs_ar(efx, tc, fr, match,
							    net_dev);

	type = efx_tc_indr_netdev_type(net_dev);
	if (type == EFX_ENCAP_TYPE_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device");
1589 		return -EOPNOTSUPP;
1590 	}
1591 
1592 	rc = efx_mae_check_encap_type_supported(efx, type);
1593 	if (rc) {
1594 		NL_SET_ERR_MSG_FMT_MOD(extack,
1595 				       "Firmware reports no support for %s encap match",
1596 				       efx_tc_encap_type_name(type));
1597 		return rc;
1598 	}
1599 	/* Reserve the outer tuple with a pseudo Encap Match */
1600 	rc = efx_tc_flower_record_encap_match(efx, match, type,
1601 					      EFX_TC_EM_PSEUDO_OR, 0, 0,
1602 					      extack);
1603 	if (rc)
1604 		return rc;
1605 
1606 	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
1607 		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
1608 		rc = -EOPNOTSUPP;
1609 		goto release_encap_match;
1610 	}
1611 	/* LHS rules are always -trk, so we don't need to match on that */
1612 	match->mask.ct_state_trk = 0;
1613 	match->value.ct_state_trk = 0;
1614 
1615 	rc = efx_tc_flower_translate_flhs_match(match);
1616 	if (rc) {
1617 		NL_SET_ERR_MSG_MOD(extack, "LHS rule cannot match on inner fields");
1618 		goto release_encap_match;
1619 	}
1620 
1621 	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
1622 	if (rc)
1623 		goto release_encap_match;
1624 
1625 	rule = kzalloc(sizeof(*rule), GFP_USER);
1626 	if (!rule) {
1627 		rc = -ENOMEM;
1628 		goto release_encap_match;
1629 	}
1630 	rule->cookie = tc->cookie;
1631 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
1632 						&rule->linkage,
1633 						efx_tc_lhs_rule_ht_params);
	if (IS_ERR(old)) {
		rc = PTR_ERR(old);
		goto release;
	} else if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}
1641 
1642 	/* Parse actions */
1643 	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
1644 	if (rc)
1645 		goto release;
1646 
1647 	rule->match = *match;
1648 	rule->lhs_act.tun_type = type;
1649 
1650 	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
1651 	if (rc) {
1652 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1653 		goto release;
1654 	}
1655 	netif_dbg(efx, drv, efx->net_dev,
1656 		  "Successfully parsed lhs rule (cookie %lx)\n",
1657 		  tc->cookie);
1658 	return 0;
1659 
1660 release:
1661 	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
1662 	if (!old)
1663 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
1664 				       efx_tc_lhs_rule_ht_params);
1665 	kfree(rule);
1666 release_encap_match:
1667 	if (match->encap)
1668 		efx_tc_flower_release_encap_match(efx, match->encap);
1669 	return rc;
1670 }
1671 
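/* Note on "foreign" filters: these are rules bound to a netdev we don't
 * drive, typically a tunnel device (vxlan/geneve) seen via the indirect
 * block mechanism.  Such a rule carries no usable ingress-device match, so
 * below we qualify it with ingress m-port == wire, and we only offload it
 * if at least one of its redirect/mirror targets is one of our own
 * netdevs.
 */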
1672 static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
1673 					 struct net_device *net_dev,
1674 					 struct flow_cls_offload *tc)
1675 {
1676 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1677 	struct netlink_ext_ack *extack = tc->common.extack;
1678 	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
1679 	struct efx_tc_action_set *act = NULL;
1680 	bool found = false, uplinked = false;
1681 	const struct flow_action_entry *fa;
1682 	struct efx_tc_match match;
1683 	struct efx_rep *to_efv;
1684 	s64 rc;
1685 	int i;
1686 
1687 	/* Parse match */
1688 	memset(&match, 0, sizeof(match));
1689 	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
1690 	if (rc)
1691 		return rc;
1692 	/* The rule as given to us doesn't specify a source netdevice.
1693 	 * But, determining whether packets from a VF should match it is
1694 	 * complicated, so leave those to the software slowpath: qualify
1695 	 * the filter with source m-port == wire.
1696 	 */
1697 	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
1698 	if (rc < 0) {
1699 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
1700 		return rc;
1701 	}
1702 	match.value.ingress_port = rc;
1703 	match.mask.ingress_port = ~0;
1704 
1705 	if (efx_tc_rule_is_lhs_rule(fr, &match))
1706 		return efx_tc_flower_replace_foreign_lhs(efx, tc, fr, &match,
1707 							 net_dev);
1708 
1709 	if (tc->common.chain_index) {
1710 		struct efx_tc_recirc_id *rid;
1711 
1712 		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
1713 		if (IS_ERR(rid)) {
1714 			NL_SET_ERR_MSG_FMT_MOD(extack,
1715 					       "Failed to allocate a hardware recirculation ID for chain_index %u",
1716 					       tc->common.chain_index);
1717 			return PTR_ERR(rid);
1718 		}
1719 		match.rid = rid;
1720 		match.value.recirc_id = rid->fw_id;
1721 	}
1722 	match.mask.recirc_id = 0xff;
1723 
1724 	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1725 	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
1726 	 */
1727 	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1728 	    match.mask.ct_state_est && match.value.ct_state_est)
1729 		match.mask.ct_state_trk = 0;
1730 	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1731 	 * match +trk-est (CT_HIT=0) despite being on an established connection.
1732 	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1733 	 * still hit the software path.
1734 	 */
1735 	if (match.mask.ct_state_est && !match.value.ct_state_est) {
1736 		if (match.value.tcp_syn_fin_rst) {
1737 			/* Can't offload this combination */
1738 			NL_SET_ERR_MSG_MOD(extack, "TCP flags and -est conflict for offload");
1739 			rc = -EOPNOTSUPP;
1740 			goto release;
1741 		}
1742 		match.mask.tcp_syn_fin_rst = true;
1743 	}
1744 
1745 	flow_action_for_each(i, fa, &fr->action) {
1746 		switch (fa->id) {
1747 		case FLOW_ACTION_REDIRECT:
1748 		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
1749 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1750 			if (IS_ERR(to_efv))
1751 				continue;
1752 			found = true;
1753 			break;
1754 		default:
1755 			break;
1756 		}
1757 	}
1758 	if (!found) { /* We don't care. */
1759 		netif_dbg(efx, drv, efx->net_dev,
1760 			  "Ignoring foreign filter that doesn't egdev us\n");
1761 		rc = -EOPNOTSUPP;
1762 		goto release;
1763 	}
1764 
1765 	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
1766 	if (rc)
1767 		goto release;
1768 
1769 	if (efx_tc_match_is_encap(&match.mask)) {
1770 		enum efx_encap_type type;
1771 
1772 		type = efx_tc_indr_netdev_type(net_dev);
1773 		if (type == EFX_ENCAP_TYPE_NONE) {
1774 			NL_SET_ERR_MSG_MOD(extack,
1775 					   "Egress encap match on unsupported tunnel device");
1776 			rc = -EOPNOTSUPP;
1777 			goto release;
1778 		}
1779 
1780 		rc = efx_mae_check_encap_type_supported(efx, type);
1781 		if (rc) {
1782 			NL_SET_ERR_MSG_FMT_MOD(extack,
1783 					       "Firmware reports no support for %s encap match",
1784 					       efx_tc_encap_type_name(type));
1785 			goto release;
1786 		}
1787 
1788 		rc = efx_tc_flower_record_encap_match(efx, &match, type,
1789 						      EFX_TC_EM_DIRECT, 0, 0,
1790 						      extack);
1791 		if (rc)
1792 			goto release;
1793 	} else if (!tc->common.chain_index) {
1794 		/* This is not a tunnel decap rule, ignore it */
1795 		netif_dbg(efx, drv, efx->net_dev,
1796 			  "Ignoring foreign filter without encap match\n");
1797 		rc = -EOPNOTSUPP;
1798 		goto release;
1799 	}
1800 
1801 	rule = kzalloc(sizeof(*rule), GFP_USER);
1802 	if (!rule) {
1803 		rc = -ENOMEM;
1804 		goto release;
1805 	}
1806 	INIT_LIST_HEAD(&rule->acts.list);
1807 	rule->cookie = tc->cookie;
1808 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1809 						&rule->linkage,
1810 						efx_tc_match_action_ht_params);
1811 	if (IS_ERR(old)) {
1812 		rc = PTR_ERR(old);
1813 		goto release;
1814 	} else if (old) {
1815 		netif_dbg(efx, drv, efx->net_dev,
1816 			  "Ignoring already-offloaded rule (cookie %lx)\n",
1817 			  tc->cookie);
1818 		rc = -EEXIST;
1819 		goto release;
1820 	}
1821 
1822 	act = kzalloc(sizeof(*act), GFP_USER);
1823 	if (!act) {
1824 		rc = -ENOMEM;
1825 		goto release;
1826 	}
1827 
1828 	/* Parse actions.  For foreign rules we only support decap & redirect.
1829 	 * See corresponding code in efx_tc_flower_replace() for theory of
1830 	 * operation & how 'act' cursor is used.
1831 	 */
1832 	flow_action_for_each(i, fa, &fr->action) {
1833 		struct efx_tc_action_set save;
1834 
1835 		switch (fa->id) {
1836 		case FLOW_ACTION_REDIRECT:
1837 		case FLOW_ACTION_MIRRED:
1838 			/* See corresponding code in efx_tc_flower_replace() for
1839 			 * long explanations of what's going on here.
1840 			 */
1841 			save = *act;
1842 			if (fa->hw_stats) {
1843 				struct efx_tc_counter_index *ctr;
1844 
1845 				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1846 					NL_SET_ERR_MSG_FMT_MOD(extack,
1847 							       "hw_stats_type %u not supported (only 'delayed')",
1848 							       fa->hw_stats);
1849 					rc = -EOPNOTSUPP;
1850 					goto release;
1851 				}
1852 				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
1853 					NL_SET_ERR_MSG_MOD(extack, "Count action violates action order (can't happen)");
1854 					rc = -EOPNOTSUPP;
1855 					goto release;
1856 				}
1857 
1858 				ctr = efx_tc_flower_get_counter_index(efx,
1859 								      tc->cookie,
1860 								      EFX_TC_COUNTER_TYPE_AR);
1861 				if (IS_ERR(ctr)) {
1862 					rc = PTR_ERR(ctr);
1863 					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1864 					goto release;
1865 				}
1866 				act->count = ctr;
1867 				INIT_LIST_HEAD(&act->count_user);
1868 			}
1869 
1870 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
1871 				/* can't happen */
1872 				rc = -EOPNOTSUPP;
1873 				NL_SET_ERR_MSG_MOD(extack,
1874 						   "Deliver action violates action order (can't happen)");
1875 				goto release;
1876 			}
1877 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1878 			/* PF implies egdev is us, in which case we really
1879 			 * want to deliver to the uplink (because this is an
1880 			 * ingress filter).  If we don't recognise the egdev
1881 			 * at all, then we'd better trap so SW can handle it.
1882 			 */
1883 			if (IS_ERR(to_efv))
1884 				to_efv = EFX_EFV_PF;
1885 			if (to_efv == EFX_EFV_PF) {
1886 				if (uplinked)
1887 					break;
1888 				uplinked = true;
1889 			}
1890 			rc = efx_tc_flower_internal_mport(efx, to_efv);
1891 			if (rc < 0) {
1892 				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
1893 				goto release;
1894 			}
1895 			act->dest_mport = rc;
1896 			act->deliver = 1;
1897 			rc = efx_mae_alloc_action_set(efx, act);
1898 			if (rc) {
1899 				NL_SET_ERR_MSG_MOD(extack,
1900 						   "Failed to write action set to hw (mirred)");
1901 				goto release;
1902 			}
1903 			list_add_tail(&act->list, &rule->acts.list);
1904 			act = NULL;
1905 			if (fa->id == FLOW_ACTION_REDIRECT)
1906 				break; /* end of the line */
1907 			/* Mirror, so continue on with saved act */
1908 			act = kzalloc(sizeof(*act), GFP_USER);
1909 			if (!act) {
1910 				rc = -ENOMEM;
1911 				goto release;
1912 			}
1913 			*act = save;
1914 			break;
1915 		case FLOW_ACTION_TUNNEL_DECAP:
1916 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
1917 				rc = -EINVAL;
1918 				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
1919 				goto release;
1920 			}
1921 			act->decap = 1;
1922 			/* If we previously delivered/trapped to uplink, now
1923 			 * that we've decapped we'll want another copy if we
1924 			 * try to deliver/trap to uplink again.
1925 			 */
1926 			uplinked = false;
1927 			break;
1928 		default:
1929 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
1930 					       fa->id);
1931 			rc = -EOPNOTSUPP;
1932 			goto release;
1933 		}
1934 	}
1935 
1936 	if (act) {
1937 		if (!uplinked) {
1938 			/* Not shot/redirected, so deliver to default dest (which is
1939 			 * the uplink, as this is an ingress filter)
1940 			 */
1941 			efx_mae_mport_uplink(efx, &act->dest_mport);
1942 			act->deliver = 1;
1943 		}
1944 		rc = efx_mae_alloc_action_set(efx, act);
1945 		if (rc) {
1946 			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
1947 			goto release;
1948 		}
1949 		list_add_tail(&act->list, &rule->acts.list);
1950 		act = NULL; /* Prevent double-free in error path */
1951 	}
1952 
1953 	rule->match = match;
1954 
1955 	netif_dbg(efx, drv, efx->net_dev,
1956 		  "Successfully parsed foreign filter (cookie %lx)\n",
1957 		  tc->cookie);
1958 
1959 	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
1960 	if (rc) {
1961 		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
1962 		goto release;
1963 	}
1964 	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
1965 				 rule->acts.fw_id, &rule->fw_id);
1966 	if (rc) {
1967 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1968 		goto release_acts;
1969 	}
1970 	return 0;
1971 
1972 release_acts:
1973 	efx_mae_free_action_set_list(efx, &rule->acts);
1974 release:
1975 	/* We failed to insert the rule, so free up any entries we created in
1976 	 * subsidiary tables.
1977 	 */
1978 	if (match.rid)
1979 		efx_tc_put_recirc_id(efx, match.rid);
1980 	if (act)
1981 		efx_tc_free_action_set(efx, act, false);
1982 	if (rule) {
1983 		if (!old)
1984 			rhashtable_remove_fast(&efx->tc->match_action_ht,
1985 					       &rule->linkage,
1986 					       efx_tc_match_action_ht_params);
1987 		efx_tc_free_action_set_list(efx, &rule->acts, false);
1988 	}
1989 	kfree(rule);
1990 	if (match.encap)
1991 		efx_tc_flower_release_encap_match(efx, match.encap);
1992 	return rc;
1993 }
1994 
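/* An "LHS" (left-hand side) rule carries only lookup-side actions (count,
 * conntrack lookup, goto chain) and is inserted with
 * efx_mae_insert_lhs_rule() rather than as an Action Rule.  A typical
 * conntrack-offload pair of filters (hypothetical example; devices and
 * zones invented):
 *   tc filter add ... chain 0 flower ct_state -trk \
 *       action ct zone 1 pipe action goto chain 1
 *   tc filter add ... chain 1 flower ct_state +trk+est action mirred ...
 * The chain 0 filter is handled here; the chain 1 filter takes the normal
 * efx_tc_flower_replace() path and matches on the recirc_id allocated for
 * chain 1.
 */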
1995 static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
1996 				     struct flow_cls_offload *tc,
1997 				     struct flow_rule *fr,
1998 				     struct efx_tc_match *match,
1999 				     struct efx_rep *efv,
2000 				     struct net_device *net_dev)
2001 {
2002 	struct netlink_ext_ack *extack = tc->common.extack;
2003 	struct efx_tc_lhs_rule *rule, *old;
2004 	int rc;
2005 
2006 	if (tc->common.chain_index) {
2007 		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
2008 		return -EOPNOTSUPP;
2009 	}
2010 
2011 	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
2012 		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
2013 		return -EOPNOTSUPP;
2014 	}
2015 	/* LHS rules are always -trk, so we don't need to match on that */
2016 	match->mask.ct_state_trk = 0;
2017 	match->value.ct_state_trk = 0;
2018 
2019 	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
2020 	if (rc)
2021 		return rc;
2022 
2023 	rule = kzalloc(sizeof(*rule), GFP_USER);
2024 	if (!rule)
2025 		return -ENOMEM;
2026 	rule->cookie = tc->cookie;
2027 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
2028 						&rule->linkage,
2029 						efx_tc_lhs_rule_ht_params);
2030 	if (IS_ERR(old)) {
2031 		rc = PTR_ERR(old);
2032 		goto release;
2033 	} else if (old) {
2034 		netif_dbg(efx, drv, efx->net_dev,
2035 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
2036 		rc = -EEXIST;
2037 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
2038 		goto release;
2039 	}
2040 
2041 	/* Parse actions */
2042 	/* See note in efx_tc_flower_replace() regarding passed net_dev
2043 	 * (used for efx_tc_get_recirc_id()).
2044 	 */
2045 	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
2046 	if (rc)
2047 		goto release;
2048 
2049 	rule->match = *match;
2050 
2051 	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
2052 	if (rc) {
2053 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2054 		goto release;
2055 	}
2056 	netif_dbg(efx, drv, efx->net_dev,
2057 		  "Successfully parsed lhs rule (cookie %lx)\n",
2058 		  tc->cookie);
2059 	return 0;
2060 
2061 release:
2062 	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
2063 	if (!old)
2064 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
2065 				       efx_tc_lhs_rule_ht_params);
2066 	kfree(rule);
2067 	return rc;
2068 }
2069 
2070 static int efx_tc_flower_replace(struct efx_nic *efx,
2071 				 struct net_device *net_dev,
2072 				 struct flow_cls_offload *tc,
2073 				 struct efx_rep *efv)
2074 {
2075 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
2076 	struct netlink_ext_ack *extack = tc->common.extack;
2077 	const struct ip_tunnel_info *encap_info = NULL;
2078 	struct efx_tc_flow_rule *rule = NULL, *old;
2079 	struct efx_tc_mangler_state mung = {};
2080 	struct efx_tc_action_set *act = NULL;
2081 	const struct flow_action_entry *fa;
2082 	struct efx_rep *from_efv, *to_efv;
2083 	struct efx_tc_match match;
2084 	u32 acts_id;
2085 	s64 rc;
2086 	int i;
2087 
2088 	if (!tc_can_offload_extack(efx->net_dev, extack))
2089 		return -EOPNOTSUPP;
2090 	if (WARN_ON(!efx->tc))
2091 		return -ENETDOWN;
2092 	if (WARN_ON(!efx->tc->up))
2093 		return -ENETDOWN;
2094 
2095 	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
2096 	if (IS_ERR(from_efv)) {
2097 		/* Not from our PF or representors, so probably a tunnel dev */
2098 		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
2099 	}
2100 
2101 	if (efv != from_efv) {
2102 		/* can't happen */
2103 		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
2104 				       netdev_name(net_dev), efv ? "non-" : "",
2105 				       from_efv ? "non-" : "");
2106 		return -EINVAL;
2107 	}
2108 
2109 	/* Parse match */
2110 	memset(&match, 0, sizeof(match));
2111 	rc = efx_tc_flower_external_mport(efx, from_efv);
2112 	if (rc < 0) {
2113 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
2114 		return rc;
2115 	}
2116 	match.value.ingress_port = rc;
2117 	match.mask.ingress_port = ~0;
2118 	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
2119 	if (rc)
2120 		return rc;
2121 	if (efx_tc_match_is_encap(&match.mask)) {
2122 		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
2123 		return -EOPNOTSUPP;
2124 	}
2125 
2126 	if (efx_tc_rule_is_lhs_rule(fr, &match))
2127 		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
2128 						 net_dev);
2129 
2130 	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
2131 	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
2132 	 * to the initial memset(), so we don't need to do anything in that case.
2133 	 */
2134 	if (tc->common.chain_index) {
2135 		struct efx_tc_recirc_id *rid;
2136 
2137 		/* Note regarding passed net_dev:
2138 		 * VFreps and PF can share chain namespace, as they have
2139 		 * distinct ingress_mports.  So we don't need to burn an
2140 		 * extra recirc_id if both use the same chain_index.
2141 		 * (Strictly speaking, we could give each VFrep its own
2142 		 * recirc_id namespace that doesn't take IDs away from the
2143 		 * PF, but that would require a bunch of additional IDAs -
2144 		 * one for each representor - and that's not likely to be
2145 		 * the main cause of recirc_id exhaustion anyway.)
2146 		 */
2147 		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
2148 					   efx->net_dev);
2149 		if (IS_ERR(rid)) {
2150 			NL_SET_ERR_MSG_FMT_MOD(extack,
2151 					       "Failed to allocate a hardware recirculation ID for chain_index %u",
2152 					       tc->common.chain_index);
2153 			return PTR_ERR(rid);
2154 		}
2155 		match.rid = rid;
2156 		match.value.recirc_id = rid->fw_id;
2157 	}
2158 	match.mask.recirc_id = 0xff;
2159 
2160 	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
2161 	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
2162 	 */
2163 	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
2164 	    match.mask.ct_state_est && match.value.ct_state_est)
2165 		match.mask.ct_state_trk = 0;
2166 	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
2167 	 * match +trk-est (CT_HIT=0) despite being on an established connection.
2168 	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
2169 	 * still hit the software path.
2170 	 */
2171 	if (match.mask.ct_state_est && !match.value.ct_state_est) {
2172 		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			NL_SET_ERR_MSG_MOD(extack, "TCP flags and -est conflict for offload");
			rc = -EOPNOTSUPP;
2175 			goto release;
2176 		}
2177 		match.mask.tcp_syn_fin_rst = true;
2178 	}
2179 
2180 	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
2181 	if (rc)
2182 		goto release;
2183 
2184 	rule = kzalloc(sizeof(*rule), GFP_USER);
2185 	if (!rule) {
2186 		rc = -ENOMEM;
2187 		goto release;
2188 	}
2189 	INIT_LIST_HEAD(&rule->acts.list);
2190 	rule->cookie = tc->cookie;
2191 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
2192 						&rule->linkage,
2193 						efx_tc_match_action_ht_params);
2194 	if (IS_ERR(old)) {
2195 		rc = PTR_ERR(old);
2196 		goto release;
2197 	} else if (old) {
2198 		netif_dbg(efx, drv, efx->net_dev,
2199 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
2200 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
2201 		rc = -EEXIST;
2202 		goto release;
2203 	}
2204 
2205 	/* Parse actions */
2206 	act = kzalloc(sizeof(*act), GFP_USER);
2207 	if (!act) {
2208 		rc = -ENOMEM;
2209 		goto release;
2210 	}
2211 
2212 	/**
2213 	 * DOC: TC action translation
2214 	 *
2215 	 * Actions in TC are sequential and cumulative, with delivery actions
2216 	 * potentially anywhere in the order.  The EF100 MAE, however, takes
2217 	 * an 'action set list' consisting of 'action sets', each of which is
2218 	 * applied to the _original_ packet, and consists of a set of optional
2219 	 * actions in a fixed order with delivery at the end.
2220 	 * To translate between these two models, we maintain a 'cursor', @act,
2221 	 * which describes the cumulative effect of all the packet-mutating
2222 	 * actions encountered so far; on handling a delivery (mirred or drop)
2223 	 * action, once the action-set has been inserted into hardware, we
2224 	 * append @act to the action-set list (@rule->acts); if this is a pipe
2225 	 * action (mirred mirror) we then allocate a new @act with a copy of
2226 	 * the cursor state _before_ the delivery action, otherwise we set @act
2227 	 * to %NULL.
2228 	 * This ensures that every allocated action-set is either attached to
2229 	 * @rule->acts or pointed to by @act (and never both), and that only
2230 	 * those action-sets in @rule->acts exist in hardware.  Consequently,
2231 	 * in the failure path, @act only needs to be freed in memory, whereas
2232 	 * for @rule->acts we remove each action-set from hardware before
2233 	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
2234 	 * list itself is not in hardware.
2235 	 */
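	/* Worked example (hypothetical rule, not derived from this code): the
	 * TC action list
	 *   vlan pop; mirred mirror REP_A; vlan push id 5; mirred redirect REP_B
	 * becomes two action sets, each applied to the original packet:
	 *   { vlan_pop, deliver REP_A }
	 *   { vlan_pop, vlan_push(5), deliver REP_B }
	 * The mirror is a pipe action, so @act is snapshotted (in @save) before
	 * the first delivery and parsing continues from that state; the final
	 * redirect ends the line, leaving @act == NULL.
	 */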
2236 	flow_action_for_each(i, fa, &fr->action) {
2237 		struct efx_tc_action_set save;
2238 		u16 tci;
2239 
2240 		if (!act) {
2241 			/* more actions after a non-pipe action */
2242 			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
2243 			rc = -EINVAL;
2244 			goto release;
2245 		}
2246 
2247 		if ((fa->id == FLOW_ACTION_REDIRECT ||
2248 		     fa->id == FLOW_ACTION_MIRRED ||
2249 		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
2250 			struct efx_tc_counter_index *ctr;
2251 
2252 			/* Currently the only actions that want stats are
2253 			 * mirred and gact (ok, shot, trap, goto-chain), which
2254 			 * means we want stats just before delivery.  Also,
2255 			 * note that tunnel_key set shouldn't change the length
2256 			 * — it's only the subsequent mirred that does that,
2257 			 * and the stats are taken _before_ the mirred action
2258 			 * happens.
2259 			 */
2260 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
2261 				/* All supported actions that count either steal
2262 				 * (gact shot, mirred redirect) or clone act
2263 				 * (mirred mirror), so we should never get two
2264 				 * count actions on one action_set.
2265 				 */
2266 				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
2267 				rc = -EOPNOTSUPP;
2268 				goto release;
2269 			}
2270 
2271 			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
2272 				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
2273 						       fa->hw_stats);
2274 				rc = -EOPNOTSUPP;
2275 				goto release;
2276 			}
2277 
2278 			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
2279 							      EFX_TC_COUNTER_TYPE_AR);
2280 			if (IS_ERR(ctr)) {
2281 				rc = PTR_ERR(ctr);
2282 				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
2283 				goto release;
2284 			}
2285 			act->count = ctr;
2286 			INIT_LIST_HEAD(&act->count_user);
2287 		}
2288 
2289 		switch (fa->id) {
2290 		case FLOW_ACTION_DROP:
2291 			rc = efx_mae_alloc_action_set(efx, act);
2292 			if (rc) {
2293 				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
2294 				goto release;
2295 			}
2296 			list_add_tail(&act->list, &rule->acts.list);
2297 			act = NULL; /* end of the line */
2298 			break;
2299 		case FLOW_ACTION_REDIRECT:
2300 		case FLOW_ACTION_MIRRED:
2301 			save = *act;
2302 
2303 			if (encap_info) {
2304 				struct efx_tc_encap_action *encap;
2305 
2306 				if (!efx_tc_flower_action_order_ok(act,
2307 								   EFX_TC_AO_ENCAP)) {
2308 					rc = -EOPNOTSUPP;
2309 					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
2310 					goto release;
2311 				}
2312 				encap = efx_tc_flower_create_encap_md(
2313 						efx, encap_info, fa->dev, extack);
2314 				if (IS_ERR_OR_NULL(encap)) {
2315 					rc = PTR_ERR(encap);
2316 					if (!rc)
2317 						rc = -EIO; /* arbitrary */
2318 					goto release;
2319 				}
2320 				act->encap_md = encap;
2321 				list_add_tail(&act->encap_user, &encap->users);
2322 				act->dest_mport = encap->dest_mport;
2323 				act->deliver = 1;
2324 				if (act->count && !WARN_ON(!act->count->cnt)) {
2325 					/* This counter is used by an encap
2326 					 * action, which needs a reference back
					 * so it can prod the neighbour entry
					 * whenever traffic is seen.
2329 					 */
2330 					spin_lock_bh(&act->count->cnt->lock);
2331 					list_add_tail(&act->count_user,
2332 						      &act->count->cnt->users);
2333 					spin_unlock_bh(&act->count->cnt->lock);
2334 				}
2335 				rc = efx_mae_alloc_action_set(efx, act);
2336 				if (rc) {
2337 					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
2338 					goto release;
2339 				}
2340 				list_add_tail(&act->list, &rule->acts.list);
2341 				act->user = &rule->acts;
2342 				act = NULL;
2343 				if (fa->id == FLOW_ACTION_REDIRECT)
2344 					break; /* end of the line */
2345 				/* Mirror, so continue on with saved act */
2346 				save.count = NULL;
2347 				act = kzalloc(sizeof(*act), GFP_USER);
2348 				if (!act) {
2349 					rc = -ENOMEM;
2350 					goto release;
2351 				}
2352 				*act = save;
2353 				break;
2354 			}
2355 
2356 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
2357 				/* can't happen */
2358 				rc = -EOPNOTSUPP;
2359 				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
2360 				goto release;
2361 			}
2362 
2363 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
2364 			if (IS_ERR(to_efv)) {
2365 				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
2366 				rc = PTR_ERR(to_efv);
2367 				goto release;
2368 			}
2369 			rc = efx_tc_flower_external_mport(efx, to_efv);
2370 			if (rc < 0) {
2371 				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
2372 				goto release;
2373 			}
2374 			act->dest_mport = rc;
2375 			act->deliver = 1;
2376 			rc = efx_mae_alloc_action_set(efx, act);
2377 			if (rc) {
2378 				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
2379 				goto release;
2380 			}
2381 			list_add_tail(&act->list, &rule->acts.list);
2382 			act = NULL;
2383 			if (fa->id == FLOW_ACTION_REDIRECT)
2384 				break; /* end of the line */
2385 			/* Mirror, so continue on with saved act */
2386 			save.count = NULL;
2387 			act = kzalloc(sizeof(*act), GFP_USER);
2388 			if (!act) {
2389 				rc = -ENOMEM;
2390 				goto release;
2391 			}
2392 			*act = save;
2393 			break;
2394 		case FLOW_ACTION_VLAN_POP:
2395 			if (act->vlan_push) {
2396 				act->vlan_push--;
2397 			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
2398 				act->vlan_pop++;
2399 			} else {
2400 				NL_SET_ERR_MSG_MOD(extack,
2401 						   "More than two VLAN pops, or action order violated");
2402 				rc = -EINVAL;
2403 				goto release;
2404 			}
2405 			break;
2406 		case FLOW_ACTION_VLAN_PUSH:
2407 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
2408 				rc = -EINVAL;
2409 				NL_SET_ERR_MSG_MOD(extack,
2410 						   "More than two VLAN pushes, or action order violated");
2411 				goto release;
2412 			}
2413 			tci = fa->vlan.vid & VLAN_VID_MASK;
2414 			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
2415 			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
2416 			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
2417 			act->vlan_push++;
2418 			break;
2419 		case FLOW_ACTION_ADD:
2420 			rc = efx_tc_pedit_add(efx, act, fa, extack);
2421 			if (rc < 0)
2422 				goto release;
2423 			break;
2424 		case FLOW_ACTION_MANGLE:
2425 			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
2426 			if (rc < 0)
2427 				goto release;
2428 			break;
2429 		case FLOW_ACTION_TUNNEL_ENCAP:
2430 			if (encap_info) {
2431 				/* Can't specify encap multiple times.
2432 				 * If you want to overwrite an existing
2433 				 * encap_info, use an intervening
2434 				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
2435 				 */
2436 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
2437 				rc = -EINVAL;
2438 				goto release;
2439 			}
2440 			if (!fa->tunnel) {
2441 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
2442 				rc = -EOPNOTSUPP;
2443 				goto release;
2444 			}
2445 			encap_info = fa->tunnel;
2446 			break;
2447 		case FLOW_ACTION_TUNNEL_DECAP:
2448 			if (encap_info) {
2449 				encap_info = NULL;
2450 				break;
2451 			}
2452 			/* Since we don't support enc_key matches on ingress
2453 			 * (and if we did there'd be no tunnel-device to give
2454 			 * us a type), we can't offload a decap that's not
2455 			 * just undoing a previous encap action.
2456 			 */
2457 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
2458 			rc = -EOPNOTSUPP;
2459 			goto release;
2460 		case FLOW_ACTION_CT:
2461 			if (fa->ct.action != TCA_CT_ACT_NAT) {
2462 				rc = -EOPNOTSUPP;
2463 				NL_SET_ERR_MSG_FMT_MOD(extack, "Can only offload CT 'nat' action in RHS rules, not %d", fa->ct.action);
2464 				goto release;
2465 			}
2466 			act->do_nat = 1;
2467 			break;
2468 		default:
2469 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
2470 					       fa->id);
2471 			rc = -EOPNOTSUPP;
2472 			goto release;
2473 		}
2474 	}
2475 
2476 	rc = efx_tc_incomplete_mangle(&mung, extack);
2477 	if (rc < 0)
2478 		goto release;
2479 	if (act) {
2480 		/* Not shot/redirected, so deliver to default dest */
2481 		if (from_efv == EFX_EFV_PF)
2482 			/* Rule applies to traffic from the wire,
2483 			 * and default dest is thus the PF
2484 			 */
2485 			efx_mae_mport_uplink(efx, &act->dest_mport);
2486 		else
2487 			/* Representor, so rule applies to traffic from
2488 			 * representee, and default dest is thus the rep.
2489 			 * All reps use the same mport for delivery
2490 			 */
2491 			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2492 					    &act->dest_mport);
2493 		act->deliver = 1;
2494 		rc = efx_mae_alloc_action_set(efx, act);
2495 		if (rc) {
2496 			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
2497 			goto release;
2498 		}
2499 		list_add_tail(&act->list, &rule->acts.list);
2500 		act = NULL; /* Prevent double-free in error path */
2501 	}
2502 
2503 	netif_dbg(efx, drv, efx->net_dev,
2504 		  "Successfully parsed filter (cookie %lx)\n",
2505 		  tc->cookie);
2506 
2507 	rule->match = match;
2508 
2509 	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
2510 	if (rc) {
2511 		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
2512 		goto release;
2513 	}
2514 	if (from_efv == EFX_EFV_PF)
2515 		/* PF netdev, so rule applies to traffic from wire */
2516 		rule->fallback = &efx->tc->facts.pf;
2517 	else
2518 		/* repdev, so rule applies to traffic from representee */
2519 		rule->fallback = &efx->tc->facts.reps;
2520 	if (!efx_tc_check_ready(efx, rule)) {
2521 		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
2522 		acts_id = rule->fallback->fw_id;
2523 	} else {
2524 		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
2525 		acts_id = rule->acts.fw_id;
2526 	}
2527 	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
2528 				 acts_id, &rule->fw_id);
2529 	if (rc) {
2530 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2531 		goto release_acts;
2532 	}
2533 	return 0;
2534 
2535 release_acts:
2536 	efx_mae_free_action_set_list(efx, &rule->acts);
2537 release:
2538 	/* We failed to insert the rule, so free up any entries we created in
2539 	 * subsidiary tables.
2540 	 */
2541 	if (match.rid)
2542 		efx_tc_put_recirc_id(efx, match.rid);
2543 	if (act)
2544 		efx_tc_free_action_set(efx, act, false);
2545 	if (rule) {
2546 		if (!old)
2547 			rhashtable_remove_fast(&efx->tc->match_action_ht,
2548 					       &rule->linkage,
2549 					       efx_tc_match_action_ht_params);
2550 		efx_tc_free_action_set_list(efx, &rule->acts, false);
2551 	}
2552 	kfree(rule);
2553 	return rc;
2554 }
2555 
2556 static int efx_tc_flower_destroy(struct efx_nic *efx,
2557 				 struct net_device *net_dev,
2558 				 struct flow_cls_offload *tc)
2559 {
2560 	struct netlink_ext_ack *extack = tc->common.extack;
2561 	struct efx_tc_lhs_rule *lhs_rule;
2562 	struct efx_tc_flow_rule *rule;
2563 
2564 	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
2565 					  efx_tc_lhs_rule_ht_params);
2566 	if (lhs_rule) {
2567 		/* Remove it from HW */
2568 		efx_mae_remove_lhs_rule(efx, lhs_rule);
2569 		/* Delete it from SW */
2570 		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
2571 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
2572 				       efx_tc_lhs_rule_ht_params);
2573 		if (lhs_rule->match.encap)
2574 			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
2575 		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
2576 			  lhs_rule->cookie);
2577 		kfree(lhs_rule);
2578 		return 0;
2579 	}
2580 
2581 	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
2582 				      efx_tc_match_action_ht_params);
2583 	if (!rule) {
2584 		/* Only log a message if we're the ingress device.  Otherwise
2585 		 * it's a foreign filter and we might just not have been
2586 		 * interested (e.g. we might not have been the egress device
2587 		 * either).
2588 		 */
2589 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2590 			netif_warn(efx, drv, efx->net_dev,
2591 				   "Filter %lx not found to remove\n", tc->cookie);
2592 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2593 		return -ENOENT;
2594 	}
2595 
2596 	/* Remove it from HW */
2597 	efx_tc_delete_rule(efx, rule);
2598 	/* Delete it from SW */
2599 	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
2600 			       efx_tc_match_action_ht_params);
2601 	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
2602 	kfree(rule);
2603 	return 0;
2604 }
2605 
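/* Counters are not read from hardware synchronously: the MAE streams
 * periodic counter updates, which the driver accumulates into the
 * efx_tc_counter (under cnt->lock) as they arrive.  Servicing
 * FLOW_CLS_STATS therefore only has to report the delta since TC last
 * asked.
 */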
2606 static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
2607 			       struct flow_cls_offload *tc)
2608 {
2609 	struct netlink_ext_ack *extack = tc->common.extack;
2610 	struct efx_tc_counter_index *ctr;
2611 	struct efx_tc_counter *cnt;
2612 	u64 packets, bytes;
2613 
2614 	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
2615 	if (!ctr) {
2616 		/* See comment in efx_tc_flower_destroy() */
2617 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2618 			if (net_ratelimit())
2619 				netif_warn(efx, drv, efx->net_dev,
2620 					   "Filter %lx not found for stats\n",
2621 					   tc->cookie);
2622 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2623 		return -ENOENT;
2624 	}
2625 	if (WARN_ON(!ctr->cnt)) /* can't happen */
2626 		return -EIO;
2627 	cnt = ctr->cnt;
2628 
2629 	spin_lock_bh(&cnt->lock);
2630 	/* Report only new pkts/bytes since last time TC asked */
2631 	packets = cnt->packets;
2632 	bytes = cnt->bytes;
2633 	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
2634 			  packets - cnt->old_packets, 0, cnt->touched,
2635 			  FLOW_ACTION_HW_STATS_DELAYED);
2636 	cnt->old_packets = packets;
2637 	cnt->old_bytes = bytes;
2638 	spin_unlock_bh(&cnt->lock);
2639 	return 0;
2640 }
2641 
2642 int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
2643 		  struct flow_cls_offload *tc, struct efx_rep *efv)
2644 {
2645 	int rc;
2646 
2647 	if (!efx->tc)
2648 		return -EOPNOTSUPP;
2649 
2650 	mutex_lock(&efx->tc->mutex);
2651 	switch (tc->command) {
2652 	case FLOW_CLS_REPLACE:
2653 		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
2654 		break;
2655 	case FLOW_CLS_DESTROY:
2656 		rc = efx_tc_flower_destroy(efx, net_dev, tc);
2657 		break;
2658 	case FLOW_CLS_STATS:
2659 		rc = efx_tc_flower_stats(efx, net_dev, tc);
2660 		break;
2661 	default:
2662 		rc = -EOPNOTSUPP;
2663 		break;
2664 	}
2665 	mutex_unlock(&efx->tc->mutex);
2666 	return rc;
2667 }
2668 
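/* Default rules make the switch behave like a plain NIC before any TC
 * rules are offloaded: each one matches only on ingress m-port, at the
 * lowest priority (EFX_TC_PRIO_DFLT), and delivers to the corresponding
 * peer (PF uplink <-> wire, representee -> the shared representor m-port).
 */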
2669 static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
2670 					 u32 eg_port, struct efx_tc_flow_rule *rule)
2671 {
2672 	struct efx_tc_action_set_list *acts = &rule->acts;
2673 	struct efx_tc_match *match = &rule->match;
2674 	struct efx_tc_action_set *act;
2675 	int rc;
2676 
2677 	match->value.ingress_port = ing_port;
2678 	match->mask.ingress_port = ~0;
2679 	act = kzalloc(sizeof(*act), GFP_KERNEL);
2680 	if (!act)
2681 		return -ENOMEM;
2682 	act->deliver = 1;
2683 	act->dest_mport = eg_port;
2684 	rc = efx_mae_alloc_action_set(efx, act);
2685 	if (rc)
2686 		goto fail1;
2687 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2688 	list_add_tail(&act->list, &acts->list);
2689 	rc = efx_mae_alloc_action_set_list(efx, acts);
2690 	if (rc)
2691 		goto fail2;
2692 	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
2693 				 acts->fw_id, &rule->fw_id);
2694 	if (rc)
2695 		goto fail3;
2696 	return 0;
2697 fail3:
2698 	efx_mae_free_action_set_list(efx, acts);
2699 fail2:
2700 	list_del(&act->list);
2701 	efx_mae_free_action_set(efx, act->fw_id);
2702 fail1:
2703 	kfree(act);
2704 	return rc;
2705 }
2706 
2707 static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
2708 {
2709 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
2710 	u32 ing_port, eg_port;
2711 
2712 	efx_mae_mport_uplink(efx, &ing_port);
2713 	efx_mae_mport_wire(efx, &eg_port);
2714 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2715 }
2716 
2717 static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
2718 {
2719 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
2720 	u32 ing_port, eg_port;
2721 
2722 	efx_mae_mport_wire(efx, &ing_port);
2723 	efx_mae_mport_uplink(efx, &eg_port);
2724 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2725 }
2726 
2727 int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
2728 {
2729 	struct efx_tc_flow_rule *rule = &efv->dflt;
2730 	struct efx_nic *efx = efv->parent;
2731 	u32 ing_port, eg_port;
2732 
2733 	efx_mae_mport_mport(efx, efv->mport, &ing_port);
2734 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2735 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2736 }
2737 
2738 void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
2739 				     struct efx_tc_flow_rule *rule)
2740 {
2741 	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
2742 		efx_tc_delete_rule(efx, rule);
2743 	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2744 }
2745 
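/* Fallback action-set-lists deliver to the default destination, just like
 * the default rules above.  A TC rule whose own action-set-list isn't
 * usable yet (e.g. an encap action still waiting for neighbour resolution)
 * is initially pointed at its fallback; see the efx_tc_check_ready() logic
 * in efx_tc_flower_replace().
 */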
2746 static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
2747 					  struct efx_tc_action_set_list *acts)
2748 {
2749 	struct efx_tc_action_set *act;
2750 	int rc;
2751 
2752 	act = kzalloc(sizeof(*act), GFP_KERNEL);
2753 	if (!act)
2754 		return -ENOMEM;
2755 	act->deliver = 1;
2756 	act->dest_mport = eg_port;
2757 	rc = efx_mae_alloc_action_set(efx, act);
2758 	if (rc)
2759 		goto fail1;
2760 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2761 	list_add_tail(&act->list, &acts->list);
2762 	rc = efx_mae_alloc_action_set_list(efx, acts);
2763 	if (rc)
2764 		goto fail2;
2765 	return 0;
2766 fail2:
2767 	list_del(&act->list);
2768 	efx_mae_free_action_set(efx, act->fw_id);
2769 fail1:
2770 	kfree(act);
2771 	return rc;
2772 }
2773 
2774 static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
2775 {
2776 	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
2777 	u32 eg_port;
2778 
2779 	efx_mae_mport_uplink(efx, &eg_port);
2780 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2781 }
2782 
2783 static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
2784 {
2785 	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
2786 	u32 eg_port;
2787 
2788 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2789 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2790 }
2791 
2792 static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
2793 					     struct efx_tc_action_set_list *acts)
2794 {
2795 	efx_tc_free_action_set_list(efx, acts, true);
2796 }
2797 
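/* The representors' m-port is an extra m-port allocated from the MAE that
 * isn't backed by any physical port.  Default rules deliver representee
 * traffic to it, and efx_tc_insert_rep_filters() below installs catch-all
 * filters (using the m-port selector as a vport ID) so that this traffic
 * reaches the PF's receive path, where it can be handed off to the right
 * representor netdev.
 */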
2798 static int efx_tc_configure_rep_mport(struct efx_nic *efx)
2799 {
2800 	u32 rep_mport_label;
2801 	int rc;
2802 
2803 	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
2804 	if (rc)
2805 		return rc;
2806 	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
2807 		efx->tc->reps_mport_id, rep_mport_label);
2808 	/* Use mport *selector* as vport ID */
2809 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2810 			    &efx->tc->reps_mport_vport_id);
2811 	return 0;
2812 }
2813 
2814 static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
2815 {
2816 	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
2817 	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
2818 }
2819 
2820 int efx_tc_insert_rep_filters(struct efx_nic *efx)
2821 {
2822 	struct efx_filter_spec promisc, allmulti;
2823 	int rc;
2824 
2825 	if (efx->type->is_vf)
2826 		return 0;
2827 	if (!efx->tc)
2828 		return 0;
2829 	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
2830 	efx_filter_set_uc_def(&promisc);
2831 	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
2832 	rc = efx_filter_insert_filter(efx, &promisc, false);
2833 	if (rc < 0)
2834 		return rc;
2835 	efx->tc->reps_filter_uc = rc;
2836 	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
2837 	efx_filter_set_mc_def(&allmulti);
2838 	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
2839 	rc = efx_filter_insert_filter(efx, &allmulti, false);
2840 	if (rc < 0)
2841 		return rc;
2842 	efx->tc->reps_filter_mc = rc;
2843 	return 0;
2844 }
2845 
2846 void efx_tc_remove_rep_filters(struct efx_nic *efx)
2847 {
2848 	if (efx->type->is_vf)
2849 		return;
2850 	if (!efx->tc)
2851 		return;
2852 	if (efx->tc->reps_filter_mc >= 0)
2853 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
2854 	efx->tc->reps_filter_mc = -1;
2855 	if (efx->tc->reps_filter_uc >= 0)
2856 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
2857 	efx->tc->reps_filter_uc = -1;
2858 }
2859 
2860 int efx_init_tc(struct efx_nic *efx)
2861 {
2862 	int rc;
2863 
2864 	rc = efx_mae_get_caps(efx, efx->tc->caps);
2865 	if (rc)
2866 		return rc;
2867 	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
		/* Firmware supports some match fields the driver doesn't know
		 * about.  Not fatal unless any of those fields are required
		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS), but we have no way to
		 * tell whether they are.
		 */
		netif_warn(efx, probe, efx->net_dev,
			   "FW reports %u match fields, driver knows only %u\n",
			   efx->tc->caps->match_field_count, MAE_NUM_FIELDS);
2875 	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
2876 		netif_err(efx, probe, efx->net_dev,
2877 			  "Too few action prios supported (have %u, need %u)\n",
2878 			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
2879 		return -EIO;
2880 	}
2881 	rc = efx_tc_configure_default_rule_pf(efx);
2882 	if (rc)
2883 		return rc;
2884 	rc = efx_tc_configure_default_rule_wire(efx);
2885 	if (rc)
2886 		return rc;
2887 	rc = efx_tc_configure_rep_mport(efx);
2888 	if (rc)
2889 		return rc;
2890 	rc = efx_tc_configure_fallback_acts_pf(efx);
2891 	if (rc)
2892 		return rc;
2893 	rc = efx_tc_configure_fallback_acts_reps(efx);
2894 	if (rc)
2895 		return rc;
2896 	rc = efx_mae_get_tables(efx);
2897 	if (rc)
2898 		return rc;
2899 	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
2900 	if (rc)
2901 		goto out_free;
2902 	efx->tc->up = true;
2903 	return 0;
2904 out_free:
2905 	efx_mae_free_tables(efx);
2906 	return rc;
2907 }
2908 
2909 void efx_fini_tc(struct efx_nic *efx)
2910 {
2911 	/* We can get called even if efx_init_struct_tc() failed */
2912 	if (!efx->tc)
2913 		return;
2914 	if (efx->tc->up)
2915 		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
2916 	efx_tc_deconfigure_rep_mport(efx);
2917 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
2918 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
2919 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
2920 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
2921 	efx->tc->up = false;
2922 	efx_mae_free_tables(efx);
2923 }
2924 
2925 /* At teardown time, all TC filter rules (and thus all resources they created)
2926  * should already have been removed.  If we find any in our hashtables, make a
2927  * cursory attempt to clean up the software side.
2928  */
2929 static void efx_tc_encap_match_free(void *ptr, void *__unused)
2930 {
2931 	struct efx_tc_encap_match *encap = ptr;
2932 
2933 	WARN_ON(refcount_read(&encap->ref));
2934 	kfree(encap);
2935 }
2936 
2937 static void efx_tc_recirc_free(void *ptr, void *arg)
2938 {
2939 	struct efx_tc_recirc_id *rid = ptr;
2940 	struct efx_nic *efx = arg;
2941 
2942 	WARN_ON(refcount_read(&rid->ref));
2943 	ida_free(&efx->tc->recirc_ida, rid->fw_id);
2944 	kfree(rid);
2945 }
2946 
2947 static void efx_tc_lhs_free(void *ptr, void *arg)
2948 {
2949 	struct efx_tc_lhs_rule *rule = ptr;
2950 	struct efx_nic *efx = arg;
2951 
2952 	netif_err(efx, drv, efx->net_dev,
2953 		  "tc lhs_rule %lx still present at teardown, removing\n",
2954 		  rule->cookie);
2955 
2956 	if (rule->lhs_act.zone)
2957 		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
2958 	if (rule->lhs_act.count)
2959 		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
2960 	efx_mae_remove_lhs_rule(efx, rule);
2961 
2962 	kfree(rule);
2963 }
2964 
2965 static void efx_tc_mac_free(void *ptr, void *__unused)
2966 {
2967 	struct efx_tc_mac_pedit_action *ped = ptr;
2968 
2969 	WARN_ON(refcount_read(&ped->ref));
2970 	kfree(ped);
2971 }
2972 
2973 static void efx_tc_flow_free(void *ptr, void *arg)
2974 {
2975 	struct efx_tc_flow_rule *rule = ptr;
2976 	struct efx_nic *efx = arg;
2977 
2978 	netif_err(efx, drv, efx->net_dev,
2979 		  "tc rule %lx still present at teardown, removing\n",
2980 		  rule->cookie);
2981 
2982 	/* Also releases entries in subsidiary tables */
2983 	efx_tc_delete_rule(efx, rule);
2984 
2985 	kfree(rule);
2986 }
2987 
2988 int efx_init_struct_tc(struct efx_nic *efx)
2989 {
2990 	int rc;
2991 
2992 	if (efx->type->is_vf)
2993 		return 0;
2994 
2995 	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
2996 	if (!efx->tc)
2997 		return -ENOMEM;
2998 	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
2999 	if (!efx->tc->caps) {
3000 		rc = -ENOMEM;
3001 		goto fail_alloc_caps;
3002 	}
3003 	INIT_LIST_HEAD(&efx->tc->block_list);
3004 
3005 	mutex_init(&efx->tc->mutex);
3006 	init_waitqueue_head(&efx->tc->flush_wq);
3007 	rc = efx_tc_init_encap_actions(efx);
3008 	if (rc < 0)
3009 		goto fail_encap_actions;
3010 	rc = efx_tc_init_counters(efx);
3011 	if (rc < 0)
3012 		goto fail_counters;
3013 	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
3014 	if (rc < 0)
3015 		goto fail_mac_ht;
3016 	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
3017 	if (rc < 0)
3018 		goto fail_encap_match_ht;
3019 	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
3020 	if (rc < 0)
3021 		goto fail_match_action_ht;
3022 	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
3023 	if (rc < 0)
3024 		goto fail_lhs_rule_ht;
3025 	rc = efx_tc_init_conntrack(efx);
3026 	if (rc < 0)
3027 		goto fail_conntrack;
3028 	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
3029 	if (rc < 0)
3030 		goto fail_recirc_ht;
3031 	ida_init(&efx->tc->recirc_ida);
3032 	efx->tc->reps_filter_uc = -1;
3033 	efx->tc->reps_filter_mc = -1;
3034 	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
3035 	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
3036 	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
3037 	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
3038 	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
3039 	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
3040 	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
3041 	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
3042 	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
3043 	return 0;
3044 fail_recirc_ht:
3045 	efx_tc_destroy_conntrack(efx);
3046 fail_conntrack:
3047 	rhashtable_destroy(&efx->tc->lhs_rule_ht);
3048 fail_lhs_rule_ht:
3049 	rhashtable_destroy(&efx->tc->match_action_ht);
3050 fail_match_action_ht:
3051 	rhashtable_destroy(&efx->tc->encap_match_ht);
3052 fail_encap_match_ht:
3053 	rhashtable_destroy(&efx->tc->mac_ht);
3054 fail_mac_ht:
3055 	efx_tc_destroy_counters(efx);
3056 fail_counters:
3057 	efx_tc_destroy_encap_actions(efx);
3058 fail_encap_actions:
3059 	mutex_destroy(&efx->tc->mutex);
3060 	kfree(efx->tc->caps);
3061 fail_alloc_caps:
3062 	kfree(efx->tc);
3063 	efx->tc = NULL;
3064 	return rc;
3065 }
3066 
3067 void efx_fini_struct_tc(struct efx_nic *efx)
3068 {
3069 	if (!efx->tc)
3070 		return;
3071 
3072 	mutex_lock(&efx->tc->mutex);
3073 	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
3074 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
3075 	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
3076 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
3077 	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
3078 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
3079 	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
3080 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
3081 	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
3082 	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
3083 				    efx);
3084 	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
3085 				    efx_tc_encap_match_free, NULL);
3086 	efx_tc_fini_conntrack(efx);
3087 	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
3088 	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
3089 	ida_destroy(&efx->tc->recirc_ida);
3090 	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
3091 	efx_tc_fini_counters(efx);
3092 	efx_tc_fini_encap_actions(efx);
3093 	mutex_unlock(&efx->tc->mutex);
3094 	mutex_destroy(&efx->tc->mutex);
3095 	kfree(efx->tc->caps);
3096 	kfree(efx->tc);
3097 	efx->tc = NULL;
3098 }
3099