1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/debugfs.h>
19 
20 #include "lib/fs_chains.h"
21 #include "en/tc_ct.h"
22 #include "en/tc/ct_fs.h"
23 #include "en/tc_priv.h"
24 #include "en/mod_hdr.h"
25 #include "en/mapping.h"
26 #include "en/tc/post_act.h"
27 #include "en.h"
28 #include "en_tc.h"
29 #include "en_rep.h"
30 #include "fs_core.h"
31 
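/* Connection state bits written into the ctstate register (CTSTATE_TO_REG)
 * by the per-entry modify-header actions and matched on by ct_state rules.
 */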
32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
33 #define MLX5_CT_STATE_TRK_BIT BIT(2)
34 #define MLX5_CT_STATE_NAT_BIT BIT(3)
35 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
36 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
37 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
38 
39 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
40 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
41 
42 /* Statically allocate modify actions for
43  * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
44  * This will be increased dynamically if needed (for the ipv6 snat + dnat).
45  */
46 #define MLX5_CT_MIN_MOD_ACTS 10
47 
48 #define ct_dbg(fmt, args...)\
49 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
50 
51 struct mlx5_tc_ct_debugfs {
52 	struct {
53 		atomic_t offloaded;
54 		atomic_t rx_dropped;
55 	} stats;
56 
57 	struct dentry *root;
58 };
59 
60 struct mlx5_tc_ct_priv {
61 	struct mlx5_core_dev *dev;
62 	const struct net_device *netdev;
63 	struct mod_hdr_tbl *mod_hdr_tbl;
64 	struct xarray tuple_ids;
65 	struct rhashtable zone_ht;
66 	struct rhashtable ct_tuples_ht;
67 	struct rhashtable ct_tuples_nat_ht;
68 	struct mlx5_flow_table *ct;
69 	struct mlx5_flow_table *ct_nat;
70 	struct mlx5e_post_act *post_act;
71 	struct mutex control_lock; /* guards parallel adds/dels */
72 	struct mapping_ctx *zone_mapping;
73 	struct mapping_ctx *labels_mapping;
74 	enum mlx5_flow_namespace_type ns_type;
75 	struct mlx5_fs_chains *chains;
76 	struct mlx5_ct_fs *fs;
77 	struct mlx5_ct_fs_ops *fs_ops;
78 	spinlock_t ht_lock; /* protects ft entries */
79 	struct workqueue_struct *wq;
80 
81 	struct mlx5_tc_ct_debugfs debugfs;
82 };
83 
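/* Per tc filter CT context: the rule redirecting the original match into the
 * pre_ct/pre_ct_nat tables, its attributes, the per-zone ft object and the
 * chain mapping used to restore the chain after a miss from the CT tables.
 */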
84 struct mlx5_ct_flow {
85 	struct mlx5_flow_attr *pre_ct_attr;
86 	struct mlx5_flow_handle *pre_ct_rule;
87 	struct mlx5_ct_ft *ft;
88 	u32 chain_mapping;
89 };
90 
91 struct mlx5_ct_zone_rule {
92 	struct mlx5_ct_fs_rule *rule;
93 	struct mlx5e_mod_hdr_handle *mh;
94 	struct mlx5_flow_attr *attr;
95 	bool nat;
96 };
97 
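/* Per-zone two-entry table placed in front of the shared ct/ct_nat tables:
 * one group/rule matching the zone+ctstate registers, one miss group/rule,
 * and the modify header that writes the zone register.
 */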
98 struct mlx5_tc_ct_pre {
99 	struct mlx5_flow_table *ft;
100 	struct mlx5_flow_group *flow_grp;
101 	struct mlx5_flow_group *miss_grp;
102 	struct mlx5_flow_handle *flow_rule;
103 	struct mlx5_flow_handle *miss_rule;
104 	struct mlx5_modify_hdr *modify_hdr;
105 };
106 
107 struct mlx5_ct_ft {
108 	struct rhash_head node;
109 	u16 zone;
110 	u32 zone_restore_id;
111 	refcount_t refcount;
112 	struct nf_flowtable *nf_ft;
113 	struct mlx5_tc_ct_priv *ct_priv;
114 	struct rhashtable ct_entries_ht;
115 	struct mlx5_tc_ct_pre pre_ct;
116 	struct mlx5_tc_ct_pre pre_ct_nat;
117 };
118 
119 struct mlx5_ct_tuple {
120 	u16 addr_type;
121 	__be16 n_proto;
122 	u8 ip_proto;
123 	struct {
124 		union {
125 			__be32 src_v4;
126 			struct in6_addr src_v6;
127 		};
128 		union {
129 			__be32 dst_v4;
130 			struct in6_addr dst_v6;
131 		};
132 	} ip;
133 	struct {
134 		__be16 src;
135 		__be16 dst;
136 	} port;
137 
138 	u16 zone;
139 };
140 
141 struct mlx5_ct_counter {
142 	struct mlx5_fc *counter;
143 	refcount_t refcount;
144 	bool is_shared;
145 };
146 
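/* Entry flags: VALID is set only after both zone rules were offloaded;
 * readers that look an entry up under ht_lock and find it not (yet) valid
 * bail out instead of using it.
 */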
147 enum {
148 	MLX5_CT_ENTRY_FLAG_VALID,
149 };
150 
151 struct mlx5_ct_entry {
152 	struct rhash_head node;
153 	struct rhash_head tuple_node;
154 	struct rhash_head tuple_nat_node;
155 	struct mlx5_ct_counter *counter;
156 	unsigned long cookie;
157 	unsigned long restore_cookie;
158 	struct mlx5_ct_tuple tuple;
159 	struct mlx5_ct_tuple tuple_nat;
160 	struct mlx5_ct_zone_rule zone_rules[2];
161 
162 	struct mlx5_tc_ct_priv *ct_priv;
163 	struct work_struct work;
164 
165 	refcount_t refcnt;
166 	unsigned long flags;
167 };
168 
169 static void
170 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
171 				 struct mlx5_flow_attr *attr,
172 				 struct mlx5e_mod_hdr_handle *mh);
173 
174 static const struct rhashtable_params cts_ht_params = {
175 	.head_offset = offsetof(struct mlx5_ct_entry, node),
176 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
177 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
178 	.automatic_shrinking = true,
179 	.min_size = 16 * 1024,
180 };
181 
182 static const struct rhashtable_params zone_params = {
183 	.head_offset = offsetof(struct mlx5_ct_ft, node),
184 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
185 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
186 	.automatic_shrinking = true,
187 };
188 
189 static const struct rhashtable_params tuples_ht_params = {
190 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
191 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
192 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
193 	.automatic_shrinking = true,
194 	.min_size = 16 * 1024,
195 };
196 
197 static const struct rhashtable_params tuples_nat_ht_params = {
198 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
199 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
200 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
201 	.automatic_shrinking = true,
202 	.min_size = 16 * 1024,
203 };
204 
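/* tuple_nat_node is hashed into ct_tuples_nat_ht only when the NAT tuple
 * differs from the original one, so a linked node implies the entry NATs.
 */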
205 static bool
206 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
207 {
208 	return !!(entry->tuple_nat_node.next);
209 }
210 
211 static int
212 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
213 		       u32 *labels, u32 *id)
214 {
215 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
216 		*id = 0;
217 		return 0;
218 	}
219 
220 	if (mapping_add(ct_priv->labels_mapping, labels, id))
221 		return -EOPNOTSUPP;
222 
223 	return 0;
224 }
225 
226 static void
227 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
228 {
229 	if (id)
230 		mapping_remove(ct_priv->labels_mapping, id);
231 }
232 
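/* Extract the tuple (addresses, ports, protocol) from the flow rule passed
 * by the nf flowtable offload callback. Only IPv4/IPv6 with TCP, UDP or GRE
 * is supported; anything else is rejected with -EOPNOTSUPP.
 */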
233 static int
234 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
235 {
236 	struct flow_match_control control;
237 	struct flow_match_basic basic;
238 
239 	flow_rule_match_basic(rule, &basic);
240 	flow_rule_match_control(rule, &control);
241 
242 	tuple->n_proto = basic.key->n_proto;
243 	tuple->ip_proto = basic.key->ip_proto;
244 	tuple->addr_type = control.key->addr_type;
245 
246 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
247 		struct flow_match_ipv4_addrs match;
248 
249 		flow_rule_match_ipv4_addrs(rule, &match);
250 		tuple->ip.src_v4 = match.key->src;
251 		tuple->ip.dst_v4 = match.key->dst;
252 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
253 		struct flow_match_ipv6_addrs match;
254 
255 		flow_rule_match_ipv6_addrs(rule, &match);
256 		tuple->ip.src_v6 = match.key->src;
257 		tuple->ip.dst_v6 = match.key->dst;
258 	} else {
259 		return -EOPNOTSUPP;
260 	}
261 
262 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
263 		struct flow_match_ports match;
264 
265 		flow_rule_match_ports(rule, &match);
266 		switch (tuple->ip_proto) {
267 		case IPPROTO_TCP:
268 		case IPPROTO_UDP:
269 			tuple->port.src = match.key->src;
270 			tuple->port.dst = match.key->dst;
271 			break;
272 		default:
273 			return -EOPNOTSUPP;
274 		}
275 	} else {
276 		if (tuple->ip_proto != IPPROTO_GRE)
277 			return -EOPNOTSUPP;
278 	}
279 
280 	return 0;
281 }
282 
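/* Build the post-NAT tuple by replaying the rule's mangle actions on top of
 * a copy of the original tuple (the caller copies tuple into tuple_nat
 * before calling this).
 */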
283 static int
284 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
285 			     struct flow_rule *rule)
286 {
287 	struct flow_action *flow_action = &rule->action;
288 	struct flow_action_entry *act;
289 	u32 offset, val, ip6_offset;
290 	int i;
291 
292 	flow_action_for_each(i, act, flow_action) {
293 		if (act->id != FLOW_ACTION_MANGLE)
294 			continue;
295 
296 		offset = act->mangle.offset;
297 		val = act->mangle.val;
298 		switch (act->mangle.htype) {
299 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
300 			if (offset == offsetof(struct iphdr, saddr))
301 				tuple->ip.src_v4 = cpu_to_be32(val);
302 			else if (offset == offsetof(struct iphdr, daddr))
303 				tuple->ip.dst_v4 = cpu_to_be32(val);
304 			else
305 				return -EOPNOTSUPP;
306 			break;
307 
308 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
309 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
310 			ip6_offset /= 4;
311 			if (ip6_offset < 4)
312 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
313 			else if (ip6_offset < 8)
314 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
315 			else
316 				return -EOPNOTSUPP;
317 			break;
318 
319 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
320 			if (offset == offsetof(struct tcphdr, source))
321 				tuple->port.src = cpu_to_be16(val);
322 			else if (offset == offsetof(struct tcphdr, dest))
323 				tuple->port.dst = cpu_to_be16(val);
324 			else
325 				return -EOPNOTSUPP;
326 			break;
327 
328 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
329 			if (offset == offsetof(struct udphdr, source))
330 				tuple->port.src = cpu_to_be16(val);
331 			else if (offset == offsetof(struct udphdr, dest))
332 				tuple->port.dst = cpu_to_be16(val);
333 			else
334 				return -EOPNOTSUPP;
335 			break;
336 
337 		default:
338 			return -EOPNOTSUPP;
339 		}
340 	}
341 
342 	return 0;
343 }
344 
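/* Resolve the entry's ingress device (walking vlan/macvlan upper devices) to
 * a flow source hint: LOCAL_VPORT for VF reps on the same device, UPLINK for
 * uplink reps, tunnel and lag devices, ANY_VPORT otherwise.
 */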
345 static int
346 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
347 				 struct net_device *ndev)
348 {
349 	struct mlx5e_priv *other_priv = netdev_priv(ndev);
350 	struct mlx5_core_dev *mdev = ct_priv->dev;
351 	bool vf_rep, uplink_rep;
352 
353 	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
354 	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
355 
356 	if (vf_rep)
357 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
358 	if (uplink_rep)
359 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
360 	if (is_vlan_dev(ndev))
361 		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
362 	if (netif_is_macvlan(ndev))
363 		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
364 	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
365 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
366 
367 	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
368 }
369 
370 static int
371 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
372 			   struct mlx5_flow_spec *spec,
373 			   struct flow_rule *rule)
374 {
375 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
376 				       outer_headers);
377 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
378 				       outer_headers);
379 	u16 addr_type = 0;
380 	u8 ip_proto = 0;
381 
382 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
383 		struct flow_match_basic match;
384 
385 		flow_rule_match_basic(rule, &match);
386 
387 		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
388 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
389 			 match.mask->ip_proto);
390 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
391 			 match.key->ip_proto);
392 
393 		ip_proto = match.key->ip_proto;
394 	}
395 
396 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
397 		struct flow_match_control match;
398 
399 		flow_rule_match_control(rule, &match);
400 		addr_type = match.key->addr_type;
401 	}
402 
403 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
404 		struct flow_match_ipv4_addrs match;
405 
406 		flow_rule_match_ipv4_addrs(rule, &match);
407 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
408 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
409 		       &match.mask->src, sizeof(match.mask->src));
410 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
411 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
412 		       &match.key->src, sizeof(match.key->src));
413 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
414 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
415 		       &match.mask->dst, sizeof(match.mask->dst));
416 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
417 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
418 		       &match.key->dst, sizeof(match.key->dst));
419 	}
420 
421 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
422 		struct flow_match_ipv6_addrs match;
423 
424 		flow_rule_match_ipv6_addrs(rule, &match);
425 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
426 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
427 		       &match.mask->src, sizeof(match.mask->src));
428 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
429 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
430 		       &match.key->src, sizeof(match.key->src));
431 
432 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
433 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
434 		       &match.mask->dst, sizeof(match.mask->dst));
435 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
436 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
437 		       &match.key->dst, sizeof(match.key->dst));
438 	}
439 
440 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
441 		struct flow_match_ports match;
442 
443 		flow_rule_match_ports(rule, &match);
444 		switch (ip_proto) {
445 		case IPPROTO_TCP:
446 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
447 				 tcp_sport, ntohs(match.mask->src));
448 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
449 				 tcp_sport, ntohs(match.key->src));
450 
451 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
452 				 tcp_dport, ntohs(match.mask->dst));
453 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
454 				 tcp_dport, ntohs(match.key->dst));
455 			break;
456 
457 		case IPPROTO_UDP:
458 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
459 				 udp_sport, ntohs(match.mask->src));
460 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
461 				 udp_sport, ntohs(match.key->src));
462 
463 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
464 				 udp_dport, ntohs(match.mask->dst));
465 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
466 				 udp_dport, ntohs(match.key->dst));
467 			break;
468 		default:
469 			break;
470 		}
471 	}
472 
473 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
474 		struct flow_match_tcp match;
475 
476 		flow_rule_match_tcp(rule, &match);
477 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
478 			 ntohs(match.mask->flags));
479 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
480 			 ntohs(match.key->flags));
481 	}
482 
483 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
484 		struct flow_match_meta match;
485 
486 		flow_rule_match_meta(rule, &match);
487 
488 		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
489 			struct net_device *dev;
490 
491 			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
492 			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
493 				spec->flow_context.flow_source =
494 					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
495 
496 			dev_put(dev);
497 		}
498 	}
499 
500 	return 0;
501 }
502 
503 static void
504 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
505 {
506 	if (entry->counter->is_shared &&
507 	    !refcount_dec_and_test(&entry->counter->refcount))
508 		return;
509 
510 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
511 	kfree(entry->counter);
512 }
513 
514 static void
515 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
516 			  struct mlx5_ct_entry *entry,
517 			  bool nat)
518 {
519 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
520 	struct mlx5_flow_attr *attr = zone_rule->attr;
521 
522 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
523 
524 	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
525 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
526 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
527 	kfree(attr);
528 }
529 
530 static void
531 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
532 			   struct mlx5_ct_entry *entry)
533 {
534 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
535 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
536 
537 	atomic_dec(&ct_priv->debugfs.stats.offloaded);
538 }
539 
540 static struct flow_action_entry *
541 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
542 {
543 	struct flow_action *flow_action = &flow_rule->action;
544 	struct flow_action_entry *act;
545 	int i;
546 
547 	flow_action_for_each(i, act, flow_action) {
548 		if (act->id == FLOW_ACTION_CT_METADATA)
549 			return act;
550 	}
551 
552 	return NULL;
553 }
554 
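/* Add modify-header actions that store the CT metadata (ct_state bits, mark,
 * label mapping id and zone restore id) in registers, so it can be matched
 * on later and restored to the skb after a miss.
 */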
555 static int
556 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
557 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
558 			       u8 ct_state,
559 			       u32 mark,
560 			       u32 labels_id,
561 			       u8 zone_restore_id)
562 {
563 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
564 	struct mlx5_core_dev *dev = ct_priv->dev;
565 	int err;
566 
567 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
568 					CTSTATE_TO_REG, ct_state);
569 	if (err)
570 		return err;
571 
572 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
573 					MARK_TO_REG, mark);
574 	if (err)
575 		return err;
576 
577 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
578 					LABELS_TO_REG, labels_id);
579 	if (err)
580 		return err;
581 
582 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
583 					ZONE_RESTORE_TO_REG, zone_restore_id);
584 	if (err)
585 		return err;
586 
587 	/* Make another copy of zone id in reg_b for
588 	 * NIC rx flows since we don't copy reg_c1 to
589 	 * reg_b upon miss.
590 	 */
591 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
592 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
593 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
594 		if (err)
595 			return err;
596 	}
597 	return 0;
598 }
599 
600 int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
601 				 struct mlx5e_tc_mod_hdr_acts *mod_acts)
602 {
603 	return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
604 }
605 
606 static int
607 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
608 				   char *modact)
609 {
610 	u32 offset = act->mangle.offset, field;
611 
612 	switch (act->mangle.htype) {
613 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
614 		MLX5_SET(set_action_in, modact, length, 0);
615 		if (offset == offsetof(struct iphdr, saddr))
616 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
617 		else if (offset == offsetof(struct iphdr, daddr))
618 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
619 		else
620 			return -EOPNOTSUPP;
621 		break;
622 
623 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
624 		MLX5_SET(set_action_in, modact, length, 0);
625 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
626 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
627 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
628 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
629 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
630 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
631 		else if (offset == offsetof(struct ipv6hdr, saddr))
632 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
633 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
634 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
635 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
636 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
637 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
638 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
639 		else if (offset == offsetof(struct ipv6hdr, daddr))
640 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
641 		else
642 			return -EOPNOTSUPP;
643 		break;
644 
645 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
646 		MLX5_SET(set_action_in, modact, length, 16);
647 		if (offset == offsetof(struct tcphdr, source))
648 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
649 		else if (offset == offsetof(struct tcphdr, dest))
650 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
651 		else
652 			return -EOPNOTSUPP;
653 		break;
654 
655 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
656 		MLX5_SET(set_action_in, modact, length, 16);
657 		if (offset == offsetof(struct udphdr, source))
658 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
659 		else if (offset == offsetof(struct udphdr, dest))
660 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
661 		else
662 			return -EOPNOTSUPP;
663 		break;
664 
665 	default:
666 		return -EOPNOTSUPP;
667 	}
668 
669 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
670 	MLX5_SET(set_action_in, modact, offset, 0);
671 	MLX5_SET(set_action_in, modact, field, field);
672 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
673 
674 	return 0;
675 }
676 
677 static int
678 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
679 			    struct flow_rule *flow_rule,
680 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
681 {
682 	struct flow_action *flow_action = &flow_rule->action;
683 	struct mlx5_core_dev *mdev = ct_priv->dev;
684 	struct flow_action_entry *act;
685 	char *modact;
686 	int err, i;
687 
688 	flow_action_for_each(i, act, flow_action) {
689 		switch (act->id) {
690 		case FLOW_ACTION_MANGLE: {
691 			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
692 			if (IS_ERR(modact))
693 				return PTR_ERR(modact);
694 
695 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
696 			if (err)
697 				return err;
698 
699 			mod_acts->num_actions++;
700 		}
701 		break;
702 
703 		case FLOW_ACTION_CT_METADATA:
704 			/* Handled earlier */
705 			continue;
706 		default:
707 			return -EOPNOTSUPP;
708 		}
709 	}
710 
711 	return 0;
712 }
713 
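/* Build the modify header for a per-tuple rule: register writes for the CT
 * metadata plus, for the NAT table, the packet rewrites themselves. NAT
 * modify headers are allocated per entry, while the restore-only ones are
 * shared through the mod_hdr table.
 */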
714 static int
715 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
716 				struct mlx5_flow_attr *attr,
717 				struct flow_rule *flow_rule,
718 				struct mlx5e_mod_hdr_handle **mh,
719 				u8 zone_restore_id, bool nat_table, bool has_nat)
720 {
721 	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
722 	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
723 	struct flow_action_entry *meta;
724 	u16 ct_state = 0;
725 	int err;
726 
727 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
728 	if (!meta)
729 		return -EOPNOTSUPP;
730 
731 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
732 				     &attr->ct_attr.ct_labels_id);
733 	if (err)
734 		return -EOPNOTSUPP;
735 	if (nat_table) {
736 		if (has_nat) {
737 			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
738 			if (err)
739 				goto err_mapping;
740 		}
741 
742 		ct_state |= MLX5_CT_STATE_NAT_BIT;
743 	}
744 
745 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
746 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
747 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
748 					     ct_state,
749 					     meta->ct_metadata.mark,
750 					     attr->ct_attr.ct_labels_id,
751 					     zone_restore_id);
752 	if (err)
753 		goto err_mapping;
754 
755 	if (nat_table && has_nat) {
756 		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
757 							    mod_acts.num_actions,
758 							    mod_acts.actions);
759 		if (IS_ERR(attr->modify_hdr)) {
760 			err = PTR_ERR(attr->modify_hdr);
761 			goto err_mapping;
762 		}
763 
764 		*mh = NULL;
765 	} else {
766 		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
767 					   ct_priv->mod_hdr_tbl,
768 					   ct_priv->ns_type,
769 					   &mod_acts);
770 		if (IS_ERR(*mh)) {
771 			err = PTR_ERR(*mh);
772 			goto err_mapping;
773 		}
774 		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
775 	}
776 
777 	mlx5e_mod_hdr_dealloc(&mod_acts);
778 	return 0;
779 
780 err_mapping:
781 	mlx5e_mod_hdr_dealloc(&mod_acts);
782 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
783 	return err;
784 }
785 
786 static void
787 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
788 				 struct mlx5_flow_attr *attr,
789 				 struct mlx5e_mod_hdr_handle *mh)
790 {
791 	if (mh)
792 		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
793 	else
794 		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
795 }
796 
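/* Offload one direction of a conntrack entry into the ct or ct_nat table:
 * match the tuple and zone register, apply the modify header built above,
 * count, and forward to post_act which runs the rest of the tc actions.
 */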
797 static int
798 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
799 			  struct flow_rule *flow_rule,
800 			  struct mlx5_ct_entry *entry,
801 			  bool nat, u8 zone_restore_id)
802 {
803 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
804 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
805 	struct mlx5_flow_spec *spec = NULL;
806 	struct mlx5_flow_attr *attr;
807 	int err;
808 
809 	zone_rule->nat = nat;
810 
811 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
812 	if (!spec)
813 		return -ENOMEM;
814 
815 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
816 	if (!attr) {
817 		err = -ENOMEM;
818 		goto err_attr;
819 	}
820 
821 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
822 					      &zone_rule->mh,
823 					      zone_restore_id,
824 					      nat,
825 					      mlx5_tc_ct_entry_has_nat(entry));
826 	if (err) {
827 		ct_dbg("Failed to create ct entry mod hdr");
828 		goto err_mod_hdr;
829 	}
830 
831 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
832 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
833 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
834 	attr->dest_chain = 0;
835 	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
836 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
837 	if (entry->tuple.ip_proto == IPPROTO_TCP ||
838 	    entry->tuple.ip_proto == IPPROTO_UDP)
839 		attr->outer_match_level = MLX5_MATCH_L4;
840 	else
841 		attr->outer_match_level = MLX5_MATCH_L3;
842 	attr->counter = entry->counter->counter;
843 	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
844 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
845 		attr->esw_attr->in_mdev = priv->mdev;
846 
847 	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
848 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
849 
850 	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
851 	if (IS_ERR(zone_rule->rule)) {
852 		err = PTR_ERR(zone_rule->rule);
853 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
854 		goto err_rule;
855 	}
856 
857 	zone_rule->attr = attr;
858 
859 	kvfree(spec);
860 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
861 
862 	return 0;
863 
864 err_rule:
865 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
866 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
867 err_mod_hdr:
868 	kfree(attr);
869 err_attr:
870 	kvfree(spec);
871 	return err;
872 }
873 
874 static bool
875 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
876 {
877 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
878 }
879 
880 static struct mlx5_ct_entry *
881 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
882 {
883 	struct mlx5_ct_entry *entry;
884 
885 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
886 				       tuples_ht_params);
887 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
888 	    refcount_inc_not_zero(&entry->refcnt)) {
889 		return entry;
890 	} else if (!entry) {
891 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
892 					       tuple, tuples_nat_ht_params);
893 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
894 		    refcount_inc_not_zero(&entry->refcnt))
895 			return entry;
896 	}
897 
898 	return entry ? ERR_PTR(-EINVAL) : NULL;
899 }
900 
901 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
902 {
903 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
904 
905 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
906 			       &entry->tuple_nat_node,
907 			       tuples_nat_ht_params);
908 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
909 			       tuples_ht_params);
910 }
911 
912 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
913 {
914 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
915 
916 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
917 
918 	spin_lock_bh(&ct_priv->ht_lock);
919 	mlx5_tc_ct_entry_remove_from_tuples(entry);
920 	spin_unlock_bh(&ct_priv->ht_lock);
921 
922 	mlx5_tc_ct_counter_put(ct_priv, entry);
923 	kfree(entry);
924 }
925 
926 static void
927 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
928 {
929 	if (!refcount_dec_and_test(&entry->refcnt))
930 		return;
931 
932 	mlx5_tc_ct_entry_del(entry);
933 }
934 
935 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
936 {
937 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
938 
939 	mlx5_tc_ct_entry_del(entry);
940 }
941 
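/* Same as mlx5_tc_ct_entry_put(), but defers the (blocking) teardown to the
 * ct workqueue instead of running it in the caller's context.
 */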
942 static void
943 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
944 {
945 	if (!refcount_dec_and_test(&entry->refcnt))
946 		return;
947 
948 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
949 	queue_work(entry->ct_priv->wq, &entry->work);
950 }
951 
952 static struct mlx5_ct_counter *
953 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
954 {
955 	struct mlx5_ct_counter *counter;
956 	int ret;
957 
958 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
959 	if (!counter)
960 		return ERR_PTR(-ENOMEM);
961 
962 	counter->is_shared = false;
963 	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
964 	if (IS_ERR(counter->counter)) {
965 		ct_dbg("Failed to create counter for ct entry");
966 		ret = PTR_ERR(counter->counter);
967 		kfree(counter);
968 		return ERR_PTR(ret);
969 	}
970 
971 	return counter;
972 }
973 
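/* When conntrack accounting is disabled both directions of a connection can
 * share one hardware counter: look up the entry of the reversed tuple and
 * reuse its counter, or fall back to a new one if no such entry exists yet.
 */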
974 static struct mlx5_ct_counter *
975 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
976 			      struct mlx5_ct_entry *entry)
977 {
978 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
979 	struct mlx5_ct_counter *shared_counter;
980 	struct mlx5_ct_entry *rev_entry;
981 
982 	/* get the reversed tuple */
983 	swap(rev_tuple.port.src, rev_tuple.port.dst);
984 
985 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
986 		__be32 tmp_addr = rev_tuple.ip.src_v4;
987 
988 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
989 		rev_tuple.ip.dst_v4 = tmp_addr;
990 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
991 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
992 
993 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
994 		rev_tuple.ip.dst_v6 = tmp_addr;
995 	} else {
996 		return ERR_PTR(-EOPNOTSUPP);
997 	}
998 
999 	/* Use the same counter as the reverse direction */
1000 	spin_lock_bh(&ct_priv->ht_lock);
1001 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
1002 
1003 	if (IS_ERR(rev_entry)) {
1004 		spin_unlock_bh(&ct_priv->ht_lock);
1005 		goto create_counter;
1006 	}
1007 
1008 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
1009 		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
1010 		shared_counter = rev_entry->counter;
1011 		spin_unlock_bh(&ct_priv->ht_lock);
1012 
1013 		mlx5_tc_ct_entry_put(rev_entry);
1014 		return shared_counter;
1015 	}
1016 
1017 	spin_unlock_bh(&ct_priv->ht_lock);
1018 
1019 create_counter:
1020 
1021 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
1022 	if (IS_ERR(shared_counter))
1023 		return shared_counter;
1024 
1025 	shared_counter->is_shared = true;
1026 	refcount_set(&shared_counter->refcount, 1);
1027 	return shared_counter;
1028 }
1029 
1030 static int
1031 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
1032 			   struct flow_rule *flow_rule,
1033 			   struct mlx5_ct_entry *entry,
1034 			   u8 zone_restore_id)
1035 {
1036 	int err;
1037 
1038 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
1039 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
1040 	else
1041 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
1042 
1043 	if (IS_ERR(entry->counter)) {
1044 		err = PTR_ERR(entry->counter);
1045 		return err;
1046 	}
1047 
1048 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
1049 					zone_restore_id);
1050 	if (err)
1051 		goto err_orig;
1052 
1053 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
1054 					zone_restore_id);
1055 	if (err)
1056 		goto err_nat;
1057 
1058 	atomic_inc(&ct_priv->debugfs.stats.offloaded);
1059 	return 0;
1060 
1061 err_nat:
1062 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1063 err_orig:
1064 	mlx5_tc_ct_counter_put(ct_priv, entry);
1065 	return err;
1066 }
1067 
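/* FLOW_CLS_REPLACE callback from the nf flowtable: allocate an entry, parse
 * the original and NAT tuples, insert it into the hashtables and offload the
 * rules. refcnt starts at 2: one reference for the hashtables and one for
 * this function, dropped once the entry is marked valid.
 */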
1068 static int
1069 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1070 				  struct flow_cls_offload *flow)
1071 {
1072 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1073 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1074 	struct flow_action_entry *meta_action;
1075 	unsigned long cookie = flow->cookie;
1076 	struct mlx5_ct_entry *entry;
1077 	int err;
1078 
1079 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1080 	if (!meta_action)
1081 		return -EOPNOTSUPP;
1082 
1083 	spin_lock_bh(&ct_priv->ht_lock);
1084 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1085 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1086 		spin_unlock_bh(&ct_priv->ht_lock);
1087 		mlx5_tc_ct_entry_put(entry);
1088 		return -EEXIST;
1089 	}
1090 	spin_unlock_bh(&ct_priv->ht_lock);
1091 
1092 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1093 	if (!entry)
1094 		return -ENOMEM;
1095 
1096 	entry->tuple.zone = ft->zone;
1097 	entry->cookie = flow->cookie;
1098 	entry->restore_cookie = meta_action->ct_metadata.cookie;
1099 	refcount_set(&entry->refcnt, 2);
1100 	entry->ct_priv = ct_priv;
1101 
1102 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1103 	if (err)
1104 		goto err_set;
1105 
1106 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1107 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1108 	if (err)
1109 		goto err_set;
1110 
1111 	spin_lock_bh(&ct_priv->ht_lock);
1112 
1113 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1114 					    cts_ht_params);
1115 	if (err)
1116 		goto err_entries;
1117 
1118 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1119 					    &entry->tuple_node,
1120 					    tuples_ht_params);
1121 	if (err)
1122 		goto err_tuple;
1123 
1124 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1125 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1126 						    &entry->tuple_nat_node,
1127 						    tuples_nat_ht_params);
1128 		if (err)
1129 			goto err_tuple_nat;
1130 	}
1131 	spin_unlock_bh(&ct_priv->ht_lock);
1132 
1133 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1134 					 ft->zone_restore_id);
1135 	if (err)
1136 		goto err_rules;
1137 
1138 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1139 	mlx5_tc_ct_entry_put(entry); /* this function reference */
1140 
1141 	return 0;
1142 
1143 err_rules:
1144 	spin_lock_bh(&ct_priv->ht_lock);
1145 	if (mlx5_tc_ct_entry_has_nat(entry))
1146 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1147 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1148 err_tuple_nat:
1149 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1150 			       &entry->tuple_node,
1151 			       tuples_ht_params);
1152 err_tuple:
1153 	rhashtable_remove_fast(&ft->ct_entries_ht,
1154 			       &entry->node,
1155 			       cts_ht_params);
1156 err_entries:
1157 	spin_unlock_bh(&ct_priv->ht_lock);
1158 err_set:
1159 	kfree(entry);
1160 	if (err != -EEXIST)
1161 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1162 	return err;
1163 }
1164 
1165 static int
1166 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1167 				  struct flow_cls_offload *flow)
1168 {
1169 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1170 	unsigned long cookie = flow->cookie;
1171 	struct mlx5_ct_entry *entry;
1172 
1173 	spin_lock_bh(&ct_priv->ht_lock);
1174 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1175 	if (!entry) {
1176 		spin_unlock_bh(&ct_priv->ht_lock);
1177 		return -ENOENT;
1178 	}
1179 
1180 	if (!mlx5_tc_ct_entry_valid(entry)) {
1181 		spin_unlock_bh(&ct_priv->ht_lock);
1182 		return -EINVAL;
1183 	}
1184 
1185 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1186 	spin_unlock_bh(&ct_priv->ht_lock);
1187 
1188 	mlx5_tc_ct_entry_put(entry);
1189 
1190 	return 0;
1191 }
1192 
1193 static int
1194 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1195 				    struct flow_cls_offload *f)
1196 {
1197 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1198 	unsigned long cookie = f->cookie;
1199 	struct mlx5_ct_entry *entry;
1200 	u64 lastuse, packets, bytes;
1201 
1202 	spin_lock_bh(&ct_priv->ht_lock);
1203 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1204 	if (!entry) {
1205 		spin_unlock_bh(&ct_priv->ht_lock);
1206 		return -ENOENT;
1207 	}
1208 
1209 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1210 		spin_unlock_bh(&ct_priv->ht_lock);
1211 		return -EINVAL;
1212 	}
1213 
1214 	spin_unlock_bh(&ct_priv->ht_lock);
1215 
1216 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1217 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1218 			  FLOW_ACTION_HW_STATS_DELAYED);
1219 
1220 	mlx5_tc_ct_entry_put(entry);
1221 	return 0;
1222 }
1223 
1224 static int
1225 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1226 			      void *cb_priv)
1227 {
1228 	struct flow_cls_offload *f = type_data;
1229 	struct mlx5_ct_ft *ft = cb_priv;
1230 
1231 	if (type != TC_SETUP_CLSFLOWER)
1232 		return -EOPNOTSUPP;
1233 
1234 	switch (f->command) {
1235 	case FLOW_CLS_REPLACE:
1236 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1237 	case FLOW_CLS_DESTROY:
1238 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1239 	case FLOW_CLS_STATS:
1240 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1241 	default:
1242 		break;
1243 	}
1244 
1245 	return -EOPNOTSUPP;
1246 }
1247 
1248 static bool
1249 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1250 			u16 zone)
1251 {
1252 	struct flow_keys flow_keys;
1253 
1254 	skb_reset_network_header(skb);
1255 	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
1256 
1257 	tuple->zone = zone;
1258 
1259 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1260 	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
1261 	    flow_keys.basic.ip_proto != IPPROTO_GRE)
1262 		return false;
1263 
1264 	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
1265 	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
1266 		tuple->port.src = flow_keys.ports.src;
1267 		tuple->port.dst = flow_keys.ports.dst;
1268 	}
1269 	tuple->n_proto = flow_keys.basic.n_proto;
1270 	tuple->ip_proto = flow_keys.basic.ip_proto;
1271 
1272 	switch (flow_keys.basic.n_proto) {
1273 	case htons(ETH_P_IP):
1274 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1275 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1276 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1277 		break;
1278 
1279 	case htons(ETH_P_IPV6):
1280 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1281 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1282 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1283 		break;
1284 	default:
1285 		goto out;
1286 	}
1287 
1288 	return true;
1289 
1290 out:
1291 	return false;
1292 }
1293 
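/* Match untracked (-trk) packets on the ctstate register; fails if the spec
 * already matches +trk, which would conflict.
 */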
1294 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1295 {
1296 	u32 ctstate = 0, ctstate_mask = 0;
1297 
1298 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1299 					&ctstate, &ctstate_mask);
1300 
1301 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1302 		return -EOPNOTSUPP;
1303 
1304 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1305 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1306 				    ctstate, ctstate_mask);
1307 
1308 	return 0;
1309 }
1310 
1311 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1312 {
1313 	if (!priv || !ct_attr->ct_labels_id)
1314 		return;
1315 
1316 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1317 }
1318 
1319 int
1320 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1321 		     struct mlx5_flow_spec *spec,
1322 		     struct flow_cls_offload *f,
1323 		     struct mlx5_ct_attr *ct_attr,
1324 		     struct netlink_ext_ack *extack)
1325 {
1326 	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1327 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1328 	struct flow_dissector_key_ct *mask, *key;
1329 	u32 ctstate = 0, ctstate_mask = 0;
1330 	u16 ct_state_on, ct_state_off;
1331 	u16 ct_state, ct_state_mask;
1332 	struct flow_match_ct match;
1333 	u32 ct_labels[4];
1334 
1335 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1336 		return 0;
1337 
1338 	if (!priv) {
1339 		NL_SET_ERR_MSG_MOD(extack,
1340 				   "offload of ct matching isn't available");
1341 		return -EOPNOTSUPP;
1342 	}
1343 
1344 	flow_rule_match_ct(rule, &match);
1345 
1346 	key = match.key;
1347 	mask = match.mask;
1348 
1349 	ct_state = key->ct_state;
1350 	ct_state_mask = mask->ct_state;
1351 
1352 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1353 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1354 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1355 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1356 			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1357 			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1358 		NL_SET_ERR_MSG_MOD(extack,
1359 				   "only ct_state trk, est, new and rpl are supported for offload");
1360 		return -EOPNOTSUPP;
1361 	}
1362 
1363 	ct_state_on = ct_state & ct_state_mask;
1364 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1365 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1366 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1367 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1368 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1369 	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1370 	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1371 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1372 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1373 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1374 	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1375 	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1376 
1377 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1378 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1379 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1380 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1381 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1382 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1383 	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1384 	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1385 
1386 	if (rel) {
1387 		NL_SET_ERR_MSG_MOD(extack,
1388 				   "matching on ct_state +rel isn't supported");
1389 		return -EOPNOTSUPP;
1390 	}
1391 
1392 	if (inv) {
1393 		NL_SET_ERR_MSG_MOD(extack,
1394 				   "matching on ct_state +inv isn't supported");
1395 		return -EOPNOTSUPP;
1396 	}
1397 
1398 	if (new) {
1399 		NL_SET_ERR_MSG_MOD(extack,
1400 				   "matching on ct_state +new isn't supported");
1401 		return -EOPNOTSUPP;
1402 	}
1403 
1404 	if (mask->ct_zone)
1405 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1406 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1407 	if (ctstate_mask)
1408 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1409 					    ctstate, ctstate_mask);
1410 	if (mask->ct_mark)
1411 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1412 					    key->ct_mark, mask->ct_mark);
1413 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1414 	    mask->ct_labels[3]) {
1415 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1416 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1417 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1418 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1419 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1420 			return -EOPNOTSUPP;
1421 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1422 					    MLX5_CT_LABELS_MASK);
1423 	}
1424 
1425 	return 0;
1426 }
1427 
1428 int
1429 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1430 			struct mlx5_flow_attr *attr,
1431 			struct mlx5e_tc_mod_hdr_acts *mod_acts,
1432 			const struct flow_action_entry *act,
1433 			struct netlink_ext_ack *extack)
1434 {
1435 	if (!priv) {
1436 		NL_SET_ERR_MSG_MOD(extack,
1437 				   "offload of ct action isn't available");
1438 		return -EOPNOTSUPP;
1439 	}
1440 
1441 	attr->ct_attr.zone = act->ct.zone;
1442 	attr->ct_attr.ct_action = act->ct.action;
1443 	attr->ct_attr.nf_ft = act->ct.flow_table;
1444 
1445 	return 0;
1446 }
1447 
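/* Populate a pre_ct table with its two rules: packets whose registers
 * already carry this zone with +trk (and +nat for the NAT table) go straight
 * to post_act, everything else gets the zone register set and misses into
 * the ct/ct_nat table for tuple matching.
 */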
1448 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1449 				  struct mlx5_tc_ct_pre *pre_ct,
1450 				  bool nat)
1451 {
1452 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1453 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1454 	struct mlx5_core_dev *dev = ct_priv->dev;
1455 	struct mlx5_flow_table *ft = pre_ct->ft;
1456 	struct mlx5_flow_destination dest = {};
1457 	struct mlx5_flow_act flow_act = {};
1458 	struct mlx5_modify_hdr *mod_hdr;
1459 	struct mlx5_flow_handle *rule;
1460 	struct mlx5_flow_spec *spec;
1461 	u32 ctstate;
1462 	u16 zone;
1463 	int err;
1464 
1465 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1466 	if (!spec)
1467 		return -ENOMEM;
1468 
1469 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1470 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1471 					ZONE_TO_REG, zone);
1472 	if (err) {
1473 		ct_dbg("Failed to set zone register mapping");
1474 		goto err_mapping;
1475 	}
1476 
1477 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1478 					   pre_mod_acts.num_actions,
1479 					   pre_mod_acts.actions);
1480 
1481 	if (IS_ERR(mod_hdr)) {
1482 		err = PTR_ERR(mod_hdr);
1483 		ct_dbg("Failed to create pre ct mod hdr");
1484 		goto err_mapping;
1485 	}
1486 	pre_ct->modify_hdr = mod_hdr;
1487 
1488 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1489 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1490 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1491 	flow_act.modify_hdr = mod_hdr;
1492 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1493 
1494 	/* add flow rule */
1495 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1496 				    zone, MLX5_CT_ZONE_MASK);
1497 	ctstate = MLX5_CT_STATE_TRK_BIT;
1498 	if (nat)
1499 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1500 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1501 
1502 	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1503 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1504 	if (IS_ERR(rule)) {
1505 		err = PTR_ERR(rule);
1506 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1507 		goto err_flow_rule;
1508 	}
1509 	pre_ct->flow_rule = rule;
1510 
1511 	/* add miss rule */
1512 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1513 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1514 	if (IS_ERR(rule)) {
1515 		err = PTR_ERR(rule);
1516 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1517 		goto err_miss_rule;
1518 	}
1519 	pre_ct->miss_rule = rule;
1520 
1521 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1522 	kvfree(spec);
1523 	return 0;
1524 
1525 err_miss_rule:
1526 	mlx5_del_flow_rules(pre_ct->flow_rule);
1527 err_flow_rule:
1528 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1529 err_mapping:
1530 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1531 	kvfree(spec);
1532 	return err;
1533 }
1534 
1535 static void
1536 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1537 		       struct mlx5_tc_ct_pre *pre_ct)
1538 {
1539 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1540 	struct mlx5_core_dev *dev = ct_priv->dev;
1541 
1542 	mlx5_del_flow_rules(pre_ct->flow_rule);
1543 	mlx5_del_flow_rules(pre_ct->miss_rule);
1544 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1545 }
1546 
1547 static int
1548 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1549 			struct mlx5_tc_ct_pre *pre_ct,
1550 			bool nat)
1551 {
1552 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1553 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1554 	struct mlx5_core_dev *dev = ct_priv->dev;
1555 	struct mlx5_flow_table_attr ft_attr = {};
1556 	struct mlx5_flow_namespace *ns;
1557 	struct mlx5_flow_table *ft;
1558 	struct mlx5_flow_group *g;
1559 	u32 metadata_reg_c_2_mask;
1560 	u32 *flow_group_in;
1561 	void *misc;
1562 	int err;
1563 
1564 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1565 	if (!ns) {
1566 		err = -EOPNOTSUPP;
1567 		ct_dbg("Failed to get flow namespace");
1568 		return err;
1569 	}
1570 
1571 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1572 	if (!flow_group_in)
1573 		return -ENOMEM;
1574 
1575 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1576 	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1577 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1578 	ft_attr.max_fte = 2;
1579 	ft_attr.level = 1;
1580 	ft = mlx5_create_flow_table(ns, &ft_attr);
1581 	if (IS_ERR(ft)) {
1582 		err = PTR_ERR(ft);
1583 		ct_dbg("Failed to create pre ct table");
1584 		goto out_free;
1585 	}
1586 	pre_ct->ft = ft;
1587 
1588 	/* create flow group */
1589 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1590 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1591 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1592 		 MLX5_MATCH_MISC_PARAMETERS_2);
1593 
1594 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1595 			    match_criteria.misc_parameters_2);
1596 
1597 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1598 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1599 	if (nat)
1600 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1601 
1602 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1603 		 metadata_reg_c_2_mask);
1604 
1605 	g = mlx5_create_flow_group(ft, flow_group_in);
1606 	if (IS_ERR(g)) {
1607 		err = PTR_ERR(g);
1608 		ct_dbg("Failed to create pre ct group");
1609 		goto err_flow_grp;
1610 	}
1611 	pre_ct->flow_grp = g;
1612 
1613 	/* create miss group */
1614 	memset(flow_group_in, 0, inlen);
1615 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1616 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1617 	g = mlx5_create_flow_group(ft, flow_group_in);
1618 	if (IS_ERR(g)) {
1619 		err = PTR_ERR(g);
1620 		ct_dbg("Failed to create pre ct miss group");
1621 		goto err_miss_grp;
1622 	}
1623 	pre_ct->miss_grp = g;
1624 
1625 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1626 	if (err)
1627 		goto err_add_rules;
1628 
1629 	kvfree(flow_group_in);
1630 	return 0;
1631 
1632 err_add_rules:
1633 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1634 err_miss_grp:
1635 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1636 err_flow_grp:
1637 	mlx5_destroy_flow_table(ft);
1638 out_free:
1639 	kvfree(flow_group_in);
1640 	return err;
1641 }
1642 
1643 static void
1644 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1645 		       struct mlx5_tc_ct_pre *pre_ct)
1646 {
1647 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1648 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1649 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1650 	mlx5_destroy_flow_table(pre_ct->ft);
1651 }
1652 
1653 static int
1654 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1655 {
1656 	int err;
1657 
1658 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1659 	if (err)
1660 		return err;
1661 
1662 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1663 	if (err)
1664 		goto err_pre_ct_nat;
1665 
1666 	return 0;
1667 
1668 err_pre_ct_nat:
1669 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1670 	return err;
1671 }
1672 
1673 static void
1674 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1675 {
1676 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1677 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1678 }
1679 
1680 /* Give ct_entries_ht its own lock class to avoid a false lockdep dependency
1681  * warning: deleting the last flow from a flow group and then the group itself
1682  * ends up in del_sw_flow_group(), which calls rhashtable_destroy() on
1683  * fg->ftes_hash. That takes an ht->mutex of the same lock class, but it is a
1684  * different rhashtable than the one used here.
1685  */
1686 static struct lock_class_key ct_entries_ht_lock_key;
1687 
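/* Get (or create) the per-zone ft object: map the zone to a restore id,
 * allocate the pre_ct tables and register mlx5_tc_ct_block_flow_offload()
 * on the nf flowtable so conntrack entries in this zone are offloaded here.
 */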
1688 static struct mlx5_ct_ft *
1689 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1690 		     struct nf_flowtable *nf_ft)
1691 {
1692 	struct mlx5_ct_ft *ft;
1693 	int err;
1694 
1695 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1696 	if (ft) {
1697 		refcount_inc(&ft->refcount);
1698 		return ft;
1699 	}
1700 
1701 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1702 	if (!ft)
1703 		return ERR_PTR(-ENOMEM);
1704 
1705 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1706 	if (err)
1707 		goto err_mapping;
1708 
1709 	ft->zone = zone;
1710 	ft->nf_ft = nf_ft;
1711 	ft->ct_priv = ct_priv;
1712 	refcount_set(&ft->refcount, 1);
1713 
1714 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1715 	if (err)
1716 		goto err_alloc_pre_ct;
1717 
1718 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1719 	if (err)
1720 		goto err_init;
1721 
1722 	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1723 
1724 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1725 				     zone_params);
1726 	if (err)
1727 		goto err_insert;
1728 
1729 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1730 					   mlx5_tc_ct_block_flow_offload, ft);
1731 	if (err)
1732 		goto err_add_cb;
1733 
1734 	return ft;
1735 
1736 err_add_cb:
1737 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1738 err_insert:
1739 	rhashtable_destroy(&ft->ct_entries_ht);
1740 err_init:
1741 	mlx5_tc_ct_free_pre_ct_tables(ft);
1742 err_alloc_pre_ct:
1743 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1744 err_mapping:
1745 	kfree(ft);
1746 	return ERR_PTR(err);
1747 }
1748 
1749 static void
1750 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1751 {
1752 	struct mlx5_ct_entry *entry = ptr;
1753 
1754 	mlx5_tc_ct_entry_put(entry);
1755 }
1756 
1757 static void
1758 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1759 {
1760 	if (!refcount_dec_and_test(&ft->refcount))
1761 		return;
1762 
1763 	flush_workqueue(ct_priv->wq);
1764 	nf_flow_table_offload_del_cb(ft->nf_ft,
1765 				     mlx5_tc_ct_block_flow_offload, ft);
1766 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1767 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1768 				    mlx5_tc_ct_flush_ft_entry,
1769 				    ct_priv);
1770 	mlx5_tc_ct_free_pre_ct_tables(ft);
1771 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1772 	kfree(ft);
1773 }
1774 
1775 /* We translate the tc filter with CT action to the following HW model:
1776  *
1777  * +---------------------+
1778  * + ft prio (tc chain)  +
1779  * + original match      +
1780  * +---------------------+
1781  *      | set chain miss mapping
1782  *      | set fte_id
1783  *      | set tunnel_id
1784  *      | do decap
1785  *      v
1786  * +---------------------+
1787  * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
1788  * + zone+nat match      +---------------->+ post_act (see below)    +
1789  * +---------------------+  set zone       +-------------------------+
1790  *      | set zone
1791  *      v
1792  * +--------------------+
1793  * + CT (nat or no nat) +
1794  * + tuple + zone match +
1795  * +--------------------+
1796  *      | set mark
1797  *      | set labels_id
1798  *      | set established
1799  *      | set zone_restore
1800  *      | do nat (if needed)
1801  *      v
1802  * +--------------+
1803  * + post_act     + original filter actions
1804  * + fte_id match +------------------------>
1805  * +--------------+
1806  */
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	/* Base flow attributes of both rules on original rule attribute */
	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!ct_flow->pre_ct_attr) {
		err = -ENOMEM;
		goto err_alloc_pre;
	}

	pre_ct_attr = ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, attr_sz);
	pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write the chain miss tag so a miss in the ct table can be
	 * restored to this chain; such a miss does not go through all
	 * the prios of the chain the way a normal tc rule miss does.
	 */
	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
					    &chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	/* If the original flow is decap, decap is done before entering the
	 * ct table, so add a rewrite that preserves the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
						ct_priv->ns_type,
						TUNNEL_TO_REG,
						attr->tunnel_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

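	/* Pack the register writes accumulated above (chain mapping and,
	 * when relevant, tunnel id) into a single modify-header object used
	 * by the pre_ct rule.
	 */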
	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   pre_mod_acts->num_actions,
					   pre_mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Change the original rule to point at the ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
						   pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

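	/* Stash ct_flow on the attr so mlx5_tc_ct_delete_flow() can find and
	 * release everything later; the staged mod_hdr actions are no longer
	 * needed once the pre_ct rule is installed.
	 */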
	attr->ct_attr.ct_flow = ct_flow;
	mlx5e_mod_hdr_dealloc(pre_mod_acts);

	return ct_flow->pre_ct_rule;

err_insert_orig:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(pre_mod_acts);
	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
	kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}

struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);
	rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow,
			 struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
	mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We may also be called on an error path to clean up after parsing,
	 * before any ct_flow was created; there is nothing to undo then.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
	mutex_unlock(&priv->control_lock);
}

static int
mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
	int err;

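	/* DMFS (device/firmware managed steering) is the default provider;
	 * prefer the SMFS provider when software managed steering is enabled
	 * on the FDB.
	 */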
	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
		ct_dbg("Using SMFS ct flow steering provider");
		fs_ops = mlx5_ct_fs_smfs_ops_get();
	}

	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
	if (!ct_priv->fs)
		return -ENOMEM;

	ct_priv->fs->netdev = ct_priv->netdev;
	ct_priv->fs->dev = ct_priv->dev;
	ct_priv->fs_ops = fs_ops;

	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
	if (err)
		goto err_init;

	return 0;

err_init:
	kfree(ct_priv->fs);
	return err;
}

static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround must be avoided for multi-chain rules.
		 * This is just a sanity check, as the pop vlan action should
		 * be supported by any FW that supports ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header
		 * actions. Therefore, mod header combined with fwd to table
		 * is required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      struct mlx5e_post_act *post_act)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	const char *err_msg = NULL;
	int err = 0;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	err_msg = "tc skb extension missing";
	err = -EOPNOTSUPP;
	goto out_err;
#endif
	if (IS_ERR_OR_NULL(post_act)) {
		/* ignore_flow_level is not supported by default for VFs, so
		 * post_act is unavailable there. Skip the error message in
		 * that case.
		 */
		if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
			err_msg = "post action is missing";
		err = -EOPNOTSUPP;
		goto out_err;
	}

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);

out_err:
	if (err && err_msg)
		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
	return err;
}

static void
mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
	char dirname[16] = {};

	if (snprintf(dirname, sizeof(dirname), "ct_%s",
		     is_fdb ? "fdb" : "nic") < 0)
		return;

	ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.offloaded);
	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.rx_dropped);
}

static void
mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	debugfs_remove_recursive(ct_priv->debugfs.root);
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type,
		struct mlx5e_post_act *post_act)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	u64 mapping_id;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
	if (err)
		goto err_support;

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

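	/* The zone and ct-label mappings are keyed by the system image GUID,
	 * so devices that belong to the same physical NIC can share the same
	 * mapping values.
	 */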
	mapping_id = mlx5_query_nic_system_image_guid(dev);

	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
						      sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
							sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_act = post_act;
	mutex_init(&ct_priv->control_lock);
	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
		goto err_ct_zone_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
		goto err_ct_tuples_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
		goto err_ct_tuples_nat_ht;

	ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
	if (!ct_priv->wq) {
		err = -ENOMEM;
		goto err_wq;
	}

	err = mlx5_tc_ct_fs_init(ct_priv);
	if (err)
		goto err_init_fs;

	mlx5_ct_tc_create_dbgfs(ct_priv);
	return ct_priv;

err_init_fs:
	destroy_workqueue(ct_priv->wq);
err_wq:
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
	rhashtable_destroy(&ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
	rhashtable_destroy(&ct_priv->zone_ht);
err_ct_zone_ht:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}

void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	destroy_workqueue(ct_priv->wq);
	mlx5_ct_tc_remove_dbgfs(ct_priv);
	chains = ct_priv->chains;

	ct_priv->fs_ops->destroy(ct_priv->fs);
	kfree(ct_priv->fs);

	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	kfree(ct_priv);
}

bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

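	/* zone_restore_id was written by the offloaded CT rule. Map it back
	 * to the zone, rebuild the tuple from the packet, and look up the
	 * offloaded entry so its conntrack state can be restored to the skb.
	 */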
	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		goto out_inc_drop;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		goto out_inc_drop;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (IS_ERR_OR_NULL(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;

out_inc_drop:
	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
	return false;
}