xref: /linux/drivers/net/ethernet/sfc/tc_conntrack.c (revision 38f9a08a)
// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2023, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
10c3bb5c6aSEdward Cree 
11c3bb5c6aSEdward Cree #include "tc_conntrack.h"
12c3bb5c6aSEdward Cree #include "tc.h"
13c3bb5c6aSEdward Cree #include "mae.h"
14c3bb5c6aSEdward Cree 
/* Forward declaration: the definition lives near the end of this file, but
 * efx_tc_ct_zone_free() needs the symbol earlier so that it can name the
 * callback when unregistering it from the flowtable.
 */
static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv);
17c3bb5c6aSEdward Cree 
18c3bb5c6aSEdward Cree static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
19c3bb5c6aSEdward Cree 	.key_len	= offsetof(struct efx_tc_ct_zone, linkage),
20c3bb5c6aSEdward Cree 	.key_offset	= 0,
21c3bb5c6aSEdward Cree 	.head_offset	= offsetof(struct efx_tc_ct_zone, linkage),
22c3bb5c6aSEdward Cree };
23c3bb5c6aSEdward Cree 
241909387fSEdward Cree static const struct rhashtable_params efx_tc_ct_ht_params = {
251909387fSEdward Cree 	.key_len	= offsetof(struct efx_tc_ct_entry, linkage),
261909387fSEdward Cree 	.key_offset	= 0,
271909387fSEdward Cree 	.head_offset	= offsetof(struct efx_tc_ct_entry, linkage),
281909387fSEdward Cree };
291909387fSEdward Cree 
efx_tc_ct_zone_free(void * ptr,void * arg)30c3bb5c6aSEdward Cree static void efx_tc_ct_zone_free(void *ptr, void *arg)
31c3bb5c6aSEdward Cree {
32c3bb5c6aSEdward Cree 	struct efx_tc_ct_zone *zone = ptr;
33c3bb5c6aSEdward Cree 	struct efx_nic *efx = zone->efx;
34c3bb5c6aSEdward Cree 
35c3bb5c6aSEdward Cree 	netif_err(efx, drv, efx->net_dev,
36c3bb5c6aSEdward Cree 		  "tc ct_zone %u still present at teardown, removing\n",
37c3bb5c6aSEdward Cree 		  zone->zone);
38c3bb5c6aSEdward Cree 
39c3bb5c6aSEdward Cree 	nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
40c3bb5c6aSEdward Cree 	kfree(zone);
41c3bb5c6aSEdward Cree }
42c3bb5c6aSEdward Cree 
efx_tc_ct_free(void * ptr,void * arg)431909387fSEdward Cree static void efx_tc_ct_free(void *ptr, void *arg)
441909387fSEdward Cree {
451909387fSEdward Cree 	struct efx_tc_ct_entry *conn = ptr;
461909387fSEdward Cree 	struct efx_nic *efx = arg;
471909387fSEdward Cree 
481909387fSEdward Cree 	netif_err(efx, drv, efx->net_dev,
491909387fSEdward Cree 		  "tc ct_entry %lx still present at teardown\n",
501909387fSEdward Cree 		  conn->cookie);
511909387fSEdward Cree 
521909387fSEdward Cree 	/* We can release the counter, but we can't remove the CT itself
531909387fSEdward Cree 	 * from hardware because the table meta is already gone.
541909387fSEdward Cree 	 */
551909387fSEdward Cree 	efx_tc_flower_release_counter(efx, conn->cnt);
561909387fSEdward Cree 	kfree(conn);
571909387fSEdward Cree }
581909387fSEdward Cree 
efx_tc_init_conntrack(struct efx_nic * efx)59c3bb5c6aSEdward Cree int efx_tc_init_conntrack(struct efx_nic *efx)
60c3bb5c6aSEdward Cree {
61c3bb5c6aSEdward Cree 	int rc;
62c3bb5c6aSEdward Cree 
63c3bb5c6aSEdward Cree 	rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
64c3bb5c6aSEdward Cree 	if (rc < 0)
651909387fSEdward Cree 		goto fail_ct_zone_ht;
661909387fSEdward Cree 	rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
671909387fSEdward Cree 	if (rc < 0)
681909387fSEdward Cree 		goto fail_ct_ht;
69c3bb5c6aSEdward Cree 	return 0;
701909387fSEdward Cree fail_ct_ht:
711909387fSEdward Cree 	rhashtable_destroy(&efx->tc->ct_zone_ht);
721909387fSEdward Cree fail_ct_zone_ht:
731909387fSEdward Cree 	return rc;
74c3bb5c6aSEdward Cree }
75c3bb5c6aSEdward Cree 
7629416025SEdward Cree /* Only call this in init failure teardown.
7729416025SEdward Cree  * Normal exit should fini instead as there may be entries in the table.
7829416025SEdward Cree  */
efx_tc_destroy_conntrack(struct efx_nic * efx)7929416025SEdward Cree void efx_tc_destroy_conntrack(struct efx_nic *efx)
8029416025SEdward Cree {
8129416025SEdward Cree 	rhashtable_destroy(&efx->tc->ct_ht);
8229416025SEdward Cree 	rhashtable_destroy(&efx->tc->ct_zone_ht);
8329416025SEdward Cree }
8429416025SEdward Cree 
efx_tc_fini_conntrack(struct efx_nic * efx)85c3bb5c6aSEdward Cree void efx_tc_fini_conntrack(struct efx_nic *efx)
86c3bb5c6aSEdward Cree {
87c3bb5c6aSEdward Cree 	rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
881909387fSEdward Cree 	rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
891909387fSEdward Cree }
901909387fSEdward Cree 
/* Produce the __be16 form of a TCP flag named by its TCP_FLAG_* suffix:
 * the 32-bit big-endian constant is converted to CPU order, its low 16 bits
 * are shifted out, and the remainder is converted back to big-endian.
 * Used below to build masks that are compared against flow_match_tcp flags.
 */
#define EFX_NF_TCP_FLAG(flg)	cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)
921909387fSEdward Cree 
efx_tc_ct_parse_match(struct efx_nic * efx,struct flow_rule * fr,struct efx_tc_ct_entry * conn)931909387fSEdward Cree static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
941909387fSEdward Cree 				 struct efx_tc_ct_entry *conn)
951909387fSEdward Cree {
961909387fSEdward Cree 	struct flow_dissector *dissector = fr->match.dissector;
971909387fSEdward Cree 	unsigned char ipv = 0;
981909387fSEdward Cree 	bool tcp = false;
991909387fSEdward Cree 
1001909387fSEdward Cree 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
1011909387fSEdward Cree 		struct flow_match_control fm;
1021909387fSEdward Cree 
1031909387fSEdward Cree 		flow_rule_match_control(fr, &fm);
1041909387fSEdward Cree 		if (IS_ALL_ONES(fm.mask->addr_type))
1051909387fSEdward Cree 			switch (fm.key->addr_type) {
1061909387fSEdward Cree 			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1071909387fSEdward Cree 				ipv = 4;
1081909387fSEdward Cree 				break;
1091909387fSEdward Cree 			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1101909387fSEdward Cree 				ipv = 6;
1111909387fSEdward Cree 				break;
1121909387fSEdward Cree 			default:
1131909387fSEdward Cree 				break;
1141909387fSEdward Cree 			}
1151909387fSEdward Cree 	}
1161909387fSEdward Cree 
1171909387fSEdward Cree 	if (!ipv) {
1181909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
1191909387fSEdward Cree 			  "Conntrack missing ipv specification\n");
1201909387fSEdward Cree 		return -EOPNOTSUPP;
1211909387fSEdward Cree 	}
1221909387fSEdward Cree 
1231909387fSEdward Cree 	if (dissector->used_keys &
1241909387fSEdward Cree 	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
1251909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
1261909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
1271909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
1281909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
1291909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
1301909387fSEdward Cree 	      BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
1311909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
1321909387fSEdward Cree 			  "Unsupported conntrack keys %#llx\n",
1331909387fSEdward Cree 			  dissector->used_keys);
1341909387fSEdward Cree 		return -EOPNOTSUPP;
1351909387fSEdward Cree 	}
1361909387fSEdward Cree 
1371909387fSEdward Cree 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
1381909387fSEdward Cree 		struct flow_match_basic fm;
1391909387fSEdward Cree 
1401909387fSEdward Cree 		flow_rule_match_basic(fr, &fm);
1411909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->n_proto)) {
1421909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1431909387fSEdward Cree 				  "Conntrack eth_proto is not exact-match; mask %04x\n",
1441909387fSEdward Cree 				   ntohs(fm.mask->n_proto));
1451909387fSEdward Cree 			return -EOPNOTSUPP;
1461909387fSEdward Cree 		}
1471909387fSEdward Cree 		conn->eth_proto = fm.key->n_proto;
1481909387fSEdward Cree 		if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
1491909387fSEdward Cree 						 : htons(ETH_P_IPV6))) {
1501909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1511909387fSEdward Cree 				  "Conntrack eth_proto is not IPv%u, is %04x\n",
1521909387fSEdward Cree 				   ipv, ntohs(conn->eth_proto));
1531909387fSEdward Cree 			return -EOPNOTSUPP;
1541909387fSEdward Cree 		}
1551909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->ip_proto)) {
1561909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1571909387fSEdward Cree 				  "Conntrack ip_proto is not exact-match; mask %02x\n",
1581909387fSEdward Cree 				   fm.mask->ip_proto);
1591909387fSEdward Cree 			return -EOPNOTSUPP;
1601909387fSEdward Cree 		}
1611909387fSEdward Cree 		conn->ip_proto = fm.key->ip_proto;
1621909387fSEdward Cree 		switch (conn->ip_proto) {
1631909387fSEdward Cree 		case IPPROTO_TCP:
1641909387fSEdward Cree 			tcp = true;
1651909387fSEdward Cree 			break;
1661909387fSEdward Cree 		case IPPROTO_UDP:
1671909387fSEdward Cree 			break;
1681909387fSEdward Cree 		default:
1691909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1701909387fSEdward Cree 				  "Conntrack ip_proto not TCP or UDP, is %02x\n",
1711909387fSEdward Cree 				   conn->ip_proto);
1721909387fSEdward Cree 			return -EOPNOTSUPP;
1731909387fSEdward Cree 		}
1741909387fSEdward Cree 	} else {
1751909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
1761909387fSEdward Cree 			  "Conntrack missing eth_proto, ip_proto\n");
1771909387fSEdward Cree 		return -EOPNOTSUPP;
1781909387fSEdward Cree 	}
1791909387fSEdward Cree 
1801909387fSEdward Cree 	if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
1811909387fSEdward Cree 		struct flow_match_ipv4_addrs fm;
1821909387fSEdward Cree 
1831909387fSEdward Cree 		flow_rule_match_ipv4_addrs(fr, &fm);
1841909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->src)) {
1851909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1861909387fSEdward Cree 				  "Conntrack ipv4.src is not exact-match; mask %08x\n",
1871909387fSEdward Cree 				   ntohl(fm.mask->src));
1881909387fSEdward Cree 			return -EOPNOTSUPP;
1891909387fSEdward Cree 		}
1901909387fSEdward Cree 		conn->src_ip = fm.key->src;
1911909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->dst)) {
1921909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
1931909387fSEdward Cree 				  "Conntrack ipv4.dst is not exact-match; mask %08x\n",
1941909387fSEdward Cree 				   ntohl(fm.mask->dst));
1951909387fSEdward Cree 			return -EOPNOTSUPP;
1961909387fSEdward Cree 		}
1971909387fSEdward Cree 		conn->dst_ip = fm.key->dst;
1981909387fSEdward Cree 	} else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
1991909387fSEdward Cree 		struct flow_match_ipv6_addrs fm;
2001909387fSEdward Cree 
2011909387fSEdward Cree 		flow_rule_match_ipv6_addrs(fr, &fm);
2021909387fSEdward Cree 		if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
2031909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2041909387fSEdward Cree 				  "Conntrack ipv6.src is not exact-match; mask %pI6\n",
2051909387fSEdward Cree 				   &fm.mask->src);
2061909387fSEdward Cree 			return -EOPNOTSUPP;
2071909387fSEdward Cree 		}
2081909387fSEdward Cree 		conn->src_ip6 = fm.key->src;
2091909387fSEdward Cree 		if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
2101909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2111909387fSEdward Cree 				  "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
2121909387fSEdward Cree 				   &fm.mask->dst);
2131909387fSEdward Cree 			return -EOPNOTSUPP;
2141909387fSEdward Cree 		}
2151909387fSEdward Cree 		conn->dst_ip6 = fm.key->dst;
2161909387fSEdward Cree 	} else {
2171909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
2181909387fSEdward Cree 			  "Conntrack missing IPv%u addrs\n", ipv);
2191909387fSEdward Cree 		return -EOPNOTSUPP;
2201909387fSEdward Cree 	}
2211909387fSEdward Cree 
2221909387fSEdward Cree 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
2231909387fSEdward Cree 		struct flow_match_ports fm;
2241909387fSEdward Cree 
2251909387fSEdward Cree 		flow_rule_match_ports(fr, &fm);
2261909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->src)) {
2271909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2281909387fSEdward Cree 				  "Conntrack ports.src is not exact-match; mask %04x\n",
2291909387fSEdward Cree 				   ntohs(fm.mask->src));
2301909387fSEdward Cree 			return -EOPNOTSUPP;
2311909387fSEdward Cree 		}
2321909387fSEdward Cree 		conn->l4_sport = fm.key->src;
2331909387fSEdward Cree 		if (!IS_ALL_ONES(fm.mask->dst)) {
2341909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2351909387fSEdward Cree 				  "Conntrack ports.dst is not exact-match; mask %04x\n",
2361909387fSEdward Cree 				   ntohs(fm.mask->dst));
2371909387fSEdward Cree 			return -EOPNOTSUPP;
2381909387fSEdward Cree 		}
2391909387fSEdward Cree 		conn->l4_dport = fm.key->dst;
2401909387fSEdward Cree 	} else {
2411909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
2421909387fSEdward Cree 		return -EOPNOTSUPP;
2431909387fSEdward Cree 	}
2441909387fSEdward Cree 
2451909387fSEdward Cree 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
2461909387fSEdward Cree 		__be16 tcp_interesting_flags;
2471909387fSEdward Cree 		struct flow_match_tcp fm;
2481909387fSEdward Cree 
2491909387fSEdward Cree 		if (!tcp) {
2501909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2511909387fSEdward Cree 				  "Conntrack matching on TCP keys but ipproto is not tcp\n");
2521909387fSEdward Cree 			return -EOPNOTSUPP;
2531909387fSEdward Cree 		}
2541909387fSEdward Cree 		flow_rule_match_tcp(fr, &fm);
2551909387fSEdward Cree 		tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
2561909387fSEdward Cree 					EFX_NF_TCP_FLAG(RST) |
2571909387fSEdward Cree 					EFX_NF_TCP_FLAG(FIN);
2581909387fSEdward Cree 		/* If any of the tcp_interesting_flags is set, we always
2591909387fSEdward Cree 		 * inhibit CT lookup in LHS (so SW can update CT table).
2601909387fSEdward Cree 		 */
2611909387fSEdward Cree 		if (fm.key->flags & tcp_interesting_flags) {
2621909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2631909387fSEdward Cree 				  "Unsupported conntrack tcp.flags %04x/%04x\n",
2641909387fSEdward Cree 				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
2651909387fSEdward Cree 			return -EOPNOTSUPP;
2661909387fSEdward Cree 		}
2671909387fSEdward Cree 		/* Other TCP flags cannot be filtered at CT */
2681909387fSEdward Cree 		if (fm.mask->flags & ~tcp_interesting_flags) {
2691909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
2701909387fSEdward Cree 				  "Unsupported conntrack tcp.flags %04x/%04x\n",
2711909387fSEdward Cree 				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
2721909387fSEdward Cree 			return -EOPNOTSUPP;
2731909387fSEdward Cree 		}
2741909387fSEdward Cree 	}
2751909387fSEdward Cree 
2761909387fSEdward Cree 	return 0;
2771909387fSEdward Cree }
2781909387fSEdward Cree 
/**
 * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
 *
 * @ipv4: IP source or destination addr has been set
 * @tcpudp: TCP/UDP source or destination port has been set
 *
 * One zero-initialised instance is kept per rule by efx_tc_ct_replace() and
 * updated by efx_tc_ct_mangle() for each mangle action it accepts.  A field
 * that is still clear once all actions have been parsed means the
 * corresponding NAT value was not mangled and must be defaulted from the
 * rule's match.
 */
struct efx_tc_ct_mangler_state {
	u8 ipv4:1;
	u8 tcpudp:1;
};
289*38f9a08aSEdward Cree 
efx_tc_ct_mangle(struct efx_nic * efx,struct efx_tc_ct_entry * conn,const struct flow_action_entry * fa,struct efx_tc_ct_mangler_state * mung)290*38f9a08aSEdward Cree static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn,
291*38f9a08aSEdward Cree 			    const struct flow_action_entry *fa,
292*38f9a08aSEdward Cree 			    struct efx_tc_ct_mangler_state *mung)
293*38f9a08aSEdward Cree {
294*38f9a08aSEdward Cree 	/* Is this the first mangle we've processed for this rule? */
295*38f9a08aSEdward Cree 	bool first = !(mung->ipv4 || mung->tcpudp);
296*38f9a08aSEdward Cree 	bool dnat = false;
297*38f9a08aSEdward Cree 
298*38f9a08aSEdward Cree 	switch (fa->mangle.htype) {
299*38f9a08aSEdward Cree 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
300*38f9a08aSEdward Cree 		switch (fa->mangle.offset) {
301*38f9a08aSEdward Cree 		case offsetof(struct iphdr, daddr):
302*38f9a08aSEdward Cree 			dnat = true;
303*38f9a08aSEdward Cree 			fallthrough;
304*38f9a08aSEdward Cree 		case offsetof(struct iphdr, saddr):
305*38f9a08aSEdward Cree 			if (fa->mangle.mask)
306*38f9a08aSEdward Cree 				return -EOPNOTSUPP;
307*38f9a08aSEdward Cree 			conn->nat_ip = htonl(fa->mangle.val);
308*38f9a08aSEdward Cree 			mung->ipv4 = 1;
309*38f9a08aSEdward Cree 			break;
310*38f9a08aSEdward Cree 		default:
311*38f9a08aSEdward Cree 			return -EOPNOTSUPP;
312*38f9a08aSEdward Cree 		}
313*38f9a08aSEdward Cree 		break;
314*38f9a08aSEdward Cree 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315*38f9a08aSEdward Cree 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
316*38f9a08aSEdward Cree 		/* Both struct tcphdr and struct udphdr start with
317*38f9a08aSEdward Cree 		 *	__be16 source;
318*38f9a08aSEdward Cree 		 *	__be16 dest;
319*38f9a08aSEdward Cree 		 * so we can use the same code for both.
320*38f9a08aSEdward Cree 		 */
321*38f9a08aSEdward Cree 		switch (fa->mangle.offset) {
322*38f9a08aSEdward Cree 		case offsetof(struct tcphdr, dest):
323*38f9a08aSEdward Cree 			BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
324*38f9a08aSEdward Cree 				     offsetof(struct udphdr, dest));
325*38f9a08aSEdward Cree 			dnat = true;
326*38f9a08aSEdward Cree 			fallthrough;
327*38f9a08aSEdward Cree 		case offsetof(struct tcphdr, source):
328*38f9a08aSEdward Cree 			BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
329*38f9a08aSEdward Cree 				     offsetof(struct udphdr, source));
330*38f9a08aSEdward Cree 			if (~fa->mangle.mask != 0xffff)
331*38f9a08aSEdward Cree 				return -EOPNOTSUPP;
332*38f9a08aSEdward Cree 			conn->l4_natport = htons(fa->mangle.val);
333*38f9a08aSEdward Cree 			mung->tcpudp = 1;
334*38f9a08aSEdward Cree 			break;
335*38f9a08aSEdward Cree 		default:
336*38f9a08aSEdward Cree 			return -EOPNOTSUPP;
337*38f9a08aSEdward Cree 		}
338*38f9a08aSEdward Cree 		break;
339*38f9a08aSEdward Cree 	default:
340*38f9a08aSEdward Cree 		return -EOPNOTSUPP;
341*38f9a08aSEdward Cree 	}
342*38f9a08aSEdward Cree 	/* first mangle tells us whether this is SNAT or DNAT;
343*38f9a08aSEdward Cree 	 * subsequent mangles must match that
344*38f9a08aSEdward Cree 	 */
345*38f9a08aSEdward Cree 	if (first)
346*38f9a08aSEdward Cree 		conn->dnat = dnat;
347*38f9a08aSEdward Cree 	else if (conn->dnat != dnat)
348*38f9a08aSEdward Cree 		return -EOPNOTSUPP;
349*38f9a08aSEdward Cree 	return 0;
350*38f9a08aSEdward Cree }
351*38f9a08aSEdward Cree 
efx_tc_ct_replace(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)3521909387fSEdward Cree static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
3531909387fSEdward Cree 			     struct flow_cls_offload *tc)
3541909387fSEdward Cree {
3551909387fSEdward Cree 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
356*38f9a08aSEdward Cree 	struct efx_tc_ct_mangler_state mung = {};
3571909387fSEdward Cree 	struct efx_tc_ct_entry *conn, *old;
3581909387fSEdward Cree 	struct efx_nic *efx = ct_zone->efx;
3591909387fSEdward Cree 	const struct flow_action_entry *fa;
3601909387fSEdward Cree 	struct efx_tc_counter *cnt;
3611909387fSEdward Cree 	int rc, i;
3621909387fSEdward Cree 
3631909387fSEdward Cree 	if (WARN_ON(!efx->tc))
3641909387fSEdward Cree 		return -ENETDOWN;
3651909387fSEdward Cree 	if (WARN_ON(!efx->tc->up))
3661909387fSEdward Cree 		return -ENETDOWN;
3671909387fSEdward Cree 
3681909387fSEdward Cree 	conn = kzalloc(sizeof(*conn), GFP_USER);
3691909387fSEdward Cree 	if (!conn)
3701909387fSEdward Cree 		return -ENOMEM;
3711909387fSEdward Cree 	conn->cookie = tc->cookie;
3721909387fSEdward Cree 	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
3731909387fSEdward Cree 						&conn->linkage,
3741909387fSEdward Cree 						efx_tc_ct_ht_params);
375fc21f083SEdward Cree 	if (IS_ERR(old)) {
376fc21f083SEdward Cree 		rc = PTR_ERR(old);
377fc21f083SEdward Cree 		goto release;
378fc21f083SEdward Cree 	} else if (old) {
3791909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
3801909387fSEdward Cree 			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
3811909387fSEdward Cree 		rc = -EEXIST;
3821909387fSEdward Cree 		goto release;
3831909387fSEdward Cree 	}
3841909387fSEdward Cree 
3851909387fSEdward Cree 	/* Parse match */
3861909387fSEdward Cree 	conn->zone = ct_zone;
3871909387fSEdward Cree 	rc = efx_tc_ct_parse_match(efx, fr, conn);
3881909387fSEdward Cree 	if (rc)
3891909387fSEdward Cree 		goto release;
3901909387fSEdward Cree 
3911909387fSEdward Cree 	/* Parse actions */
3921909387fSEdward Cree 	flow_action_for_each(i, fa, &fr->action) {
3931909387fSEdward Cree 		switch (fa->id) {
3941909387fSEdward Cree 		case FLOW_ACTION_CT_METADATA:
3951909387fSEdward Cree 			conn->mark = fa->ct_metadata.mark;
3961909387fSEdward Cree 			if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
3971909387fSEdward Cree 				netif_dbg(efx, drv, efx->net_dev,
3981909387fSEdward Cree 					  "Setting CT label not supported\n");
3991909387fSEdward Cree 				rc = -EOPNOTSUPP;
4001909387fSEdward Cree 				goto release;
4011909387fSEdward Cree 			}
4021909387fSEdward Cree 			break;
403*38f9a08aSEdward Cree 		case FLOW_ACTION_MANGLE:
404*38f9a08aSEdward Cree 			if (conn->eth_proto != htons(ETH_P_IP)) {
405*38f9a08aSEdward Cree 				netif_dbg(efx, drv, efx->net_dev,
406*38f9a08aSEdward Cree 					  "NAT only supported for IPv4\n");
407*38f9a08aSEdward Cree 				rc = -EOPNOTSUPP;
408*38f9a08aSEdward Cree 				goto release;
409*38f9a08aSEdward Cree 			}
410*38f9a08aSEdward Cree 			rc = efx_tc_ct_mangle(efx, conn, fa, &mung);
411*38f9a08aSEdward Cree 			if (rc)
412*38f9a08aSEdward Cree 				goto release;
413*38f9a08aSEdward Cree 			break;
4141909387fSEdward Cree 		default:
4151909387fSEdward Cree 			netif_dbg(efx, drv, efx->net_dev,
4161909387fSEdward Cree 				  "Unhandled action %u for conntrack\n", fa->id);
4171909387fSEdward Cree 			rc = -EOPNOTSUPP;
4181909387fSEdward Cree 			goto release;
4191909387fSEdward Cree 		}
4201909387fSEdward Cree 	}
4211909387fSEdward Cree 
4221909387fSEdward Cree 	/* fill in defaults for unmangled values */
423*38f9a08aSEdward Cree 	if (!mung.ipv4)
4241909387fSEdward Cree 		conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
425*38f9a08aSEdward Cree 	if (!mung.tcpudp)
4261909387fSEdward Cree 		conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
4271909387fSEdward Cree 
4281909387fSEdward Cree 	cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
4291909387fSEdward Cree 	if (IS_ERR(cnt)) {
4301909387fSEdward Cree 		rc = PTR_ERR(cnt);
4311909387fSEdward Cree 		goto release;
4321909387fSEdward Cree 	}
4331909387fSEdward Cree 	conn->cnt = cnt;
4341909387fSEdward Cree 
4351909387fSEdward Cree 	rc = efx_mae_insert_ct(efx, conn);
4361909387fSEdward Cree 	if (rc) {
4371909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev,
4381909387fSEdward Cree 			  "Failed to insert conntrack, %d\n", rc);
4391909387fSEdward Cree 		goto release;
4401909387fSEdward Cree 	}
4411909387fSEdward Cree 	mutex_lock(&ct_zone->mutex);
4421909387fSEdward Cree 	list_add_tail(&conn->list, &ct_zone->cts);
4431909387fSEdward Cree 	mutex_unlock(&ct_zone->mutex);
4441909387fSEdward Cree 	return 0;
4451909387fSEdward Cree release:
4461909387fSEdward Cree 	if (conn->cnt)
4471909387fSEdward Cree 		efx_tc_flower_release_counter(efx, conn->cnt);
4481909387fSEdward Cree 	if (!old)
4491909387fSEdward Cree 		rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
4501909387fSEdward Cree 				       efx_tc_ct_ht_params);
4511909387fSEdward Cree 	kfree(conn);
4521909387fSEdward Cree 	return rc;
4531909387fSEdward Cree }
4541909387fSEdward Cree 
4551909387fSEdward Cree /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
efx_tc_ct_remove(struct efx_nic * efx,struct efx_tc_ct_entry * conn)4561909387fSEdward Cree static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
4571909387fSEdward Cree {
4581909387fSEdward Cree 	int rc;
4591909387fSEdward Cree 
4601909387fSEdward Cree 	/* Remove it from HW */
4611909387fSEdward Cree 	rc = efx_mae_remove_ct(efx, conn);
4621909387fSEdward Cree 	/* Delete it from SW */
4631909387fSEdward Cree 	rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
4641909387fSEdward Cree 			       efx_tc_ct_ht_params);
4651909387fSEdward Cree 	if (rc) {
4661909387fSEdward Cree 		netif_err(efx, drv, efx->net_dev,
4671909387fSEdward Cree 			  "Failed to remove conntrack %lx from hw, rc %d\n",
4681909387fSEdward Cree 			  conn->cookie, rc);
4691909387fSEdward Cree 	} else {
4701909387fSEdward Cree 		netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
4711909387fSEdward Cree 			  conn->cookie);
4721909387fSEdward Cree 	}
4731909387fSEdward Cree }
4741909387fSEdward Cree 
efx_tc_ct_remove_finish(struct efx_nic * efx,struct efx_tc_ct_entry * conn)4751909387fSEdward Cree static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
4761909387fSEdward Cree {
4771909387fSEdward Cree 	/* Remove related CT counter.  This is delayed after the conn object we
4781909387fSEdward Cree 	 * are working with has been successfully removed.  This protects the
4791909387fSEdward Cree 	 * counter from being used-after-free inside efx_tc_ct_stats.
4801909387fSEdward Cree 	 */
4811909387fSEdward Cree 	efx_tc_flower_release_counter(efx, conn->cnt);
4821909387fSEdward Cree 	kfree(conn);
4831909387fSEdward Cree }
4841909387fSEdward Cree 
efx_tc_ct_destroy(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)4851909387fSEdward Cree static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
4861909387fSEdward Cree 			     struct flow_cls_offload *tc)
4871909387fSEdward Cree {
4881909387fSEdward Cree 	struct efx_nic *efx = ct_zone->efx;
4891909387fSEdward Cree 	struct efx_tc_ct_entry *conn;
4901909387fSEdward Cree 
4911909387fSEdward Cree 	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
4921909387fSEdward Cree 				      efx_tc_ct_ht_params);
4931909387fSEdward Cree 	if (!conn) {
4941909387fSEdward Cree 		netif_warn(efx, drv, efx->net_dev,
4951909387fSEdward Cree 			   "Conntrack %lx not found to remove\n", tc->cookie);
4961909387fSEdward Cree 		return -ENOENT;
4971909387fSEdward Cree 	}
4981909387fSEdward Cree 
4991909387fSEdward Cree 	mutex_lock(&ct_zone->mutex);
5001909387fSEdward Cree 	list_del(&conn->list);
5011909387fSEdward Cree 	efx_tc_ct_remove(efx, conn);
5021909387fSEdward Cree 	mutex_unlock(&ct_zone->mutex);
5031909387fSEdward Cree 	synchronize_rcu();
5041909387fSEdward Cree 	efx_tc_ct_remove_finish(efx, conn);
5051909387fSEdward Cree 	return 0;
5061909387fSEdward Cree }
5071909387fSEdward Cree 
efx_tc_ct_stats(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)5081909387fSEdward Cree static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
5091909387fSEdward Cree 			   struct flow_cls_offload *tc)
5101909387fSEdward Cree {
5111909387fSEdward Cree 	struct efx_nic *efx = ct_zone->efx;
5121909387fSEdward Cree 	struct efx_tc_ct_entry *conn;
5131909387fSEdward Cree 	struct efx_tc_counter *cnt;
5141909387fSEdward Cree 
5151909387fSEdward Cree 	rcu_read_lock();
5161909387fSEdward Cree 	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
5171909387fSEdward Cree 				      efx_tc_ct_ht_params);
5181909387fSEdward Cree 	if (!conn) {
5191909387fSEdward Cree 		netif_warn(efx, drv, efx->net_dev,
5201909387fSEdward Cree 			   "Conntrack %lx not found for stats\n", tc->cookie);
5211909387fSEdward Cree 		rcu_read_unlock();
5221909387fSEdward Cree 		return -ENOENT;
5231909387fSEdward Cree 	}
5241909387fSEdward Cree 
5251909387fSEdward Cree 	cnt = conn->cnt;
5261909387fSEdward Cree 	spin_lock_bh(&cnt->lock);
5271909387fSEdward Cree 	/* Report only last use */
5281909387fSEdward Cree 	flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
5291909387fSEdward Cree 			  FLOW_ACTION_HW_STATS_DELAYED);
5301909387fSEdward Cree 	spin_unlock_bh(&cnt->lock);
5311909387fSEdward Cree 	rcu_read_unlock();
5321909387fSEdward Cree 
5331909387fSEdward Cree 	return 0;
534c3bb5c6aSEdward Cree }
535c3bb5c6aSEdward Cree 
efx_tc_flow_block(enum tc_setup_type type,void * type_data,void * cb_priv)536c3bb5c6aSEdward Cree static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
537c3bb5c6aSEdward Cree 			     void *cb_priv)
538c3bb5c6aSEdward Cree {
5391909387fSEdward Cree 	struct flow_cls_offload *tcb = type_data;
5401909387fSEdward Cree 	struct efx_tc_ct_zone *ct_zone = cb_priv;
5411909387fSEdward Cree 
5421909387fSEdward Cree 	if (type != TC_SETUP_CLSFLOWER)
5431909387fSEdward Cree 		return -EOPNOTSUPP;
5441909387fSEdward Cree 
5451909387fSEdward Cree 	switch (tcb->command) {
5461909387fSEdward Cree 	case FLOW_CLS_REPLACE:
5471909387fSEdward Cree 		return efx_tc_ct_replace(ct_zone, tcb);
5481909387fSEdward Cree 	case FLOW_CLS_DESTROY:
5491909387fSEdward Cree 		return efx_tc_ct_destroy(ct_zone, tcb);
5501909387fSEdward Cree 	case FLOW_CLS_STATS:
5511909387fSEdward Cree 		return efx_tc_ct_stats(ct_zone, tcb);
5521909387fSEdward Cree 	default:
5531909387fSEdward Cree 		break;
5545cce7814SYang Li 	}
5551909387fSEdward Cree 
556c3bb5c6aSEdward Cree 	return -EOPNOTSUPP;
557c3bb5c6aSEdward Cree }
558c3bb5c6aSEdward Cree 
/* Get a reference on the conntrack zone numbered @zone, creating it and
 * registering our callback on flowtable @ct_ft if it does not exist yet.
 *
 * Return: the zone, holding a reference that the caller must drop with
 * efx_tc_ct_unregister_zone(); or an ERR_PTR: -ENOMEM, a hashtable insertion
 * error, -EAGAIN if an existing zone could not be referenced because its
 * refcount is zero, or the error from registering the flowtable callback.
 */
struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
					       struct nf_flowtable *ct_ft)
{
	struct efx_tc_ct_zone *new_zone, *existing;
	int rc;

	new_zone = kzalloc(sizeof(*new_zone), GFP_USER);
	if (!new_zone)
		return ERR_PTR(-ENOMEM);
	new_zone->zone = zone;

	existing = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
						     &new_zone->linkage,
						     efx_tc_ct_zone_ht_params);
	if (IS_ERR(existing)) {
		/* insertion failed outright */
		kfree(new_zone);
		return ERR_CAST(existing);
	}
	if (existing) {
		/* the zone is already known, so ours is surplus */
		kfree(new_zone);
		if (!refcount_inc_not_zero(&existing->ref))
			return ERR_PTR(-EAGAIN);
		WARN_ON_ONCE(existing->nf_ft != ct_ft);
		netif_dbg(efx, drv, efx->net_dev,
			  "Found existing ct_zone for %u\n", zone);
		return existing;
	}

	/* Our entry went in: finish setting it up and hook the flowtable */
	new_zone->nf_ft = ct_ft;
	new_zone->efx = efx;
	INIT_LIST_HEAD(&new_zone->cts);
	mutex_init(&new_zone->mutex);
	rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, new_zone);
	netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
		  zone, rc);
	if (rc < 0) {
		rhashtable_remove_fast(&efx->tc->ct_zone_ht,
				       &new_zone->linkage,
				       efx_tc_ct_zone_ht_params);
		kfree(new_zone);
		return ERR_PTR(rc);
	}

	/* The refcount stays zero until this point, so anyone finding the
	 * zone in the table earlier gets -EAGAIN above.
	 */
	refcount_set(&new_zone->ref, 1);
	return new_zone;
}
602c3bb5c6aSEdward Cree 
efx_tc_ct_unregister_zone(struct efx_nic * efx,struct efx_tc_ct_zone * ct_zone)603c3bb5c6aSEdward Cree void efx_tc_ct_unregister_zone(struct efx_nic *efx,
604c3bb5c6aSEdward Cree 			       struct efx_tc_ct_zone *ct_zone)
605c3bb5c6aSEdward Cree {
6061909387fSEdward Cree 	struct efx_tc_ct_entry *conn, *next;
6071909387fSEdward Cree 
608c3bb5c6aSEdward Cree 	if (!refcount_dec_and_test(&ct_zone->ref))
609c3bb5c6aSEdward Cree 		return; /* still in use */
610c3bb5c6aSEdward Cree 	nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
611c3bb5c6aSEdward Cree 	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
612c3bb5c6aSEdward Cree 			       efx_tc_ct_zone_ht_params);
6131909387fSEdward Cree 	mutex_lock(&ct_zone->mutex);
6141909387fSEdward Cree 	list_for_each_entry(conn, &ct_zone->cts, list)
6151909387fSEdward Cree 		efx_tc_ct_remove(efx, conn);
6161909387fSEdward Cree 	synchronize_rcu();
6171909387fSEdward Cree 	/* need to use _safe because efx_tc_ct_remove_finish() frees conn */
6181909387fSEdward Cree 	list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
6191909387fSEdward Cree 		efx_tc_ct_remove_finish(efx, conn);
6201909387fSEdward Cree 	mutex_unlock(&ct_zone->mutex);
6211909387fSEdward Cree 	mutex_destroy(&ct_zone->mutex);
622c3bb5c6aSEdward Cree 	netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
623c3bb5c6aSEdward Cree 		  ct_zone->zone);
624c3bb5c6aSEdward Cree 	kfree(ct_zone);
625c3bb5c6aSEdward Cree }
626