1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <linux/etherdevice.h>
5 #include <linux/idr.h>
6 #include <linux/mlx5/driver.h>
7 #include <linux/mlx5/mlx5_ifc.h>
8 #include <linux/mlx5/vport.h>
9 #include <linux/mlx5/fs.h>
10 #include "mlx5_core.h"
11 #include "eswitch.h"
12 #include "en.h"
13 #include "en_tc.h"
14 #include "fs_core.h"
15 #include "esw/indir_table.h"
16 #include "lib/fs_chains.h"
17 #include "en/mod_hdr.h"
18 
19 #define MLX5_ESW_INDIR_TABLE_SIZE 128
20 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
21 #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
22 
/* One recirculation steering rule, keyed by tunnel (vni, dst_ip) and
 * refcounted across flows sharing the same tunnel destination.
 */
struct mlx5_esw_indir_table_rule {
	struct list_head list; /* entry on mlx5_esw_indir_table_entry::recirc_rules */
	struct mlx5_flow_handle *handle; /* installed steering rule */
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip; /* tunnel outer destination IP this rule matches */
	u32 vni; /* VXLAN VNI this rule matches */
	struct mlx5_modify_hdr *mh; /* metadata rewrite (source rewrite) action */
	refcount_t refcnt; /* users of this rule; torn down on last put */
};
34 
/* One indirection table instance, keyed by (vport, ip_version) in the
 * mlx5_esw_indir_table hashtable.  Holds a recirculation flow group
 * (flow indices 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) plus a single
 * forward rule in its own group at index MLX5_ESW_INDIR_TABLE_FWD_IDX.
 */
struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *recirc_grp;
	struct mlx5_flow_group *fwd_grp;
	struct mlx5_flow_handle *fwd_rule;
	struct list_head recirc_rules; /* list of mlx5_esw_indir_table_rule */
	int recirc_cnt; /* number of rules on recirc_rules */
	int fwd_ref; /* references taken by non-decap users */

	u16 vport;
	u8 ip_version;
};
48 
/* Registry of indirection table entries, hashed by (vport << 16 | ip_version). */
struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};
53 
54 struct mlx5_esw_indir_table *
55 mlx5_esw_indir_table_init(void)
56 {
57 	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
58 
59 	if (!indir)
60 		return ERR_PTR(-ENOMEM);
61 
62 	mutex_init(&indir->lock);
63 	hash_init(indir->table);
64 	return indir;
65 }
66 
/* Free a registry created by mlx5_esw_indir_table_init().  Assumes all
 * entries have already been released (nothing in the hashtable is freed
 * here) — NOTE(review): confirm callers drain entries first.
 */
void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
{
	mutex_destroy(&indir->lock);
	kvfree(indir);
}
73 
74 bool
75 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
76 			    struct mlx5_flow_attr *attr,
77 			    u16 vport_num,
78 			    struct mlx5_core_dev *dest_mdev)
79 {
80 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
81 	bool vf_sf_vport;
82 
83 	vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
84 		      mlx5_esw_is_sf_vport(esw, vport_num);
85 
86 	/* Use indirect table for all IP traffic from UL to VF with vport
87 	 * destination when source rewrite flag is set.
88 	 */
89 	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
90 		vf_sf_vport &&
91 		esw->dev == dest_mdev &&
92 		attr->ip_version &&
93 		attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
94 }
95 
96 u16
97 mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
98 {
99 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
100 
101 	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
102 }
103 
104 static struct mlx5_esw_indir_table_rule *
105 mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
106 				 struct mlx5_esw_flow_attr *attr)
107 {
108 	struct mlx5_esw_indir_table_rule *rule;
109 
110 	list_for_each_entry(rule, &e->recirc_rules, list)
111 		if (rule->vni == attr->rx_tun_attr->vni &&
112 		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
113 			    sizeof(attr->rx_tun_attr->dst_ip)))
114 			goto found;
115 	return NULL;
116 
117 found:
118 	refcount_inc(&rule->refcnt);
119 	return rule;
120 }
121 
/* Get a reference on the recirculation rule for this tunnel flow, creating
 * it on first use.
 *
 * The rule matches decapped traffic by outer IP version (or ethertype when
 * ip_version matching is unsupported), tunnel destination IP, VXLAN VNI and
 * uplink source-port metadata in reg_c_0.  Its actions rewrite reg_c_0 to
 * the decap vport's metadata, mark the tunnel register with
 * ESW_TUN_SLOW_TABLE_GOTO_VPORT, and forward to the chain 0 (prio 1) table
 * so the packet is recirculated as if received from the decap vport.
 *
 * Returns 0 on success (including a lookup hit, which only bumps the
 * existing rule's refcount) or a negative errno.
 */
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	/* Reuse an existing rule for the same (vni, dst_ip); the lookup
	 * takes the reference for us.
	 */
	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	/* Capacity check: the last table index is reserved for the fwd rule */
	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	/* Prefer matching on the parsed IP version; fall back to ethertype
	 * when the device cannot match ip_version.
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	/* Exact match on the tunnel destination IP */
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	/* Copy the VNI match from the caller's spec */
	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	/* Only handle packets whose source metadata marks the uplink */
	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	/* Takes a reference on the chain 0 table; released in
	 * mlx5_esw_indir_table_rule_put() or the error path below.
	 */
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	/* The SW action list is no longer needed once the HW modify header
	 * object (kept in rule->mh) exists and the rule is installed.
	 */
	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out; /* success: err == 0, only rule_spec is freed */

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
259 
/* Drop a reference on the recirc rule matching this flow's (vni, dst_ip).
 * The last put deletes the HW rule and releases the modify header and the
 * chain table reference taken in mlx5_esw_indir_table_rule_get().
 * Silently returns if no matching rule exists.
 */
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	/* Same (vni, dst_ip) key as mlx5_esw_indir_table_rule_lookup() */
	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0); /* pairs with get in rule_get() */
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}
287 
/* Create the recirculation flow group for entry @e, spanning flow indices
 * 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX.  The group's match criteria cover
 * outer IP version (or ethertype as fallback), tunnel destination IP,
 * VXLAN VNI and source-port metadata in reg_c_0.  Also initializes the
 * entry's recirc rule list.  Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	/* Mirror the criteria used by the rules added in rule_get() */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}
339 
/* Create a single-entry group at the table's last flow index holding a
 * match-all rule (the spec is left zeroed) that forwards to the entry's
 * vport.  Returns 0 on success or a negative errno; on rule-add failure
 * the freshly created group is destroyed before returning.
 */
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	/* Zeroed spec => the rule matches all packets in this table */
	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out: /* also reached on success with err == 0 */
	kvfree(spec);
	kvfree(in);
	return err;
}
384 
/* Build a new indirection table entry for (vport, ip_version): create the
 * unmanaged FDB flow table, the recirc and fwd groups, optionally the first
 * recirc rule (when @decap is set), and insert the entry into the indir
 * hashtable.  Called with the indir table lock held (see
 * mlx5_esw_indir_table_get()).  Returns the new entry or an ERR_PTR.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	/* Non-decap creators hold a fwd reference; decap creators are
	 * tracked through the recirc rule refcount instead.
	 */
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	/* Key must match mlx5_esw_indir_table_entry_lookup() */
	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}
448 
449 static struct mlx5_esw_indir_table_entry *
450 mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
451 {
452 	struct mlx5_esw_indir_table_entry *e;
453 	u32 key = vport << 16 | ip_version;
454 
455 	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
456 		if (e->vport == vport && e->ip_version == ip_version)
457 			return e;
458 
459 	return NULL;
460 }
461 
/* Get (and take a reference on) the indirection flow table for
 * (vport, ip_version), creating it on first use.  Non-decap callers are
 * counted via e->fwd_ref; decap callers reference a per-(vni, dst_ip)
 * recirc rule instead.  Must be balanced with mlx5_esw_indir_table_put().
 * Returns the flow table or an ERR_PTR on failure.
 */
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		/* Creation inserts the entry into the hashtable with the
		 * appropriate initial reference already taken.
		 */
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	/* The reference taken above keeps e (and e->ft) alive past unlock */
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}
495 
/* Release a reference taken by mlx5_esw_indir_table_get().  A non-decap
 * put drops fwd_ref; a decap put drops the matching recirc rule reference.
 * When no fwd references and no recirc rules remain, the entry and its
 * HW objects are torn down.
 */
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	/* Keep the entry while any user remains */
	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}
524