xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision ebb16d5e)
1 /*-
2  * Copyright (c) 2013-2021, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <linux/module.h>
32 #include <dev/mlx5/driver.h>
33 #include <dev/mlx5/mlx5_core/mlx5_core.h>
34 #include <dev/mlx5/mlx5_core/fs_core.h>
35 #include <linux/string.h>
36 #include <linux/compiler.h>
37 
38 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
39 					 sizeof(struct init_tree_node))
40 
41 #define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
42 		 ...) {.type = FS_TYPE_PRIO,\
43 	.name = name_val,\
44 	.min_ft_level = min_level_val,\
45 	.flags = flags_val,\
46 	.max_ft = max_ft_val,\
47 	.caps = caps_val,\
48 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
49 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
50 }
51 
52 #define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
53 	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
54 		 __VA_ARGS__)\
55 
56 #define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
57 	.name = name_val,\
58 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
59 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
60 }
61 
62 #define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
63 				   sizeof(long))
64 
65 #define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
66 
67 #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
68 			       .caps = (long[]) {__VA_ARGS__}}
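
/*
 * Illustrative sketch (not part of the original source): expanding
 * ADD_FT_PRIO("prio0", 0, BYPASS_PRIO_MAX_FT) with no children yields an
 * initializer roughly equivalent to
 *
 *	{ .type = FS_TYPE_PRIO, .name = "prio0", .min_ft_level = 0,
 *	  .flags = 0, .max_ft = BYPASS_PRIO_MAX_FT, .caps = {},
 *	  .children = (struct init_tree_node[]) {}, .ar_size = 0 }
 *
 * INIT_TREE_NODE_ARRAY_SIZE() derives .ar_size at compile time from the
 * compound-literal array built out of the variadic arguments.
 */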
69 
70 /* Flowtable sizes: */
71 #define	BYPASS_MAX_FT 5
72 #define	BYPASS_PRIO_MAX_FT 1
73 #define	OFFLOADS_MAX_FT 2
74 #define	KERNEL_MAX_FT 5
75 #define	LEFTOVER_MAX_FT 1
76 
77 /* Flowtable levels: */
78 #define	OFFLOADS_MIN_LEVEL 3
79 #define	KERNEL_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
80 #define	LEFTOVER_MIN_LEVEL (KERNEL_MIN_LEVEL + 1)
81 #define	BYPASS_MIN_LEVEL (MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL)
82 
83 struct node_caps {
84 	size_t	arr_sz;
85 	long	*caps;
86 };
87 
88 struct init_tree_node {
89 	enum fs_type	type;
90 	const char	*name;
91 	struct init_tree_node *children;
92 	int ar_size;
93 	struct node_caps caps;
94 	u8  flags;
95 	int min_ft_level;
96 	int prio;
97 	int max_ft;
98 } root_fs = {
99 	.type = FS_TYPE_NAMESPACE,
100 	.name = "root",
101 	.ar_size = 4,
102 	.children = (struct init_tree_node[]) {
103 		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
104 			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
105 					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
106 			 ADD_NS("by_pass_ns",
107 				ADD_FT_PRIO("prio0", 0,
108 					    BYPASS_PRIO_MAX_FT),
109 				ADD_FT_PRIO("prio1", 0,
110 					    BYPASS_PRIO_MAX_FT),
111 				ADD_FT_PRIO("prio2", 0,
112 					    BYPASS_PRIO_MAX_FT),
113 				ADD_FT_PRIO("prio3", 0,
114 					    BYPASS_PRIO_MAX_FT),
115 				ADD_FT_PRIO("prio4", 0,
116 					    BYPASS_PRIO_MAX_FT),
117 				ADD_FT_PRIO("prio5", 0,
118 					    BYPASS_PRIO_MAX_FT),
119 				ADD_FT_PRIO("prio6", 0,
120 					    BYPASS_PRIO_MAX_FT),
121 				ADD_FT_PRIO("prio7", 0,
122 					    BYPASS_PRIO_MAX_FT),
123 				ADD_FT_PRIO("prio-mcast", 0,
124 					    BYPASS_PRIO_MAX_FT))),
125 		ADD_PRIO("offloads_prio", 0, OFFLOADS_MIN_LEVEL, 0, {},
126 			 ADD_NS("offloads_ns",
127 				ADD_FT_PRIO("prio_offloads-0", 0,
128 					    OFFLOADS_MAX_FT))),
129 		ADD_PRIO("kernel_prio", 0, KERNEL_MIN_LEVEL, 0, {},
130 			 ADD_NS("kernel_ns",
131 				ADD_FT_PRIO("prio_kernel-0", 0,
132 					    KERNEL_MAX_FT))),
133 		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
134 			 LEFTOVER_MIN_LEVEL, 0,
135 			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
136 					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
137 			 ADD_NS("leftover_ns",
138 				ADD_FT_PRIO("leftovers_prio-0",
139 					MLX5_CORE_FS_PRIO_SHARED,
140 					LEFTOVER_MAX_FT)))
141 	}
142 };
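
/*
 * Sketch of the static tree described by the initializer above (levels
 * and capabilities abbreviated):
 *
 *	root (namespace)
 *	  +- by_pass_prio   -> by_pass_ns   -> prio0..prio7, prio-mcast
 *	  +- offloads_prio  -> offloads_ns  -> prio_offloads-0
 *	  +- kernel_prio    -> kernel_ns    -> prio_kernel-0
 *	  +- leftovers_prio -> leftover_ns  -> leftovers_prio-0 (shared)
 *
 * by_pass_prio and leftovers_prio additionally require the
 * flow_modify_en and modify_root capabilities (FS_REQUIRED_CAPS above).
 */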
143 
144 /* Tree creation functions */
145 
146 static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
147 {
148 	struct fs_base *parent;
149 
150 	/* Make sure we only read it once while we go up the tree */
151 	while ((parent = node->parent))
152 		node = parent;
153 
154 	if (node->type != FS_TYPE_NAMESPACE) {
155 		return NULL;
156 	}
157 
158 	return container_of(container_of(node,
159 					 struct mlx5_flow_namespace,
160 					 base),
161 			    struct mlx5_flow_root_namespace,
162 			    ns);
163 }
164 
165 static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
166 {
167 	struct mlx5_flow_root_namespace *root = find_root(node);
168 
169 	if (root)
170 		return root->dev;
171 	return NULL;
172 }
173 
174 static void fs_init_node(struct fs_base *node,
175 			 unsigned int refcount)
176 {
177 	kref_init(&node->refcount);
178 	atomic_set(&node->users_refcount, refcount);
179 	init_completion(&node->complete);
180 	INIT_LIST_HEAD(&node->list);
181 	mutex_init(&node->lock);
182 }
183 
184 static void _fs_add_node(struct fs_base *node,
185 			 const char *name,
186 			 struct fs_base *parent)
187 {
188 	if (parent)
189 		atomic_inc(&parent->users_refcount);
190 	node->name = kstrdup_const(name, GFP_KERNEL);
191 	node->parent = parent;
192 }
193 
194 static void fs_add_node(struct fs_base *node,
195 			struct fs_base *parent, const char *name,
196 			unsigned int refcount)
197 {
198 	fs_init_node(node, refcount);
199 	_fs_add_node(node, name, parent);
200 }
201 
202 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
203 		    bool parent_locked);
204 
205 static void fs_del_dst(struct mlx5_flow_rule *dst);
206 static void _fs_del_ft(struct mlx5_flow_table *ft);
207 static void fs_del_fg(struct mlx5_flow_group *fg);
208 static void fs_del_fte(struct fs_fte *fte);
209 
210 static void cmd_remove_node(struct fs_base *base)
211 {
212 	switch (base->type) {
213 	case FS_TYPE_FLOW_DEST:
214 		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
215 		break;
216 	case FS_TYPE_FLOW_TABLE:
217 		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
218 		break;
219 	case FS_TYPE_FLOW_GROUP:
220 		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
221 		break;
222 	case FS_TYPE_FLOW_ENTRY:
223 		fs_del_fte(container_of(base, struct fs_fte, base));
224 		break;
225 	default:
226 		break;
227 	}
228 }
229 
230 static void __fs_remove_node(struct kref *kref)
231 {
232 	struct fs_base *node = container_of(kref, struct fs_base, refcount);
233 
234 	if (node->parent)
235 		mutex_lock(&node->parent->lock);
236 	mutex_lock(&node->lock);
237 	cmd_remove_node(node);
238 	mutex_unlock(&node->lock);
239 	complete(&node->complete);
240 	if (node->parent) {
241 		mutex_unlock(&node->parent->lock);
242 		_fs_put(node->parent, _fs_remove_node, false);
243 	}
244 }
245 
246 void _fs_remove_node(struct kref *kref)
247 {
248 	struct fs_base *node = container_of(kref, struct fs_base, refcount);
249 
250 	__fs_remove_node(kref);
251 	kfree_const(node->name);
252 	kfree(node);
253 }
254 
255 static void fs_get(struct fs_base *node)
256 {
257 	atomic_inc(&node->users_refcount);
258 }
259 
260 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
261 		    bool parent_locked)
262 {
263 	struct fs_base *parent_node = node->parent;
264 
265 	if (parent_node && !parent_locked)
266 		mutex_lock(&parent_node->lock);
267 	if (atomic_dec_and_test(&node->users_refcount)) {
268 		if (parent_node) {
269 			/*remove from parent's list*/
270 			list_del_init(&node->list);
271 			mutex_unlock(&parent_node->lock);
272 		}
273 		kref_put(&node->refcount, kref_cb);
274 		if (parent_node && parent_locked)
275 			mutex_lock(&parent_node->lock);
276 	} else if (parent_node && !parent_locked) {
277 		mutex_unlock(&parent_node->lock);
278 	}
279 }
280 
281 static void fs_put(struct fs_base *node)
282 {
283 	_fs_put(node, __fs_remove_node, false);
284 }
285 
286 static void fs_put_parent_locked(struct fs_base *node)
287 {
288 	_fs_put(node, __fs_remove_node, true);
289 }
290 
291 static void fs_remove_node(struct fs_base *node)
292 {
293 	fs_put(node);
294 	wait_for_completion(&node->complete);
295 	kfree_const(node->name);
296 	kfree(node);
297 }
298 
299 static void fs_remove_node_parent_locked(struct fs_base *node)
300 {
301 	fs_put_parent_locked(node);
302 	wait_for_completion(&node->complete);
303 	kfree_const(node->name);
304 	kfree(node);
305 }
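
/*
 * Lifetime scheme, sketched (a reading aid, not a new API): every
 * fs_base couples a user counter (users_refcount) with a kref.
 * fs_get()/fs_put() manage the user counter; when it drops to zero the
 * node is unlinked from its parent, and the kref release callback
 * removes the underlying hardware object and signals ->complete.
 * A typical owner therefore does:
 *
 *	fs_add_node(&obj->base, &parent->base, "name", 1);
 *	...
 *	fs_get(&obj->base);		(temporary reference)
 *	fs_put(&obj->base);
 *	...
 *	fs_remove_node(&obj->base);	(final put, wait, free)
 */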
306 
307 static struct fs_fte *fs_alloc_fte(u8 action,
308 				   u32 flow_tag,
309 				   u32 *match_value,
310 				   unsigned int index)
311 {
312 	struct fs_fte *fte;
313 
314 
315 	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
316 	if (!fte)
317 		return ERR_PTR(-ENOMEM);
318 
319 	memcpy(fte->val, match_value, sizeof(fte->val));
320 	fte->base.type =  FS_TYPE_FLOW_ENTRY;
321 	fte->dests_size = 0;
322 	fte->flow_tag = flow_tag;
323 	fte->index = index;
324 	INIT_LIST_HEAD(&fte->dests);
325 	fte->action = action;
326 
327 	return fte;
328 }
329 
330 static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
331 					  struct mlx5_flow_group *fg,
332 					  u32 *match_value,
333 					  unsigned int index)
334 {
335 	int err;
336 	struct fs_fte *fte;
337 	struct mlx5_flow_rule *dst;
338 
339 	if (fg->num_ftes == fg->max_ftes)
340 		return ERR_PTR(-ENOSPC);
341 
342 	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
343 			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
344 	if (IS_ERR(fte))
345 		return fte;
346 
347 	/*create dst*/
348 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
349 	if (!dst) {
350 		err = -ENOMEM;
351 		goto free_fte;
352 	}
353 
354 	fte->base.parent = &fg->base;
355 	fte->dests_size = 1;
356 	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
357 	dst->base.parent = &fte->base;
358 	list_add(&dst->base.list, &fte->dests);
	/* it is assumed that the caller creates the star rules sorted by index */
360 	list_add_tail(&fte->base.list, &fg->ftes);
361 	fg->num_ftes++;
362 
363 	return fte;
364 
365 free_fte:
366 	kfree(fte);
367 	return ERR_PTR(err);
368 }
369 
370 /* assume that fte can't be changed */
371 static void free_star_fte_entry(struct fs_fte *fte)
372 {
373 	struct mlx5_flow_group	*fg;
374 	struct mlx5_flow_rule	*dst, *temp;
375 
376 	fs_get_parent(fg, fte);
377 
378 	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
379 		fte->dests_size--;
380 		list_del(&dst->base.list);
381 		kfree(dst);
382 	}
383 
384 	list_del(&fte->base.list);
385 	fg->num_ftes--;
386 	kfree(fte);
387 }
388 
389 static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
390 {
391 	struct mlx5_flow_group *fg;
392 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
393 					    create_fg_in, match_criteria);
394 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
395 					    create_fg_in,
396 					    match_criteria_enable);
397 	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
398 	if (!fg)
399 		return ERR_PTR(-ENOMEM);
400 
401 	INIT_LIST_HEAD(&fg->ftes);
402 	fg->mask.match_criteria_enable = match_criteria_enable;
403 	memcpy(&fg->mask.match_criteria, match_criteria,
404 	       sizeof(fg->mask.match_criteria));
405 	fg->base.type =  FS_TYPE_FLOW_GROUP;
406 	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
407 				   start_flow_index);
408 	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
409 				end_flow_index) - fg->start_index + 1;
410 	return fg;
411 }
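
/*
 * Sketch of how a caller prepares the create_flow_group_in mailbox that
 * fs_alloc_fg() parses ("first"/"last" are placeholder indices; see
 * create_star_rule() below for the in-tree usage):
 *
 *	u32 *in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in));
 *
 *	MLX5_SET(create_flow_group_in, in, match_criteria_enable, 0);
 *	MLX5_SET(create_flow_group_in, in, start_flow_index, first);
 *	MLX5_SET(create_flow_group_in, in, end_flow_index, last);
 *	fg = fs_alloc_fg(in);	(fg->max_ftes == last - first + 1)
 */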
412 
413 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
414 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
415 					    struct fs_prio *prio);
416 
417 /* assumed src_ft and dst_ft can't be freed */
418 static int fs_set_star_rule(struct mlx5_core_dev *dev,
419 			    struct mlx5_flow_table *src_ft,
420 			    struct mlx5_flow_table *dst_ft)
421 {
422 	struct mlx5_flow_rule *src_dst;
423 	struct fs_fte *src_fte;
424 	int err = 0;
425 	u32 *match_value;
426 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
427 
428 	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
429 				   struct mlx5_flow_rule, base.list);
430 	match_value = mlx5_vzalloc(match_len);
431 	if (!match_value) {
432 		mlx5_core_warn(dev, "failed to allocate inbox\n");
433 		return -ENOMEM;
434 	}
	/* match_value stays zeroed: the star rule matches every packet */
436 
437 	fs_get_parent(src_fte, src_dst);
438 
439 	src_dst->dest_attr.ft = dst_ft;
440 	if (dst_ft) {
441 		err = mlx5_cmd_fs_set_fte(dev,
442 					  src_ft->vport,
443 					  &src_fte->status,
444 					  match_value, src_ft->type,
445 					  src_ft->id, src_fte->index,
446 					  src_ft->star_rule.fg->id,
447 					  src_fte->flow_tag,
448 					  src_fte->action,
449 					  src_fte->dests_size,
450 					  &src_fte->dests);
451 		if (err)
452 			goto free;
453 
454 		fs_get(&dst_ft->base);
455 	} else {
456 		mlx5_cmd_fs_delete_fte(dev,
457 				       src_ft->vport,
458 				       &src_fte->status,
459 				       src_ft->type, src_ft->id,
460 				       src_fte->index);
461 	}
462 
463 free:
464 	kvfree(match_value);
465 	return err;
466 }
467 
468 static int connect_prev_fts(struct fs_prio *locked_prio,
469 			    struct fs_prio *prev_prio,
470 			    struct mlx5_flow_table *next_ft)
471 {
472 	struct mlx5_flow_table *iter;
473 	int err = 0;
474 	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
475 
476 	if (!dev)
477 		return -ENODEV;
478 
479 	mutex_lock(&prev_prio->base.lock);
480 	fs_for_each_ft(iter, prev_prio) {
481 		struct mlx5_flow_rule *src_dst =
482 			list_first_entry(&iter->star_rule.fte->dests,
483 					 struct mlx5_flow_rule, base.list);
484 		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
485 
486 		if (prev_ft == next_ft)
487 			continue;
488 
489 		err = fs_set_star_rule(dev, iter, next_ft);
490 		if (err) {
491 			mlx5_core_warn(dev,
492 			    "mlx5: flow steering can't connect prev and next\n");
493 			goto unlock;
494 		} else {
495 			/* Assume ft's prio is locked */
496 			if (prev_ft) {
497 				struct fs_prio *prio;
498 
499 				fs_get_parent(prio, prev_ft);
500 				if (prio == locked_prio)
501 					fs_put_parent_locked(&prev_ft->base);
502 				else
503 					fs_put(&prev_ft->base);
504 			}
505 		}
506 	}
507 
508 unlock:
509 	mutex_unlock(&prev_prio->base.lock);
	return err;
511 }
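
/*
 * Star-rule chaining, sketched: every flow table ends in a wildcard
 * ("star") entry that forwards traffic which matched nothing to the
 * next table in level order, so the tables of all prios form one chain:
 *
 *	ft A --star--> ft B --star--> ft C --star--> (none)
 *
 * connect_prev_fts() above repoints the star rule of every table in the
 * previous prio at next_ft whenever a table is inserted or removed.
 */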
512 
513 static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
514 {
515 	struct mlx5_flow_group *fg;
516 	int err;
517 	u32 *fg_in;
518 	u32 *match_value;
519 	struct mlx5_flow_table *next_ft;
520 	struct mlx5_flow_table *prev_ft;
521 	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
522 	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
523 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
524 
525 	fg_in = mlx5_vzalloc(fg_inlen);
526 	if (!fg_in) {
527 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
528 		return -ENOMEM;
529 	}
530 
531 	match_value = mlx5_vzalloc(match_len);
532 	if (!match_value) {
533 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
534 		kvfree(fg_in);
535 		return -ENOMEM;
536 	}
537 
538 	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
539 	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
540 	fg = fs_alloc_fg(fg_in);
541 	if (IS_ERR(fg)) {
542 		err = PTR_ERR(fg);
543 		goto out;
544 	}
545 	ft->star_rule.fg = fg;
546 	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
547 				     fg_in, ft->vport, ft->type,
548 				     ft->id,
549 				     &fg->id);
550 	if (err)
551 		goto free_fg;
552 
553 	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
554 						      match_value,
555 						      ft->max_fte);
	if (IS_ERR(ft->star_rule.fte)) {
		err = PTR_ERR(ft->star_rule.fte);
		goto destroy_fg;
	}
558 
559 	mutex_lock(&root->fs_chain_lock);
560 	next_ft = find_next_ft(prio);
561 	err = fs_set_star_rule(root->dev, ft, next_ft);
562 	if (err) {
563 		mutex_unlock(&root->fs_chain_lock);
564 		goto free_star_rule;
565 	}
566 	if (next_ft) {
567 		struct fs_prio *parent;
568 
569 		fs_get_parent(parent, next_ft);
570 		fs_put(&next_ft->base);
571 	}
572 	prev_ft = find_prev_ft(ft, prio);
573 	if (prev_ft) {
574 		struct fs_prio *prev_parent;
575 
576 		fs_get_parent(prev_parent, prev_ft);
577 
578 		err = connect_prev_fts(NULL, prev_parent, ft);
579 		if (err) {
580 			mutex_unlock(&root->fs_chain_lock);
581 			goto destroy_chained_star_rule;
582 		}
583 		fs_put(&prev_ft->base);
584 	}
585 	mutex_unlock(&root->fs_chain_lock);
586 	kvfree(fg_in);
587 	kvfree(match_value);
588 
589 	return 0;
590 
591 destroy_chained_star_rule:
592 	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
593 	if (next_ft)
594 		fs_put(&next_ft->base);
free_star_rule:
	free_star_fte_entry(ft->star_rule.fte);
destroy_fg:
	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
			       ft->type, ft->id,
			       fg->id);
600 free_fg:
601 	kfree(fg);
602 out:
603 	kvfree(fg_in);
604 	kvfree(match_value);
605 	return err;
606 }
607 
608 static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
609 {
610 	int err;
611 	struct mlx5_flow_root_namespace *root;
612 	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
613 	struct mlx5_flow_table *prev_ft, *next_ft;
614 	struct fs_prio *prev_prio;
615 
616 	WARN_ON(!dev);
617 
618 	root = find_root(&prio->base);
	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		return;
	}
623 
624 	/* In order to ensure atomic deletion, first update
625 	 * prev ft to point on the next ft.
626 	 */
627 	mutex_lock(&root->fs_chain_lock);
628 	prev_ft = find_prev_ft(ft, prio);
629 	next_ft = find_next_ft(prio);
630 	if (prev_ft) {
631 		fs_get_parent(prev_prio, prev_ft);
		/* Prev is connected to ft only if ft is the first (last) in the prio */
633 		err = connect_prev_fts(prio, prev_prio, next_ft);
634 		if (err)
635 			mlx5_core_warn(root->dev,
636 				       "flow steering can't connect prev and next of flow table\n");
637 		fs_put(&prev_ft->base);
638 	}
639 
640 	err = fs_set_star_rule(root->dev, ft, NULL);
	/* One put is for the fs_get() in find_next_ft() */
642 	if (next_ft) {
643 		fs_put(&next_ft->base);
644 		if (!err)
645 			fs_put(&next_ft->base);
646 	}
647 
648 	mutex_unlock(&root->fs_chain_lock);
649 	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
650 				     ft->star_rule.fg->id);
651 	if (err)
652 		mlx5_core_warn(dev,
653 			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
654 			       ft->base.name);
655 	free_star_fte_entry(ft->star_rule.fte);
656 
657 	kfree(ft->star_rule.fg);
658 	ft->star_rule.fg = NULL;
659 }
660 
661 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
662 				 unsigned int prio)
663 {
664 	struct fs_prio *iter_prio;
665 
666 	fs_for_each_prio(iter_prio, ns) {
667 		if (iter_prio->prio == prio)
668 			return iter_prio;
669 	}
670 
671 	return NULL;
672 }
673 
674 static unsigned int _alloc_new_level(struct fs_prio *prio,
675 				     struct mlx5_flow_namespace *match);
676 
677 static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
678 				      struct fs_prio *prio)
679 {
680 	unsigned int level = 0;
681 	struct fs_prio *p;
682 
683 	if (!ns)
684 		return 0;
685 
686 	mutex_lock(&ns->base.lock);
687 	fs_for_each_prio(p, ns) {
688 		if (p != prio)
689 			level += p->max_ft;
690 		else
691 			break;
692 	}
693 	mutex_unlock(&ns->base.lock);
694 
695 	fs_get_parent(prio, ns);
696 	if (prio)
697 		WARN_ON(prio->base.type != FS_TYPE_PRIO);
698 
699 	return level + _alloc_new_level(prio, ns);
700 }
701 
702 /* Called under lock of priority, hence locking all upper objects */
703 static unsigned int _alloc_new_level(struct fs_prio *prio,
704 				     struct mlx5_flow_namespace *match)
705 {
706 	struct mlx5_flow_namespace *ns;
707 	struct fs_base *it;
708 	unsigned int level = 0;
709 
710 	if (!prio)
711 		return 0;
712 
713 	mutex_lock(&prio->base.lock);
714 	fs_for_each_ns_or_ft_reverse(it, prio) {
715 		if (it->type == FS_TYPE_NAMESPACE) {
716 			struct fs_prio *p;
717 
718 			fs_get_obj(ns, it);
719 
720 			if (match != ns) {
721 				mutex_lock(&ns->base.lock);
722 				fs_for_each_prio(p, ns)
723 					level += p->max_ft;
724 				mutex_unlock(&ns->base.lock);
725 			} else {
726 				break;
727 			}
728 		} else {
729 			struct mlx5_flow_table *ft;
730 
731 			fs_get_obj(ft, it);
732 			mutex_unlock(&prio->base.lock);
733 			return level + ft->level + 1;
734 		}
735 	}
736 
737 	fs_get_parent(ns, prio);
738 	mutex_unlock(&prio->base.lock);
739 	return __alloc_new_level(ns, prio) + level;
740 }
741 
742 static unsigned int alloc_new_level(struct fs_prio *prio)
743 {
744 	return _alloc_new_level(prio, NULL);
745 }
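
/*
 * Worked example (hypothetical numbers): if the target prio already
 * holds a flow table, the walk above stops there and the new table gets
 * that table's level + 1 (plus any levels reserved by nested namespaces
 * passed on the way).  If the prio is empty, __alloc_new_level() sums
 * the max_ft reservations of all earlier prios up the tree; e.g. two
 * earlier prios with max_ft 3 and 2 place the first table of the third
 * prio at level 5.
 */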
746 
747 static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
748 				    struct mlx5_flow_table *ft)
749 {
750 	int err = 0;
751 	int min_level = INT_MAX;
752 
753 	if (root->root_ft)
754 		min_level = root->root_ft->level;
755 
756 	if (ft->level < min_level)
757 		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
758 					      ft->id);
759 	else
760 		return err;
761 
762 	if (err)
763 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
764 			       ft->id);
765 	else
766 		root->root_ft = ft;
767 
768 	return err;
769 }
770 
771 static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
772 						 u16 vport,
773 						 struct fs_prio *fs_prio,
774 						 int max_fte,
775 						 const char *name)
776 {
777 	struct mlx5_flow_table *ft;
778 	int err;
779 	int log_table_sz;
780 	int ft_size;
781 	char gen_name[20];
782 	struct mlx5_flow_root_namespace *root = find_root(&ns->base);
783 	struct mlx5_core_dev *dev = fs_get_dev(&ns->base);
784 
785 	if (!root) {
786 		mlx5_core_err(dev,
787 		    "flow steering failed to find root of namespace %s",
788 		    ns->base.name);
789 		return ERR_PTR(-ENODEV);
790 	}
791 
792 	if (fs_prio->num_ft == fs_prio->max_ft)
793 		return ERR_PTR(-ENOSPC);
794 
795 	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
796 	if (!ft)
797 		return ERR_PTR(-ENOMEM);
798 
799 	fs_init_node(&ft->base, 1);
800 	INIT_LIST_HEAD(&ft->fgs);
801 
	/* Temporary workaround until the level can be set via the API */
803 	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
804 		root->table_type == FS_FT_ESW_INGRESS_ACL)
805 		ft->level = 0;
806 	else
807 		ft->level = alloc_new_level(fs_prio);
808 
809 	ft->base.type = FS_TYPE_FLOW_TABLE;
810 	ft->vport = vport;
811 	ft->type = root->table_type;
	/* Two entries are reserved for the star rules */
	ft_size = roundup_pow_of_two(max_fte + 2);
	/* The user isn't aware of those rules */
	ft->max_fte = ft_size - 2;
816 	log_table_sz = ilog2(ft_size);
817 	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
818 				    ft->level, log_table_sz, &ft->id);
819 	if (err)
820 		goto free_ft;
821 
822 	err = create_star_rule(ft, fs_prio);
823 	if (err)
824 		goto del_ft;
825 
826 	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
827 			       flow_table_properties_nic_receive.modify_root)) {
828 		err = update_root_ft_create(root, ft);
829 		if (err)
830 			goto destroy_star_rule;
831 	}
832 
833 	if (!name || !strlen(name)) {
834 		snprintf(gen_name, 20, "flow_table_%u", ft->id);
835 		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
836 	} else {
837 		_fs_add_node(&ft->base, name, &fs_prio->base);
838 	}
839 	list_add_tail(&ft->base.list, &fs_prio->objs);
840 	fs_prio->num_ft++;
841 
842 	return ft;
843 
844 destroy_star_rule:
845 	destroy_star_rule(ft, fs_prio);
846 del_ft:
847 	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
848 free_ft:
849 	kfree(ft);
850 	return ERR_PTR(err);
851 }
852 
853 static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
854 						u16 vport,
855 						unsigned int prio,
856 						int max_fte,
857 						const char *name)
858 {
859 	struct fs_prio *fs_prio = NULL;
860 	fs_prio = find_prio(ns, prio);
861 	if (!fs_prio)
862 		return ERR_PTR(-EINVAL);
863 
864 	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
865 }
866 
867 
868 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
869 						   struct list_head *start);
870 
871 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
872 						     struct list_head *start);
873 
874 static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
875 {
876 	struct mlx5_flow_table *ft;
877 
878 	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
879 	if (ft) {
880 		ft->shared_refcount++;
881 		return ft;
882 	}
883 
884 	return NULL;
885 }
886 
887 struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
888 							   int prio,
889 							   const char *name,
890 							   int num_flow_table_entries,
891 							   int max_num_groups)
892 {
893 	struct mlx5_flow_table *ft = NULL;
894 	struct fs_prio *fs_prio;
895 	bool is_shared_prio;
896 
897 	fs_prio = find_prio(ns, prio);
898 	if (!fs_prio)
899 		return ERR_PTR(-EINVAL);
900 
901 	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
902 	if (is_shared_prio) {
903 		mutex_lock(&fs_prio->shared_lock);
904 		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
905 	}
906 
907 	if (ft)
908 		goto return_ft;
909 
910 	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
911 			      name);
912 	if (IS_ERR(ft))
913 		goto return_ft;
914 
915 	ft->autogroup.active = true;
916 	ft->autogroup.max_types = max_num_groups;
917 	if (is_shared_prio)
918 		ft->shared_refcount = 1;
919 
920 return_ft:
921 	if (is_shared_prio)
922 		mutex_unlock(&fs_prio->shared_lock);
923 	return ft;
924 }
925 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
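
/*
 * Example usage (a sketch; error handling elided and the namespace
 * lookup through mlx5_get_flow_namespace() assumed):
 *
 *	struct mlx5_flow_namespace *ns =
 *		mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_KERNEL);
 *	struct mlx5_flow_table *ft =
 *		mlx5_create_auto_grouped_flow_table(ns, 0, "example_ft",
 *						    1024, 16);
 *
 * Flow groups are then created on demand by create_autogroup() when
 * rules with new match criteria are added.
 */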
926 
927 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
928 						     u16 vport,
929 						     int prio,
930 						     const char *name,
931 						     int num_flow_table_entries)
932 {
933 	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
934 }
935 EXPORT_SYMBOL(mlx5_create_vport_flow_table);
936 
937 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
938 					       int prio,
939 					       const char *name,
940 					       int num_flow_table_entries)
941 {
942 	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
943 }
944 EXPORT_SYMBOL(mlx5_create_flow_table);
945 
946 static void _fs_del_ft(struct mlx5_flow_table *ft)
947 {
948 	int err;
949 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
950 	struct fs_prio *prio;
951 
952 	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
953 	if (err)
954 		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
955 			       ft->base.name);
956 
957 	fs_get_parent(prio, ft);
958 	prio->num_ft--;
959 }
960 
961 static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
962 				    struct mlx5_flow_table *ft)
963 {
964 	int err = 0;
965 	struct fs_prio *prio;
966 	struct mlx5_flow_table *next_ft = NULL;
967 	struct mlx5_flow_table *put_ft = NULL;
968 
969 	if (root->root_ft != ft)
970 		return 0;
971 
972 	fs_get_parent(prio, ft);
	/* Assuming objs contains only flow tables and
	 * flow tables are sorted by level.
	 */
976 	if (!list_is_last(&ft->base.list, &prio->objs)) {
977 		next_ft = list_next_entry(ft, base.list);
978 	} else {
979 		next_ft = find_next_ft(prio);
980 		put_ft = next_ft;
981 	}
982 
983 	if (next_ft) {
984 		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
985 					      next_ft->id);
986 		if (err)
987 			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
988 				       ft->id);
989 	}
990 	if (!err)
991 		root->root_ft = next_ft;
992 
993 	if (put_ft)
994 		fs_put(&put_ft->base);
995 
996 	return err;
997 }
998 
/* Objects in the same prio are destroyed in the reverse order they were created */
1000 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
1001 {
1002 	int err = 0;
1003 	struct fs_prio *prio;
1004 	struct mlx5_flow_root_namespace *root;
1005 	bool is_shared_prio;
1006 	struct mlx5_core_dev *dev;
1007 
1008 	fs_get_parent(prio, ft);
1009 	root = find_root(&prio->base);
1010 	dev = fs_get_dev(&prio->base);
1011 
1012 	if (!root) {
1013 		mlx5_core_err(dev,
1014 		    "flow steering failed to find root of priority %s",
1015 		    prio->base.name);
1016 		return -ENODEV;
1017 	}
1018 
1019 	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
1020 	if (is_shared_prio) {
1021 		mutex_lock(&prio->shared_lock);
1022 		if (ft->shared_refcount > 1) {
1023 			--ft->shared_refcount;
1024 			fs_put(&ft->base);
1025 			mutex_unlock(&prio->shared_lock);
1026 			return 0;
1027 		}
1028 	}
1029 
1030 	mutex_lock(&prio->base.lock);
1031 	mutex_lock(&ft->base.lock);
1032 
1033 	err = update_root_ft_destroy(root, ft);
1034 	if (err)
1035 		goto unlock_ft;
1036 
	/* delete the last two entries (the star rule fte and its group) */
1038 	destroy_star_rule(ft, prio);
1039 
1040 	mutex_unlock(&ft->base.lock);
1041 	fs_remove_node_parent_locked(&ft->base);
1042 	mutex_unlock(&prio->base.lock);
1043 	if (is_shared_prio)
1044 		mutex_unlock(&prio->shared_lock);
1045 
1046 	return err;
1047 
1048 unlock_ft:
1049 	mutex_unlock(&ft->base.lock);
1050 	mutex_unlock(&prio->base.lock);
1051 	if (is_shared_prio)
1052 		mutex_unlock(&prio->shared_lock);
1053 
1054 	return err;
1055 }
1056 EXPORT_SYMBOL(mlx5_destroy_flow_table);
1057 
1058 static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
1059 					    struct mlx5_flow_table *ft,
1060 					    struct list_head *prev,
1061 					    u32 *fg_in,
1062 					    int refcount)
1063 {
1064 	struct mlx5_flow_group *fg;
1065 	int err;
1066 	char name[20];
1067 
1068 	fg = fs_alloc_fg(fg_in);
1069 	if (IS_ERR(fg))
1070 		return fg;
1071 
1072 	err =  mlx5_cmd_fs_create_fg(dev, fg_in,
1073 				     ft->vport, ft->type, ft->id,
1074 				     &fg->id);
1075 	if (err)
1076 		goto free_fg;
1077 
1078 	mutex_lock(&ft->base.lock);
1079 	if (ft->autogroup.active)
1080 		ft->autogroup.num_types++;
1081 
1082 	snprintf(name, sizeof(name), "group_%u", fg->id);
	/* Add node to tree */
	fs_add_node(&fg->base, &ft->base, name, refcount);
	/* Add node to group list */
	list_add(&fg->base.list, prev);
1087 	mutex_unlock(&ft->base.lock);
1088 
1089 	return fg;
1090 
1091 free_fg:
1092 	kfree(fg);
1093 	return ERR_PTR(err);
1094 }
1095 
1096 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1097 					       u32 *in)
1098 {
1099 	struct mlx5_flow_group *fg;
1100 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
1101 
1102 	if (!dev)
1103 		return ERR_PTR(-ENODEV);
1104 
1105 	if (ft->autogroup.active)
1106 		return ERR_PTR(-EPERM);
1107 
1108 	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);
1109 
1110 	return fg;
1111 }
1112 EXPORT_SYMBOL(mlx5_create_flow_group);
1113 
/* A group is destroyed once all the rules in it have been removed */
1115 static void fs_del_fg(struct mlx5_flow_group *fg)
1116 {
1117 	struct mlx5_flow_table *parent_ft;
1118 	struct mlx5_core_dev *dev;
1119 
1120 	fs_get_parent(parent_ft, fg);
1121 	dev = fs_get_dev(&parent_ft->base);
1122 	WARN_ON(!dev);
1123 
1124 	if (parent_ft->autogroup.active)
1125 		parent_ft->autogroup.num_types--;
1126 
1127 	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
1128 				   parent_ft->type,
1129 				   parent_ft->id, fg->id))
1130 		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
1131 }
1132 
1133 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
1134 {
1135 	fs_remove_node(&fg->base);
1136 }
1137 EXPORT_SYMBOL(mlx5_destroy_flow_group);
1138 
1139 static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
1140 {
1141 	unsigned int i;
1142 
1143 	/* TODO: optimize by comparing 64bits when possible */
1144 	for (i = 0; i < size; i++, mask++, val1++, val2++)
1145 		if ((*((u8 *)val1) & (*(u8 *)mask)) !=
1146 		    ((*(u8 *)val2) & (*(u8 *)mask)))
1147 			return false;
1148 
1149 	return true;
1150 }
1151 
1152 bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
1153 			       void *val1, void *val2)
1154 {
1155 	if (mask->match_criteria_enable &
1156 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
1157 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1158 						val1, outer_headers);
1159 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1160 						val2, outer_headers);
1161 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1162 					      mask->match_criteria, outer_headers);
1163 
1164 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1165 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1166 			return false;
1167 	}
1168 
1169 	if (mask->match_criteria_enable &
1170 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
1171 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1172 						val1, misc_parameters);
1173 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1174 						val2, misc_parameters);
1175 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1176 					  mask->match_criteria, misc_parameters);
1177 
1178 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1179 					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
1180 			return false;
1181 	}
1182 	if (mask->match_criteria_enable &
1183 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
1184 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1185 						val1, inner_headers);
1186 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1187 						val2, inner_headers);
1188 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1189 					  mask->match_criteria, inner_headers);
1190 
1191 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1192 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1193 			return false;
1194 	}
1195 	return true;
1196 }
1197 
1198 bool fs_match_exact_mask(u8 match_criteria_enable1,
1199 				u8 match_criteria_enable2,
1200 				void *mask1, void *mask2)
1201 {
1202 	return match_criteria_enable1 == match_criteria_enable2 &&
1203 		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
1204 }
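
/*
 * Sketch: a group matching only on the outer destination MAC would set
 * match_criteria_enable to
 * 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS
 * and fill outer_headers.dmac_47_16/dmac_15_0 of the criteria; the
 * helpers above then compare candidate values only under that mask.
 */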
1205 
1206 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1207 							   struct list_head *start);
1208 
1209 static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
1210 							      struct list_head *start)
1211 {
1212 	struct fs_base *it = container_of(start, struct fs_base, list);
1213 
1214 	if (!prio)
1215 		return NULL;
1216 
1217 	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
1218 		struct mlx5_flow_namespace	*ns;
1219 		struct mlx5_flow_table		*ft;
1220 
1221 		if (it->type == FS_TYPE_FLOW_TABLE) {
1222 			fs_get_obj(ft, it);
1223 			fs_get(&ft->base);
1224 			return ft;
1225 		}
1226 
1227 		fs_get_obj(ns, it);
1228 		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);
1229 
1230 		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
1231 		if (ft)
1232 			return ft;
1233 	}
1234 
1235 	return NULL;
1236 }
1237 
1238 static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
1239 							     struct list_head *start)
1240 {
1241 	struct mlx5_flow_table *ft;
1242 
1243 	if (!prio)
1244 		return NULL;
1245 
1246 	mutex_lock(&prio->base.lock);
1247 	ft = _find_first_ft_in_prio_reverse(prio, start);
1248 	mutex_unlock(&prio->base.lock);
1249 
1250 	return ft;
1251 }
1252 
1253 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1254 							   struct list_head *start)
1255 {
1256 	struct fs_prio *prio;
1257 
1258 	if (!ns)
1259 		return NULL;
1260 
1261 	fs_get_obj(prio, container_of(start, struct fs_base, list));
1262 	mutex_lock(&ns->base.lock);
1263 	fs_for_each_prio_continue_reverse(prio, ns) {
1264 		struct mlx5_flow_table *ft;
1265 
1266 		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
1267 		if (ft) {
1268 			mutex_unlock(&ns->base.lock);
1269 			return ft;
1270 		}
1271 	}
1272 	mutex_unlock(&ns->base.lock);
1273 
1274 	return NULL;
1275 }
1276 
/* Returns a held ft; assumes curr is protected and curr's parent is
 * locked.
 */
1280 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
1281 					    struct fs_prio *prio)
1282 {
1283 	struct mlx5_flow_table *ft = NULL;
1284 	struct fs_base *curr_base;
1285 
1286 	if (!curr)
1287 		return NULL;
1288 
1289 	/* prio has either namespace or flow-tables, but not both */
1290 	if (!list_empty(&prio->objs) &&
1291 	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
1292 	    curr)
1293 		return NULL;
1294 
1295 	while (!ft && prio) {
1296 		struct mlx5_flow_namespace *ns;
1297 
1298 		fs_get_parent(ns, prio);
1299 		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
1300 		curr_base = &ns->base;
1301 		fs_get_parent(prio, ns);
1302 
1303 		if (prio && !ft)
1304 			ft = find_first_ft_in_prio_reverse(prio,
1305 							   &curr_base->list);
1306 	}
1307 	return ft;
1308 }
1309 
1310 static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
1311 						      struct list_head *start)
1312 {
1313 	struct fs_base	*it = container_of(start, struct fs_base, list);
1314 
1315 	if (!prio)
1316 		return NULL;
1317 
1318 	fs_for_each_ns_or_ft_continue(it, prio) {
1319 		struct mlx5_flow_namespace	*ns;
1320 		struct mlx5_flow_table		*ft;
1321 
1322 		if (it->type == FS_TYPE_FLOW_TABLE) {
1323 			fs_get_obj(ft, it);
1324 			fs_get(&ft->base);
1325 			return ft;
1326 		}
1327 
1328 		fs_get_obj(ns, it);
1329 		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);
1330 
1331 		ft = find_first_ft_in_ns(ns, &ns->prios);
1332 		if (ft)
1333 			return ft;
1334 	}
1335 
1336 	return NULL;
1337 }
1338 
1339 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
1340 						     struct list_head *start)
1341 {
1342 	struct mlx5_flow_table *ft;
1343 
1344 	if (!prio)
1345 		return NULL;
1346 
1347 	mutex_lock(&prio->base.lock);
1348 	ft = _find_first_ft_in_prio(prio, start);
1349 	mutex_unlock(&prio->base.lock);
1350 
1351 	return ft;
1352 }
1353 
1354 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
1355 						   struct list_head *start)
1356 {
1357 	struct fs_prio *prio;
1358 
1359 	if (!ns)
1360 		return NULL;
1361 
1362 	fs_get_obj(prio, container_of(start, struct fs_base, list));
1363 	mutex_lock(&ns->base.lock);
1364 	fs_for_each_prio_continue(prio, ns) {
1365 		struct mlx5_flow_table *ft;
1366 
1367 		ft = find_first_ft_in_prio(prio, &prio->objs);
1368 		if (ft) {
1369 			mutex_unlock(&ns->base.lock);
1370 			return ft;
1371 		}
1372 	}
1373 	mutex_unlock(&ns->base.lock);
1374 
1375 	return NULL;
1376 }
1377 
/* Returns a held ft; assumes curr is protected and curr's parent is
 * locked.
 */
1381 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
1382 {
1383 	struct mlx5_flow_table *ft = NULL;
1384 	struct fs_base *curr_base;
1385 
1386 	while (!ft && prio) {
1387 		struct mlx5_flow_namespace *ns;
1388 
1389 		fs_get_parent(ns, prio);
1390 		ft = find_first_ft_in_ns(ns, &prio->base.list);
1391 		curr_base = &ns->base;
1392 		fs_get_parent(prio, ns);
1393 
1394 		if (!ft && prio)
1395 			ft = _find_first_ft_in_prio(prio, &curr_base->list);
1396 	}
1397 	return ft;
1398 }
1399 
1400 
1401 /* called under ft mutex lock */
1402 static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
1403 						u8 match_criteria_enable,
1404 						u32 *match_criteria)
1405 {
1406 	unsigned int group_size;
1407 	unsigned int candidate_index = 0;
1408 	unsigned int candidate_group_num = 0;
1409 	struct mlx5_flow_group *g;
1410 	struct mlx5_flow_group *ret;
1411 	struct list_head *prev = &ft->fgs;
1412 	struct mlx5_core_dev *dev;
1413 	u32 *in;
1414 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1415 	void *match_criteria_addr;
1416 
1417 	if (!ft->autogroup.active)
1418 		return ERR_PTR(-ENOENT);
1419 
1420 	dev = fs_get_dev(&ft->base);
1421 	if (!dev)
1422 		return ERR_PTR(-ENODEV);
1423 
1424 	in = mlx5_vzalloc(inlen);
1425 	if (!in) {
1426 		mlx5_core_warn(dev, "failed to allocate inbox\n");
1427 		return ERR_PTR(-ENOMEM);
1428 	}
1429 
1430 
1431 	if (ft->autogroup.num_types < ft->autogroup.max_types)
1432 		group_size = ft->max_fte / (ft->autogroup.max_types + 1);
1433 	else
1434 		group_size = 1;
1435 
1436 	if (group_size == 0) {
1437 		mlx5_core_warn(dev,
1438 			       "flow steering can't create group size of 0\n");
1439 		ret = ERR_PTR(-EINVAL);
1440 		goto out;
1441 	}
1442 
1443 	/* sorted by start_index */
1444 	fs_for_each_fg(g, ft) {
1445 		candidate_group_num++;
1446 		if (candidate_index + group_size > g->start_index)
1447 			candidate_index = g->start_index + g->max_ftes;
1448 		else
1449 			break;
1450 		prev = &g->base.list;
1451 	}
1452 
1453 	if (candidate_index + group_size > ft->max_fte) {
1454 		ret = ERR_PTR(-ENOSPC);
1455 		goto out;
1456 	}
1457 
1458 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
1459 		 match_criteria_enable);
1460 	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
1461 	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
1462 		 group_size - 1);
1463 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
1464 					   in, match_criteria);
1465 	memcpy(match_criteria_addr, match_criteria,
1466 	       MLX5_ST_SZ_BYTES(fte_match_param));
1467 
1468 	ret = fs_create_fg(dev, ft, prev, in, 0);
1469 out:
1470 	kvfree(in);
1471 	return ret;
1472 }
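
/*
 * Worked example (hypothetical numbers): with ft->max_fte = 14 and
 * autogroup.max_types = 4, group_size = 14 / 5 = 2.  The loop above
 * scans the start_index-sorted group list for the first gap of at
 * least two entries: with existing groups at [0..1] and [2..3] the new
 * group lands at [4..5].  Once max_types groups exist, further groups
 * are created with size 1.
 */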
1473 
1474 static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
1475 {
1476 	struct mlx5_flow_namespace *ns = NULL;
1477 
1478 	while (node  && (node->type != FS_TYPE_NAMESPACE ||
1479 			      list_empty(&container_of(node, struct
1480 						       mlx5_flow_namespace,
1481 						       base)->list_notifiers)))
1482 		node = node->parent;
1483 
1484 	if (node)
1485 		fs_get_obj(ns, node);
1486 
1487 	return ns;
1488 }
1489 
1490 
/* Assumption: fte is locked */
1492 static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
1493 				      struct fs_fte *fte)
1494 {
1495 	struct mlx5_flow_namespace *ns;
1496 	struct mlx5_flow_handler *iter_handler;
1497 	struct fs_client_priv_data *iter_client;
1498 	void *data;
1499 	bool is_new_rule = list_first_entry(&fte->dests,
1500 					    struct mlx5_flow_rule,
1501 					    base.list) == dst;
1502 	int err;
1503 
1504 	ns = get_ns_with_notifiers(&fte->base);
1505 	if (!ns)
1506 		return;
1507 
1508 	down_read(&ns->notifiers_rw_sem);
1509 	list_for_each_entry(iter_handler, &ns->list_notifiers,
1510 			    list) {
1511 		if (iter_handler->add_dst_cb) {
1512 			data = NULL;
1513 			mutex_lock(&dst->clients_lock);
1514 			list_for_each_entry(
1515 				iter_client, &dst->clients_data, list) {
1516 				if (iter_client->fs_handler == iter_handler) {
1517 					data = iter_client->client_dst_data;
1518 					break;
1519 				}
1520 			}
1521 			mutex_unlock(&dst->clients_lock);
1522 			err  = iter_handler->add_dst_cb(dst,
1523 							is_new_rule,
1524 							data,
1525 							iter_handler->client_context);
1526 			if (err)
1527 				break;
1528 		}
1529 	}
1530 	up_read(&ns->notifiers_rw_sem);
1531 }
1532 
1533 static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
1534 				      struct fs_fte *fte)
1535 {
1536 	struct mlx5_flow_namespace *ns;
1537 	struct mlx5_flow_handler *iter_handler;
1538 	struct fs_client_priv_data *iter_client;
1539 	void *data;
1540 	bool ctx_changed = (fte->dests_size == 0);
1541 
1542 	ns = get_ns_with_notifiers(&fte->base);
1543 	if (!ns)
1544 		return;
1545 	down_read(&ns->notifiers_rw_sem);
1546 	list_for_each_entry(iter_handler, &ns->list_notifiers,
1547 			    list) {
1548 		data = NULL;
1549 		mutex_lock(&dst->clients_lock);
1550 		list_for_each_entry(iter_client, &dst->clients_data, list) {
1551 			if (iter_client->fs_handler == iter_handler) {
1552 				data = iter_client->client_dst_data;
1553 				break;
1554 			}
1555 		}
1556 		mutex_unlock(&dst->clients_lock);
1557 		if (iter_handler->del_dst_cb) {
1558 			iter_handler->del_dst_cb(dst, ctx_changed, data,
1559 						 iter_handler->client_context);
1560 		}
1561 	}
1562 	up_read(&ns->notifiers_rw_sem);
1563 }
1564 
1565 /* fte should not be deleted while calling this function */
1566 static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
1567 					      struct mlx5_flow_group *fg,
1568 					      struct mlx5_flow_destination *dest)
1569 {
1570 	struct mlx5_flow_table *ft;
1571 	struct mlx5_flow_rule *dst;
1572 	int err;
1573 
1574 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1575 	if (!dst)
1576 		return ERR_PTR(-ENOMEM);
1577 
1578 	memcpy(&dst->dest_attr, dest, sizeof(*dest));
1579 	dst->base.type = FS_TYPE_FLOW_DEST;
1580 	INIT_LIST_HEAD(&dst->clients_data);
1581 	mutex_init(&dst->clients_lock);
1582 	fs_get_parent(ft, fg);
	/* Add dest to the dests list; list_add_tail() appends it at the end */
1584 	list_add_tail(&dst->base.list, &fte->dests);
1585 	fte->dests_size++;
1586 	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
1587 				  ft->vport,
1588 				  &fte->status,
1589 				  fte->val, ft->type,
1590 				  ft->id, fte->index, fg->id, fte->flow_tag,
1591 				  fte->action, fte->dests_size, &fte->dests);
1592 	if (err)
1593 		goto free_dst;
1594 
1595 	list_del(&dst->base.list);
1596 
1597 	return dst;
1598 
1599 free_dst:
1600 	list_del(&dst->base.list);
1601 	kfree(dst);
1602 	fte->dests_size--;
1603 	return ERR_PTR(err);
1604 }
1605 
1606 static char *get_dest_name(struct mlx5_flow_destination *dest)
1607 {
	char *name = kzalloc(20, GFP_KERNEL);

	if (!name)
		return NULL;

1610 	switch (dest->type) {
1611 	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
1612 		snprintf(name, 20, "dest_%s_%u", "flow_table",
1613 			 dest->ft->id);
1614 		return name;
1615 	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
1616 		snprintf(name, 20, "dest_%s_%u", "vport",
1617 			 dest->vport_num);
1618 		return name;
1619 	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
1620 		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
1621 		return name;
1622 	default:
1623 		kfree(name);
1624 		return NULL;
1625 	}
1626 }
1627 
1628 /* assumed fg is locked */
1629 static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
1630 					 struct list_head **prev)
1631 {
1632 	struct fs_fte *fte;
1633 	unsigned int start = fg->start_index;
1634 
1635 	if (prev)
1636 		*prev = &fg->ftes;
1637 
1638 	/* assumed list is sorted by index */
1639 	fs_for_each_fte(fte, fg) {
1640 		if (fte->index != start)
1641 			return start;
1642 		start++;
1643 		if (prev)
1644 			*prev = &fte->base.list;
1645 	}
1646 
1647 	return start;
1648 }
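
/*
 * Example (hypothetical indices): for a group starting at index 4 whose
 * sorted fte list holds indices 4, 5 and 7, the scan above returns 6,
 * and *prev points at the fte with index 5, so inserting there keeps
 * the list sorted.
 */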
1649 
1650 
1651 static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
1652 			     u32 *match_value,
1653 			     u8 action,
1654 			     u32 flow_tag,
1655 			     struct list_head **prev)
1656 {
1657 	struct fs_fte *fte;
	int index;

	index = fs_get_free_fg_index(fg, prev);
	fte = fs_alloc_fte(action, flow_tag, match_value, index);

	return fte;
1666 }
1667 
1668 static void add_rule_to_tree(struct mlx5_flow_rule *rule,
1669 			     struct fs_fte *fte)
1670 {
1671 	char *dest_name;
1672 
1673 	dest_name = get_dest_name(&rule->dest_attr);
1674 	fs_add_node(&rule->base, &fte->base, dest_name, 1);
1675 	/* re-add to list, since fs_add_node reset our list */
1676 	list_add_tail(&rule->base.list, &fte->dests);
1677 	kfree(dest_name);
1678 	call_to_add_rule_notifiers(rule, fte);
1679 }
1680 
1681 static void fs_del_dst(struct mlx5_flow_rule *dst)
1682 {
1683 	struct mlx5_flow_table *ft;
1684 	struct mlx5_flow_group *fg;
1685 	struct fs_fte *fte;
1686 	u32	*match_value;
1687 	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
1688 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
1689 	int err;
1690 
1691 	WARN_ON(!dev);
1692 
1693 	match_value = mlx5_vzalloc(match_len);
1694 	if (!match_value) {
1695 		mlx5_core_warn(dev, "failed to allocate inbox\n");
1696 		return;
1697 	}
1698 
1699 	fs_get_parent(fte, dst);
1700 	fs_get_parent(fg, fte);
1701 	mutex_lock(&fg->base.lock);
1702 	memcpy(match_value, fte->val, sizeof(fte->val));
1703 	/* ft can't be changed as fg is locked */
1704 	fs_get_parent(ft, fg);
1705 	list_del(&dst->base.list);
1706 	fte->dests_size--;
1707 	if (fte->dests_size) {
1708 		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
1709 					  &fte->status, match_value, ft->type,
1710 					  ft->id, fte->index, fg->id,
1711 					  fte->flow_tag, fte->action,
1712 					  fte->dests_size, &fte->dests);
1713 		if (err) {
1714 			mlx5_core_warn(dev, "%s can't delete dst %s\n",
1715 				       __func__, dst->base.name);
1716 			goto err;
1717 		}
1718 	}
1719 	call_to_del_rule_notifiers(dst, fte);
1720 err:
1721 	mutex_unlock(&fg->base.lock);
1722 	kvfree(match_value);
1723 }
1724 
1725 static void fs_del_fte(struct fs_fte *fte)
1726 {
1727 	struct mlx5_flow_table *ft;
1728 	struct mlx5_flow_group *fg;
1729 	int err;
1730 	struct mlx5_core_dev *dev;
1731 
1732 	fs_get_parent(fg, fte);
1733 	fs_get_parent(ft, fg);
1734 
1735 	dev = fs_get_dev(&ft->base);
1736 	WARN_ON(!dev);
1737 
1738 	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
1739 				     ft->type, ft->id, fte->index);
1740 	if (err)
1741 		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
1742 			       fte->base.name);
1743 
1744 	fg->num_ftes--;
1745 }
1746 
1747 /* assuming parent fg is locked */
1748 /* Add dst algorithm */
1749 static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
1750 						   u32 *match_value,
1751 						   u8 action,
1752 						   u32 flow_tag,
1753 						   struct mlx5_flow_destination *dest)
1754 {
1755 	struct fs_fte *fte;
1756 	struct mlx5_flow_rule *dst;
1757 	struct mlx5_flow_table *ft;
1758 	struct list_head *prev;
1759 	char fte_name[20];
1760 
1761 	mutex_lock(&fg->base.lock);
1762 	fs_for_each_fte(fte, fg) {
		/* TODO: check size against the PRM max size */
1764 		mutex_lock(&fte->base.lock);
1765 		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
1766 		    action == fte->action && flow_tag == fte->flow_tag) {
1767 			dst = _fs_add_dst_fte(fte, fg, dest);
1768 			mutex_unlock(&fte->base.lock);
1769 			if (IS_ERR(dst))
1770 				goto unlock_fg;
1771 			goto add_rule;
1772 		}
1773 		mutex_unlock(&fte->base.lock);
1774 	}
1775 
1776 	fs_get_parent(ft, fg);
1777 	if (fg->num_ftes == fg->max_ftes) {
1778 		dst = ERR_PTR(-ENOSPC);
1779 		goto unlock_fg;
1780 	}
1781 
1782 	fte = fs_create_fte(fg, match_value, action, flow_tag, &prev);
1783 	if (IS_ERR(fte)) {
1784 		dst = (void *)fte;
1785 		goto unlock_fg;
1786 	}
1787 	dst = _fs_add_dst_fte(fte, fg, dest);
1788 	if (IS_ERR(dst)) {
1789 		kfree(fte);
1790 		goto unlock_fg;
1791 	}
1792 
1793 	fg->num_ftes++;
1794 
1795 	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
1796 	/* Add node to tree */
1797 	fs_add_node(&fte->base, &fg->base, fte_name, 0);
1798 	list_add(&fte->base.list, prev);
1799 add_rule:
1800 	add_rule_to_tree(dst, fte);
1801 unlock_fg:
1802 	mutex_unlock(&fg->base.lock);
1803 	return dst;
1804 }
1805 
1806 static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
1807 					    u8 match_criteria_enable,
1808 					    u32 *match_criteria,
1809 					    u32 *match_value,
1810 					    u8 action, u32 flow_tag,
1811 					    struct mlx5_flow_destination *dest)
1812 {
	/* the returned dst entry is allocated in _fs_add_dst_fte() */
1814 	struct mlx5_flow_group *g;
1815 	struct mlx5_flow_rule *dst;
1816 
1817 	fs_get(&ft->base);
1818 	mutex_lock(&ft->base.lock);
1819 	fs_for_each_fg(g, ft)
1820 		if (fs_match_exact_mask(g->mask.match_criteria_enable,
1821 					match_criteria_enable,
1822 					g->mask.match_criteria,
1823 					match_criteria)) {
1824 			mutex_unlock(&ft->base.lock);
1825 
1826 			dst = fs_add_dst_fg(g, match_value,
1827 					    action, flow_tag, dest);
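			/*
			 * Note: PTR_ERR() of a valid pointer is non-zero
			 * as well, so a successful dst, or any error
			 * other than -ENOSPC, ends the search; only a
			 * full group (-ENOSPC) moves on to the next
			 * matching group.
			 */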
1828 			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
1829 				goto unlock;
1830 		}
1831 	mutex_unlock(&ft->base.lock);
1832 
1833 	g = create_autogroup(ft, match_criteria_enable, match_criteria);
1834 	if (IS_ERR(g)) {
1835 		dst = (void *)g;
1836 		goto unlock;
1837 	}
1838 
1839 	dst = fs_add_dst_fg(g, match_value,
1840 			    action, flow_tag, dest);
1841 	if (IS_ERR(dst)) {
1842 		/* Remove assumes refcount > 0 and autogroup creates a group
1843 		 * with a refcount = 0.
1844 		 */
1845 		fs_get(&g->base);
1846 		fs_remove_node(&g->base);
1847 		goto unlock;
1848 	}
1849 
1850 unlock:
1851 	fs_put(&ft->base);
1852 	return dst;
1853 }
1854 
1855 struct mlx5_flow_rule *
1856 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1857 		   u8 match_criteria_enable,
1858 		   u32 *match_criteria,
1859 		   u32 *match_value,
1860 		   u32 action,
1861 		   u32 flow_tag,
1862 		   struct mlx5_flow_destination *dest)
1863 {
1864 	struct mlx5_flow_rule *dst;
1865 	struct mlx5_flow_namespace *ns;
1866 
1867 	ns = get_ns_with_notifiers(&ft->base);
1868 	if (ns)
1869 		down_read(&ns->dests_rw_sem);
1870 	dst =  fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
1871 			     match_value, action, flow_tag, dest);
1872 	if (ns)
1873 		up_read(&ns->dests_rw_sem);
1874 
1875 	return dst;
}
1879 EXPORT_SYMBOL(mlx5_add_flow_rule);
1880 
1881 void mlx5_del_flow_rule(struct mlx5_flow_rule *dst)
1882 {
1883 	struct mlx5_flow_namespace *ns;
1884 
1885 	ns = get_ns_with_notifiers(&dst->base);
1886 	if (ns)
1887 		down_read(&ns->dests_rw_sem);
1888 	fs_remove_node(&dst->base);
1889 	if (ns)
1890 		up_read(&ns->dests_rw_sem);
1891 }
1892 EXPORT_SYMBOL(mlx5_del_flow_rule);
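
/*
 * End-to-end sketch (hypothetical tirn; error handling and the table
 * setup elided): add a catch-all rule forwarding to a TIR, then remove
 * it.
 *
 *	u32 *mc = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
 *	u32 *mv = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
 *	struct mlx5_flow_destination dest = {
 *		.type = MLX5_FLOW_CONTEXT_DEST_TYPE_TIR,
 *		.tir_num = tirn,
 *	};
 *	struct mlx5_flow_rule *rule;
 *
 *	rule = mlx5_add_flow_rule(ft, 0, mc, mv,
 *				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 *				  MLX5_FS_DEFAULT_FLOW_TAG, &dest);
 *	...
 *	mlx5_del_flow_rule(rule);
 *	kvfree(mv);
 *	kvfree(mc);
 */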
1893 
1894 #define MLX5_CORE_FS_ROOT_NS_NAME "root"
1895 #define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
1896 #define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
1897 #define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
1898 #define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
1899 #define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
1900 #define MLX5_CORE_FS_PRIO_MAX_FT 4
1901 #define MLX5_CORE_FS_PRIO_MAX_NS 1
1902 
1903 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
1904 				      unsigned prio, int max_ft,
1905 				      const char *name, u8 flags)
1906 {
1907 	struct fs_prio *fs_prio;
1908 
1909 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
1910 	if (!fs_prio)
1911 		return ERR_PTR(-ENOMEM);
1912 
1913 	fs_prio->base.type = FS_TYPE_PRIO;
1914 	fs_add_node(&fs_prio->base, &ns->base, name, 1);
1915 	fs_prio->max_ft = max_ft;
1916 	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
1917 	fs_prio->prio = prio;
1918 	fs_prio->flags = flags;
1919 	list_add_tail(&fs_prio->base.list, &ns->prios);
1920 	INIT_LIST_HEAD(&fs_prio->objs);
1921 	mutex_init(&fs_prio->shared_lock);
1922 
1923 	return fs_prio;
1924 }
1925 
1926 static void cleanup_root_ns(struct mlx5_core_dev *dev)
1927 {
1928 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
1929 	struct fs_prio *iter_prio;
1930 
1931 	if (!root_ns)
1932 		return;
1933 
1934 	/* stage 1 */
1935 	fs_for_each_prio(iter_prio, &root_ns->ns) {
1936 		struct mlx5_flow_namespace *iter_ns;
1937 
1938 		fs_for_each_ns(iter_ns, iter_prio) {
1939 			while (!list_empty(&iter_ns->prios)) {
1940 				struct fs_base *iter_prio2 =
1941 					list_first_entry(&iter_ns->prios,
1942 							 struct fs_base,
1943 							 list);
1944 
1945 				fs_remove_node(iter_prio2);
1946 			}
1947 		}
1948 	}
1949 
1950 	/* stage 2 */
1951 	fs_for_each_prio(iter_prio, &root_ns->ns) {
1952 		while (!list_empty(&iter_prio->objs)) {
1953 			struct fs_base *iter_ns =
1954 				list_first_entry(&iter_prio->objs,
1955 						 struct fs_base,
1956 						 list);
1957 
1958 			fs_remove_node(iter_ns);
1959 		}
1960 	}
1961 	/* stage 3 */
1962 	while (!list_empty(&root_ns->ns.prios)) {
1963 		struct fs_base *iter_prio =
1964 			list_first_entry(&root_ns->ns.prios,
1965 					 struct fs_base,
1966 					 list);
1967 
1968 		fs_remove_node(iter_prio);
1969 	}
1970 
1971 	fs_remove_node(&root_ns->ns.base);
1972 	dev->root_ns = NULL;
1973 }
1974 
1975 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1976 					struct mlx5_flow_root_namespace *root_ns)
1977 {
1978 	struct fs_base *prio;
1979 
1980 	if (!root_ns)
1981 		return;
1982 
1983 	if (!list_empty(&root_ns->ns.prios)) {
1984 		prio = list_first_entry(&root_ns->ns.prios,
1985 					struct fs_base,
1986 					list);
1987 		fs_remove_node(prio);
1988 	}
1989 	fs_remove_node(&root_ns->ns.base);
1991 }
1992 
1993 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
1994 {
1995 	cleanup_root_ns(dev);
1996 	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
1997 	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
1998 	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
1999 	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
2000 	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
2001 }
2002 
2003 static struct mlx5_flow_namespace *
2004 fs_init_namespace(struct mlx5_flow_namespace *ns)
2005 {
2006 	ns->base.type = FS_TYPE_NAMESPACE;
2007 	init_rwsem(&ns->dests_rw_sem);
2008 	init_rwsem(&ns->notifiers_rw_sem);
2009 	INIT_LIST_HEAD(&ns->prios);
2010 	INIT_LIST_HEAD(&ns->list_notifiers);
2011 
2012 	return ns;
2013 }
2014 
2015 static struct mlx5_flow_root_namespace *
2016 create_root_ns(struct mlx5_core_dev *dev, enum fs_ft_type table_type,
2017 	       char *name)
2019 {
2020 	struct mlx5_flow_root_namespace *root_ns;
2021 	struct mlx5_flow_namespace *ns;
2022 
2023 	/* create the root namespace */
2024 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2025 	if (!root_ns)
2026 		goto err;
2027 
2028 	root_ns->dev = dev;
2029 	root_ns->table_type = table_type;
2030 	mutex_init(&root_ns->fs_chain_lock);
2031 
2032 	ns = &root_ns->ns;
2033 	fs_init_namespace(ns);
2034 	fs_add_node(&ns->base, NULL, name, 1);
2035 
2036 	return root_ns;
2037 err:
2038 	return NULL;
2039 }
2040 
2041 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2042 {
2043 	struct fs_prio *prio;
2044 
2045 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2046 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2047 	if (!dev->fdb_root_ns)
2048 		return -ENOMEM;
2049 
2050 	/* Create a single prio. */
2051 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2052 	if (IS_ERR(prio))
2053 		return PTR_ERR(prio);
2054 	return 0;
2056 }
2057 
2058 #define MAX_VPORTS 128
2059 
2060 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2061 {
2062 	struct fs_prio *prio;
2063 
2064 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2065 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2066 	if (!dev->esw_egress_root_ns)
2067 		return -ENOMEM;
2068 
2069 	/* Create a single prio. */
2070 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2071 			      "esw_egress_prio", 0);
2072 	if (IS_ERR(prio))
2073 		return PTR_ERR(prio);
2074 	return 0;
2076 }
2077 
2078 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2079 {
2080 	struct fs_prio *prio;
2081 
2082 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2083 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2084 	if (!dev->esw_ingress_root_ns)
2085 		return -ENOMEM;
2086 
2087 	/* Create a single prio. */
2088 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2089 			      "esw_ingress_prio", 0);
2090 	if (IS_ERR(prio))
2091 		return PTR_ERR(prio);
2092 	return 0;
2094 }
2095 
2096 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2097 {
2098 	struct fs_prio *prio;
2099 
2100 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2101 						 MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2102 	if (!dev->sniffer_rx_root_ns)
2103 		return -ENOMEM;
2104 
2105 	/* Create a single prio. */
2106 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2107 			      "sniffer_prio", 0);
2108 	if (IS_ERR(prio))
2109 		return PTR_ERR(prio);
2110 	return 0;
2112 }
2113 
2115 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2116 {
2117 	struct fs_prio *prio;
2118 
2119 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2120 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2121 	if (!dev->sniffer_tx_root_ns)
2122 		return -ENOMEM;
2123 
2124 	/* Create a single prio. */
2125 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2126 			      "sniffer_prio", 0);
2127 	if (IS_ERR(prio))
2128 		return PTR_ERR(prio);
2129 	return 0;
2131 }
2132 
2133 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2134 						       const char *name)
2135 {
2136 	struct mlx5_flow_namespace	*ns;
2137 
2138 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2139 	if (!ns)
2140 		return ERR_PTR(-ENOMEM);
2141 
2142 	fs_init_namespace(ns);
2143 	fs_add_node(&ns->base, &prio->base, name, 1);
2144 	list_add_tail(&ns->base.list, &prio->objs);
2145 
2146 	return ns;
2147 }
2148 
2149 #define FLOW_TABLE_BIT_SZ 1
2150 #define GET_FLOW_TABLE_CAP(dev, offset) \
2151 	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
2152 			offset / 32)) >>					\
2153 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
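
/*
 * GET_FLOW_TABLE_CAP() reads one capability bit, addressed by its bit
 * offset into the big-endian flow-table capability block.  E.g. for
 * offset 35: dword 35 / 32 = 1 is fetched, shifted right by
 * 32 - 1 - (35 & 0x1f) = 28 so the requested bit lands at bit 0, and
 * masked with FLOW_TABLE_BIT_SZ; the mask only works because the
 * field width is a single bit.
 */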
2154 
2155 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2156 {
2157 	int i;
2158 
2159 	for (i = 0; i < caps->arr_sz; i++) {
2160 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2161 			return false;
2162 	}
2163 	return true;
2164 }
2165 
2166 static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2167 		    struct init_tree_node *node, struct fs_base *base_parent,
2168 		    struct init_tree_node *tree_parent)
2169 {
2170 	struct mlx5_flow_namespace *fs_ns;
2171 	struct fs_prio *fs_prio;
2172 	int priority;
2173 	struct fs_base *base;
2174 	int i;
2175 	int err = 0;
2176 
2177 	if (node->type == FS_TYPE_PRIO) {
2178 		if ((node->min_ft_level > max_ft_level) ||
2179 		    !has_required_caps(dev, &node->caps))
2180 			goto out;
2181 
2182 		fs_get_obj(fs_ns, base_parent);
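		/*
		 * The prio's priority is its node's index within the
		 * parent's children array.
		 */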
2183 		priority = node - tree_parent->children;
2184 		fs_prio = fs_create_prio(fs_ns, priority,
2185 					 node->max_ft,
2186 					 node->name, node->flags);
2187 		if (IS_ERR(fs_prio)) {
2188 			err = PTR_ERR(fs_prio);
2189 			goto out;
2190 		}
2191 		base = &fs_prio->base;
2192 	} else if (node->type == FS_TYPE_NAMESPACE) {
2193 		fs_get_obj(fs_prio, base_parent);
2194 		fs_ns = fs_create_namespace(fs_prio, node->name);
2195 		if (IS_ERR(fs_ns)) {
2196 			err = PTR_ERR(fs_ns);
2197 			goto out;
2198 		}
2199 		base = &fs_ns->base;
2200 	} else {
2201 		return -EINVAL;
2202 	}
2203 	for (i = 0; i < node->ar_size; i++) {
2204 		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
2205 				      node);
2206 		if (err)
2207 			break;
2208 	}
2209 out:
2210 	return err;
2211 }
2212 
2213 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2214 		   struct init_tree_node *node, struct fs_base *parent)
2215 {
2216 	int i;
2217 	struct mlx5_flow_namespace *fs_ns;
2218 	int err = 0;
2219 
2220 	fs_get_obj(fs_ns, parent);
2221 	for (i = 0; i < node->ar_size; i++) {
2222 		err = _init_root_tree(dev, max_ft_level,
2223 				      &node->children[i], &fs_ns->base, node);
2224 		if (err)
2225 			break;
2226 	}
2227 	return err;
2228 }
2229 
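
/*
 * sum_max_ft_in_prio() and sum_max_ft_in_ns() are mutually recursive:
 * a prio with no explicit max_ft gets the sum over its nested
 * namespaces, and the result is cached in prio->max_ft.
 */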
2230 static int sum_max_ft_in_prio(struct fs_prio *prio);
2231 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2232 {
2233 	struct fs_prio *prio;
2234 	int sum = 0;
2235 
2236 	fs_for_each_prio(prio, ns) {
2237 		sum += sum_max_ft_in_prio(prio);
2238 	}
2239 	return sum;
2240 }
2241 
2242 static int sum_max_ft_in_prio(struct fs_prio *prio)
2243 {
2244 	int sum = 0;
2245 	struct fs_base *it;
2246 	struct mlx5_flow_namespace	*ns;
2247 
2248 	if (prio->max_ft)
2249 		return prio->max_ft;
2250 
2251 	fs_for_each_ns_or_ft(it, prio) {
2252 		if (it->type == FS_TYPE_FLOW_TABLE)
2253 			continue;
2254 
2255 		fs_get_obj(ns, it);
2256 		sum += sum_max_ft_in_ns(ns);
2257 	}
2258 	prio->max_ft = sum;
2259 	return sum;
2260 }
2261 
2262 static void set_max_ft(struct mlx5_flow_namespace *ns)
2263 {
2264 	struct fs_prio *prio;
2265 
2266 	if (!ns)
2267 		return;
2268 
2269 	fs_for_each_prio(prio, ns)
2270 		sum_max_ft_in_prio(prio);
2271 }
2272 
2273 static int init_root_ns(struct mlx5_core_dev *dev)
2274 {
2275 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2276 					      flow_table_properties_nic_receive.
2277 					      max_ft_level);
2278 
2279 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2280 				      MLX5_CORE_FS_ROOT_NS_NAME);
2281 	if (IS_ERR_OR_NULL(dev->root_ns))
2282 		goto err;
2283 
2285 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2286 		goto err;
2287 
2288 	set_max_ft(&dev->root_ns->ns);
2289 
2290 	return 0;
2291 err:
2292 	return -ENOMEM;
2293 }
2294 
2295 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2296 {
2297 	struct fs_base *pbase;
2298 	struct mlx5_flow_group *fg;
2299 
2300 	pbase = rule->base.parent;
2301 	WARN_ON(!pbase);
2302 	pbase = pbase->parent;
2303 	WARN_ON(!pbase);
2304 
2305 	fs_get_obj(fg, pbase);
2306 	return fg->mask.match_criteria_enable;
2307 }
2308 
2309 void mlx5_get_match_value(u32 *match_value,
2310 			  struct mlx5_flow_rule *rule)
2311 {
2312 	struct fs_base *pbase;
2313 	struct fs_fte *fte;
2314 
2315 	pbase = rule->base.parent;
2316 	WARN_ON(!pbase);
2317 	fs_get_obj(fte, pbase);
2318 
2319 	memcpy(match_value, fte->val, sizeof(fte->val));
2320 }
2321 
2322 void mlx5_get_match_criteria(u32 *match_criteria,
2323 			     struct mlx5_flow_rule *rule)
2324 {
2325 	struct fs_base *pbase;
2326 	struct mlx5_flow_group *fg;
2327 
2328 	pbase = rule->base.parent;
2329 	WARN_ON(!pbase);
2330 	pbase = pbase->parent;
2331 	WARN_ON(!pbase);
2332 
2333 	fs_get_obj(fg, pbase);
2334 	memcpy(match_criteria, &fg->mask.match_criteria,
2335 	       sizeof(fg->mask.match_criteria));
2336 }
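
/*
 * Usage sketch (illustrative, "rule" is hypothetical): read back the
 * match parameters a rule was installed with.
 *
 *	u32 criteria[MLX5_ST_SZ_DW(fte_match_param)];
 *	u32 value[MLX5_ST_SZ_DW(fte_match_param)];
 *	u8 enable;
 *
 *	enable = mlx5_get_match_criteria_enable(rule);
 *	mlx5_get_match_criteria(criteria, rule);
 *	mlx5_get_match_value(value, rule);
 */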
2337 
2338 int mlx5_init_fs(struct mlx5_core_dev *dev)
2339 {
2340 	int err;
2341 
2342 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
2343 		err = init_root_ns(dev);
2344 		if (err)
2345 			goto err;
2346 	}
2347 
2348 	err = init_fdb_root_ns(dev);
2349 	if (err)
2350 		goto err;
2351 
2352 	err = init_egress_acl_root_ns(dev);
2353 	if (err)
2354 		goto err;
2355 
2356 	err = init_ingress_acl_root_ns(dev);
2357 	if (err)
2358 		goto err;
2359 
2360 	err = init_sniffer_tx_root_ns(dev);
2361 	if (err)
2362 		goto err;
2363 
2364 	err = init_sniffer_rx_root_ns(dev);
2365 	if (err)
2366 		goto err;
2367 
2368 	return 0;
2369 err:
2370 	mlx5_cleanup_fs(dev);
2371 	return err;
2372 }
2373 
2374 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2375 						  enum mlx5_flow_namespace_type type)
2376 {
2377 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2378 	int prio;
2379 	struct fs_prio *fs_prio;
2380 	struct mlx5_flow_namespace *ns;
2381 
2382 	switch (type) {
2383 	case MLX5_FLOW_NAMESPACE_BYPASS:
2384 		prio = 0;
2385 		break;
2386 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
2387 		prio = 1;
2388 		break;
2389 	case MLX5_FLOW_NAMESPACE_KERNEL:
2390 		prio = 2;
2391 		break;
2392 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2393 		prio = 3;
2394 		break;
2395 	case MLX5_FLOW_NAMESPACE_FDB:
2396 		if (dev->fdb_root_ns)
2397 			return &dev->fdb_root_ns->ns;
2398 		else
2399 			return NULL;
2400 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2401 		if (dev->esw_egress_root_ns)
2402 			return &dev->esw_egress_root_ns->ns;
2403 		else
2404 			return NULL;
2405 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2406 		if (dev->esw_ingress_root_ns)
2407 			return &dev->esw_ingress_root_ns->ns;
2408 		else
2409 			return NULL;
2410 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2411 		if (dev->sniffer_rx_root_ns)
2412 			return &dev->sniffer_rx_root_ns->ns;
2413 		else
2414 			return NULL;
2415 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2416 		if (dev->sniffer_tx_root_ns)
2417 			return &dev->sniffer_tx_root_ns->ns;
2418 		else
2419 			return NULL;
2420 	default:
2421 		return NULL;
2422 	}
2423 
2424 	if (!root_ns)
2425 		return NULL;
2426 
2427 	fs_prio = find_prio(&root_ns->ns, prio);
2428 	if (!fs_prio)
2429 		return NULL;
2430 
2431 	ns = list_first_entry(&fs_prio->objs,
2432 			      typeof(*ns),
2433 			      base.list);
2434 
2435 	return ns;
2436 }
2437 EXPORT_SYMBOL(mlx5_get_flow_namespace);
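
/*
 * Usage sketch (illustrative): resolve the kernel RX namespace before
 * creating flow tables in it; a NULL return means the namespace is
 * not available on this device.
 *
 *	struct mlx5_flow_namespace *ns;
 *
 *	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_KERNEL);
 *	if (ns == NULL)
 *		return -EOPNOTSUPP;
 */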
2438 
2440 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2441 				  struct mlx5_flow_handler *fs_handler,
2442 				  void  *client_data)
2443 {
2444 	struct fs_client_priv_data *priv_data;
2445 
2446 	mutex_lock(&rule->clients_lock);
2447 	/* Check whether the handler already exists in the list. */
2448 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2449 		if (priv_data->fs_handler == fs_handler) {
2450 			priv_data->client_dst_data = client_data;
2451 			goto unlock;
2452 		}
2453 	}
2454 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2455 	if (!priv_data) {
2456 		mutex_unlock(&rule->clients_lock);
2457 		return -ENOMEM;
2458 	}
2459 
2460 	priv_data->client_dst_data = client_data;
2461 	priv_data->fs_handler = fs_handler;
2462 	list_add(&priv_data->list, &rule->clients_data);
2463 
2464 unlock:
2465 	mutex_unlock(&rule->clients_lock);
2466 
2467 	return 0;
2468 }
2469 
2470 static int remove_from_clients(struct mlx5_flow_rule *rule,
2471 			bool ctx_changed,
2472 			void *client_data,
2473 			void *context)
2474 {
2475 	struct fs_client_priv_data *iter_client;
2476 	struct fs_client_priv_data *temp_client;
2477 	struct mlx5_flow_handler *handler =
2478 		(struct mlx5_flow_handler *)context;
2479 
2480 	mutex_lock(&rule->clients_lock);
2481 	list_for_each_entry_safe(iter_client, temp_client,
2482 				 &rule->clients_data, list) {
2483 		if (iter_client->fs_handler == handler) {
2484 			list_del(&iter_client->list);
2485 			kfree(iter_client);
2486 			break;
2487 		}
2488 	}
2489 	mutex_unlock(&rule->clients_lock);
2490 
2491 	return 0;
2492 }
2493 
2494 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2495 								enum mlx5_flow_namespace_type ns_type,
2496 								rule_event_fn add_cb,
2497 								rule_event_fn del_cb,
2498 								void *context)
2499 {
2500 	struct mlx5_flow_namespace *ns;
2501 	struct mlx5_flow_handler *handler;
2502 
2503 	ns = mlx5_get_flow_namespace(dev, ns_type);
2504 	if (!ns)
2505 		return ERR_PTR(-EINVAL);
2506 
2507 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2508 	if (!handler)
2509 		return ERR_PTR(-ENOMEM);
2510 
2511 	handler->add_dst_cb = add_cb;
2512 	handler->del_dst_cb = del_cb;
2513 	handler->client_context = context;
2514 	handler->ns = ns;
2515 	down_write(&ns->notifiers_rw_sem);
2516 	list_add_tail(&handler->list, &ns->list_notifiers);
2517 	up_write(&ns->notifiers_rw_sem);
2518 
2519 	return handler;
2520 }
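
/*
 * Usage sketch (hypothetical callbacks "my_add_cb"/"my_del_cb" with
 * the rule_event_fn signature): subscribe to rule add/del events on
 * the bypass namespace and detach later.
 *
 *	struct mlx5_flow_handler *h;
 *
 *	h = mlx5_register_rule_notifier(dev, MLX5_FLOW_NAMESPACE_BYPASS,
 *	    my_add_cb, my_del_cb, my_context);
 *	if (IS_ERR(h))
 *		return PTR_ERR(h);
 *	...
 *	mlx5_unregister_rule_notifier(h);
 */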
2521 
2522 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2523 				rule_event_fn add_rule_cb,
2524 				void *context);
2525 
2526 void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
2527 {
2528 	struct mlx5_flow_namespace *ns = handler->ns;
2529 
2530 	/* Remove this handler from every rule's client list. */
2531 	down_write(&ns->dests_rw_sem);
2532 	down_write(&ns->notifiers_rw_sem);
2533 	iterate_rules_in_ns(ns, remove_from_clients, handler);
2534 	list_del(&handler->list);
2535 	up_write(&ns->notifiers_rw_sem);
2536 	up_write(&ns->dests_rw_sem);
2537 	kfree(handler);
2538 }
2539 
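/*
 * Walk every rule in a flow table, taking the base locks in
 * ft -> flow group -> fte order.  Each rule is pinned with fs_get()
 * around the callback, and only the first rule of an fte is reported
 * with is_new_rule set.
 */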
2540 static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
2541 				rule_event_fn add_rule_cb,
2542 				void *context)
2543 {
2544 	struct mlx5_flow_group *iter_fg;
2545 	struct fs_fte *iter_fte;
2546 	struct mlx5_flow_rule *iter_rule;
2547 	int err = 0;
2548 	bool is_new_rule;
2549 
2550 	mutex_lock(&ft->base.lock);
2551 	fs_for_each_fg(iter_fg, ft) {
2552 		mutex_lock(&iter_fg->base.lock);
2553 		fs_for_each_fte(iter_fte, iter_fg) {
2554 			mutex_lock(&iter_fte->base.lock);
2555 			is_new_rule = true;
2556 			fs_for_each_dst(iter_rule, iter_fte) {
2557 				fs_get(&iter_rule->base);
2558 				err = add_rule_cb(iter_rule,
2559 						 is_new_rule,
2560 						 NULL,
2561 						 context);
2562 				fs_put_parent_locked(&iter_rule->base);
2563 				if (err)
2564 					break;
2565 				is_new_rule = false;
2566 			}
2567 			mutex_unlock(&iter_fte->base.lock);
2568 			if (err)
2569 				break;
2570 		}
2571 		mutex_unlock(&iter_fg->base.lock);
2572 		if (err)
2573 			break;
2574 	}
2575 	mutex_unlock(&ft->base.lock);
2576 }
2577 
2578 static void iterate_rules_in_prio(struct fs_prio *prio,
2579 				  rule_event_fn add_rule_cb,
2580 				  void *context)
2581 {
2582 	struct fs_base *it;
2583 
2584 	mutex_lock(&prio->base.lock);
2585 	fs_for_each_ns_or_ft(it, prio) {
2586 		if (it->type == FS_TYPE_FLOW_TABLE) {
2587 			struct mlx5_flow_table	      *ft;
2588 
2589 			fs_get_obj(ft, it);
2590 			iterate_rules_in_ft(ft, add_rule_cb, context);
2591 		} else {
2592 			struct mlx5_flow_namespace *ns;
2593 
2594 			fs_get_obj(ns, it);
2595 			iterate_rules_in_ns(ns, add_rule_cb, context);
2596 		}
2597 	}
2598 	mutex_unlock(&prio->base.lock);
2599 }
2600 
2601 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2602 				rule_event_fn add_rule_cb,
2603 				void *context)
2604 {
2605 	struct fs_prio *iter_prio;
2606 
2607 	mutex_lock(&ns->base.lock);
2608 	fs_for_each_prio(iter_prio, ns) {
2609 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2610 	}
2611 	mutex_unlock(&ns->base.lock);
2612 }
2613 
2614 void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
2615 					 rule_event_fn add_rule_cb,
2616 					 void *context)
2617 {
2618 	down_write(&ns->dests_rw_sem);
2619 	down_read(&ns->notifiers_rw_sem);
2620 	iterate_rules_in_ns(ns, add_rule_cb, context);
2621 	up_read(&ns->notifiers_rw_sem);
2622 	up_write(&ns->dests_rw_sem);
2623 }
2624 
2626 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2627 {
2628 	struct mlx5_flow_rule_node *iter_node;
2629 	struct mlx5_flow_rule_node *temp_node;
2630 
2631 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2632 		list_del(&iter_node->list);
2633 		kfree(iter_node);
2634 	}
2635 
2636 	kfree(rules_list);
2637 }
2638 
2639 #define ROCEV1_ETHERTYPE 0x8915
2640 static int set_rocev1_rules(struct list_head *rules_list)
2641 {
2642 	struct mlx5_flow_rule_node *rocev1_rule;
2643 
2644 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2645 	if (!rocev1_rule)
2646 		return -ENOMEM;
2647 
2648 	rocev1_rule->match_criteria_enable =
2649 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2650 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2651 		 0xffff);
2652 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2653 		 ROCEV1_ETHERTYPE);
2654 
2655 	list_add_tail(&rocev1_rule->list, rules_list);
2656 
2657 	return 0;
2658 }
2659 
2660 #define ROCEV2_UDP_PORT 4791
2661 static int set_rocev2_rules(struct list_head *rules_list)
2662 {
2663 	struct mlx5_flow_rule_node *ipv4_rule;
2664 	struct mlx5_flow_rule_node *ipv6_rule;
2665 
2666 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2667 	if (!ipv4_rule)
2668 		return -ENOMEM;
2669 
2670 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2671 	if (!ipv6_rule) {
2672 		kfree(ipv4_rule);
2673 		return -ENOMEM;
2674 	}
2675 
2676 	ipv4_rule->match_criteria_enable =
2677 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2678 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2679 		 0xffff);
2680 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2681 		 0x0800);
2682 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2683 		 0xff);
2684 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2685 		 IPPROTO_UDP);
2686 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2687 		 0xffff);
2688 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2689 		 ROCEV2_UDP_PORT);
2690 
2691 	ipv6_rule->match_criteria_enable =
2692 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2693 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2694 		 0xffff);
2695 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2696 		 0x86dd);
2697 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2698 		 0xff);
2699 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2700 		 IPPROTO_UDP);
2701 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2702 		 0xffff);
2703 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2704 		 ROCEV2_UDP_PORT);
2705 
2706 	list_add_tail(&ipv4_rule->list, rules_list);
2707 	list_add_tail(&ipv6_rule->list, rules_list);
2708 
2709 	return 0;
2710 }
2711 
2713 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2714 {
2715 	int err = 0;
2716 	struct mlx5_flow_rules_list *rules_list =
2717 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2718 
2719 	if (!rules_list)
2720 		return NULL;
2721 
2722 	INIT_LIST_HEAD(&rules_list->head);
2723 
2724 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2725 		err = set_rocev1_rules(&rules_list->head);
2726 		if (err)
2727 			goto free_list;
2728 	}
2729 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2730 		err = set_rocev2_rules(&rules_list->head);
2731 	if (err)
2732 		goto free_list;
2733 
2734 	return rules_list;
2735 
2736 free_list:
2737 	mlx5_del_flow_rules_list(rules_list);
2738 	return NULL;
2739 }
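
/*
 * Usage sketch (illustrative): build the RoCE steering rules for both
 * protocol versions and free the list when done.
 *
 *	struct mlx5_flow_rules_list *rules;
 *
 *	rules = get_roce_flow_rules(MLX5_ROCE_VERSION_1_CAP |
 *	    MLX5_ROCE_VERSION_2_CAP);
 *	if (rules == NULL)
 *		return -ENOMEM;
 *	...
 *	mlx5_del_flow_rules_list(rules);
 */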
2740