xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision 148a8da8)
1 /*-
2  * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include <linux/module.h>
29 #include <dev/mlx5/driver.h>
30 #include "mlx5_core.h"
31 #include "fs_core.h"
32 #include <linux/string.h>
33 #include <linux/compiler.h>
34 
/*
 * INIT_TREE_NODE_ARRAY_SIZE - element count of a brace-enclosed
 * initializer list of struct init_tree_node, computed at compile time.
 */
#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

/*
 * ADD_PRIO - build a FS_TYPE_PRIO init_tree_node with the given name,
 * flags, minimum flow-table level, flow-table quota, required HCA
 * capability list and child nodes.
 */
#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
		 ...) {.type = FS_TYPE_PRIO,\
	.name = name_val,\
	.min_ft_level = min_level_val,\
	.flags = flags_val,\
	.max_ft = max_ft_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* ADD_FT_PRIO - ADD_PRIO shorthand: min level 0, no capability requirements. */
#define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
		 __VA_ARGS__)\

/* ADD_NS - build a FS_TYPE_NAMESPACE init_tree_node holding child prios. */
#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
	.name = name_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* INIT_CAPS_ARRAY_SIZE - element count of a long[] capability initializer. */
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

/* FS_CAP - bit offset of a capability field within flow_table_nic_cap. */
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

/* FS_REQUIRED_CAPS - build a struct node_caps from a list of FS_CAP()s. */
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__}}
66 
/* Flow-table quotas and minimum levels for the default NIC RX tree. */
#define BYPASS_MAX_FT 5
#define BYPASS_PRIO_MAX_FT 1
#define KERNEL_MAX_FT 3
#define LEFTOVER_MAX_FT 1
/* NOTE(review): "KENREL" is a historical typo for "KERNEL"; the name is
 * kept so existing references elsewhere in the file keep compiling. */
#define KENREL_MIN_LEVEL 3
/* Parenthesized so the macros expand safely inside larger expressions. */
#define LEFTOVER_MIN_LEVEL (KENREL_MIN_LEVEL + 1)
#define BYPASS_MIN_LEVEL (MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL)
/* Set of HCA capabilities a priority requires before it may be created. */
struct node_caps {
	size_t	arr_sz;	/* number of entries in caps[] */
	long	*caps;	/* capability bit offsets produced by FS_CAP() */
};

/*
 * Static description of one node in the default flow-steering tree.
 * The root_fs instance below describes the whole NIC RX tree: bypass,
 * kernel and leftovers priorities under the "root" namespace.
 */
struct init_tree_node {
	enum fs_type	type;		/* FS_TYPE_PRIO or FS_TYPE_NAMESPACE */
	const char	*name;
	struct init_tree_node *children;
	int ar_size;			/* number of entries in children[] */
	struct node_caps caps;		/* caps required to create this node */
	u8  flags;
	int min_ft_level;		/* lowest acceptable flow-table level */
	int prio;
	int max_ft;			/* flow-table quota for this prio */
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.name = "root",
	.ar_size = 3,
	.children = (struct init_tree_node[]) {
		/* Bypass: eight user prios plus a multicast prio; needs
		 * FTE modify + root-modify support in firmware. */
		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("by_pass_ns",
				ADD_FT_PRIO("prio0", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio1", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio2", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio3", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio4", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio5", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio6", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio7", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio-mcast", 0,
					    BYPASS_PRIO_MAX_FT))),
		/* Kernel datapath tables. */
		ADD_PRIO("kernel_prio", 0, KENREL_MIN_LEVEL, 0, {},
			 ADD_NS("kernel_ns",
				ADD_FT_PRIO("prio_kernel-0", 0,
					    KERNEL_MAX_FT))),
		/* Leftovers: shared catch-all prio below the kernel tables. */
		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
			 LEFTOVER_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("leftover_ns",
				ADD_FT_PRIO("leftovers_prio-0",
					MLX5_CORE_FS_PRIO_SHARED,
					LEFTOVER_MAX_FT)))
	}
};
130 
131 /* Tree creation functions */
132 
133 static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
134 {
135 	struct fs_base *parent;
136 
137 	/* Make sure we only read it once while we go up the tree */
138 	while ((parent = node->parent))
139 		node = parent;
140 
141 	if (node->type != FS_TYPE_NAMESPACE) {
142 		printf("mlx5_core: WARN: ""mlx5: flow steering node %s is not in tree or garbaged\n", node->name);
143 		return NULL;
144 	}
145 
146 	return container_of(container_of(node,
147 					 struct mlx5_flow_namespace,
148 					 base),
149 			    struct mlx5_flow_root_namespace,
150 			    ns);
151 }
152 
153 static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
154 {
155 	struct mlx5_flow_root_namespace *root = find_root(node);
156 
157 	if (root)
158 		return root->dev;
159 	return NULL;
160 }
161 
162 static void fs_init_node(struct fs_base *node,
163 			 unsigned int refcount)
164 {
165 	kref_init(&node->refcount);
166 	atomic_set(&node->users_refcount, refcount);
167 	init_completion(&node->complete);
168 	INIT_LIST_HEAD(&node->list);
169 	mutex_init(&node->lock);
170 }
171 
/*
 * Name @node and attach it under @parent, taking a user reference on
 * the parent so it outlives this child.
 * NOTE(review): kstrdup_const() result is unchecked; on allocation
 * failure node->name is NULL — confirm downstream users tolerate that.
 */
static void _fs_add_node(struct fs_base *node,
			 const char *name,
			 struct fs_base *parent)
{
	if (parent)
		atomic_inc(&parent->users_refcount);
	node->name = kstrdup_const(name, GFP_KERNEL);
	node->parent = parent;
}
181 
/* Initialize @node with @refcount users and link it under @parent. */
static void fs_add_node(struct fs_base *node,
			struct fs_base *parent, const char *name,
			unsigned int refcount)
{
	fs_init_node(node, refcount);
	_fs_add_node(node, name, parent);
}
189 
190 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
191 		    bool parent_locked);
192 
193 static void fs_del_dst(struct mlx5_flow_rule *dst);
194 static void _fs_del_ft(struct mlx5_flow_table *ft);
195 static void fs_del_fg(struct mlx5_flow_group *fg);
196 static void fs_del_fte(struct fs_fte *fte);
197 
198 static void cmd_remove_node(struct fs_base *base)
199 {
200 	switch (base->type) {
201 	case FS_TYPE_FLOW_DEST:
202 		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
203 		break;
204 	case FS_TYPE_FLOW_TABLE:
205 		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
206 		break;
207 	case FS_TYPE_FLOW_GROUP:
208 		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
209 		break;
210 	case FS_TYPE_FLOW_ENTRY:
211 		fs_del_fte(container_of(base, struct fs_fte, base));
212 		break;
213 	default:
214 		break;
215 	}
216 }
217 
/*
 * kref release callback: tear down the firmware object behind the node
 * while holding the parent's lock (so siblings cannot race), signal
 * waiters, then drop the reference this node held on its parent.
 * The node's memory is NOT freed here — see _fs_remove_node()/
 * fs_remove_node() for the freeing variants.
 */
static void __fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	if (node->parent)
		mutex_lock(&node->parent->lock);
	mutex_lock(&node->lock);
	cmd_remove_node(node);
	mutex_unlock(&node->lock);
	/* Wake anyone blocked in fs_remove_node() waiting for teardown. */
	complete(&node->complete);
	if (node->parent) {
		mutex_unlock(&node->parent->lock);
		/* May cascade: parent is freed if we held its last user ref. */
		_fs_put(node->parent, _fs_remove_node, false);
	}
}
233 
/*
 * kref release callback that also frees the node itself. Used when the
 * releasing path owns the memory (e.g. cascading parent teardown),
 * unlike fs_remove_node() where the caller frees after waiting.
 */
void _fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	__fs_remove_node(kref);
	kfree_const(node->name);
	kfree(node);
}
242 
/* Take an additional user reference on @node. Paired with fs_put(). */
static void fs_get(struct fs_base *node)
{
	atomic_inc(&node->users_refcount);
}
247 
/*
 * Drop a user reference on @node. When the last user goes away the
 * node is unlinked from its parent's child list and @kref_cb runs via
 * kref_put(). @parent_locked says whether the caller already holds the
 * parent's lock; whatever lock state existed at entry is restored
 * before returning.
 */
static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked)
{
	struct fs_base *parent_node = node->parent;

	if (parent_node && !parent_locked)
		mutex_lock(&parent_node->lock);
	if (atomic_dec_and_test(&node->users_refcount)) {
		if (parent_node) {
			/*remove from parent's list*/
			list_del_init(&node->list);
			/* Drop before kref_cb: the callback re-takes it. */
			mutex_unlock(&parent_node->lock);
		}
		kref_put(&node->refcount, kref_cb);
		/* Restore the lock the caller still expects to hold. */
		if (parent_node && parent_locked)
			mutex_lock(&parent_node->lock);
	} else if (parent_node && !parent_locked) {
		mutex_unlock(&parent_node->lock);
	}
}
268 
/* Drop a user reference; teardown runs via __fs_remove_node (no free). */
static void fs_put(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, false);
}
273 
/* Like fs_put(), for callers that already hold the parent's lock. */
static void fs_put_parent_locked(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, true);
}
278 
/*
 * Release the caller's reference, wait until concurrent users finish
 * tearing the node down, then free its name and memory.
 */
static void fs_remove_node(struct fs_base *node)
{
	fs_put(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
286 
/* fs_remove_node() variant for callers holding the parent's lock. */
static void fs_remove_node_parent_locked(struct fs_base *node)
{
	fs_put_parent_locked(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
294 
295 static struct fs_fte *fs_alloc_fte(u8 action,
296 				   u32 flow_tag,
297 				   u32 *match_value,
298 				   unsigned int index)
299 {
300 	struct fs_fte *fte;
301 
302 
303 	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
304 	if (!fte)
305 		return ERR_PTR(-ENOMEM);
306 
307 	memcpy(fte->val, match_value, sizeof(fte->val));
308 	fte->base.type =  FS_TYPE_FLOW_ENTRY;
309 	fte->dests_size = 0;
310 	fte->flow_tag = flow_tag;
311 	fte->index = index;
312 	INIT_LIST_HEAD(&fte->dests);
313 	fte->action = action;
314 
315 	return fte;
316 }
317 
/*
 * Allocate the hidden "star" FTE (match-all, forward-to-next-table)
 * for table @ft in group @fg, together with its single flow-table
 * destination, and link it into the group. Firmware is programmed
 * later by fs_set_star_rule(). Returns the entry or an ERR_PTR.
 */
static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
					  struct mlx5_flow_group *fg,
					  u32 *match_value,
					  unsigned int index)
{
	int err;
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;

	if (fg->num_ftes == fg->max_ftes)
		return ERR_PTR(-ENOSPC);

	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
	if (IS_ERR(fte))
		return fte;

	/*create dst*/
	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_fte;
	}

	fte->base.parent = &fg->base;
	fte->dests_size = 1;
	/* The destination table itself is filled in by fs_set_star_rule(). */
	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
	dst->base.parent = &fte->base;
	list_add(&dst->base.list, &fte->dests);
	/* assumed that the callee creates the star rules sorted by index */
	list_add_tail(&fte->base.list, &fg->ftes);
	fg->num_ftes++;

	return fte;

free_fte:
	kfree(fte);
	return ERR_PTR(err);
}
357 
/* assume that fte can't be changed */
/*
 * Free a table's hidden star FTE: release its destination rules,
 * unlink it from its group and drop the group's entry count.
 */
static void free_star_fte_entry(struct fs_fte *fte)
{
	struct mlx5_flow_group	*fg;
	struct mlx5_flow_rule	*dst, *temp;

	fs_get_parent(fg, fte);

	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
		fte->dests_size--;
		list_del(&dst->base.list);
		kfree(dst);
	}

	list_del(&fte->base.list);
	fg->num_ftes--;
	kfree(fte);
}
376 
/*
 * Allocate a software flow-group object from a firmware-layout
 * create_flow_group_in blob: copies the match criteria and derives the
 * start index and capacity from the start/end flow indices.
 */
static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
{
	struct mlx5_flow_group *fg;
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    create_fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    create_fg_in,
					    match_criteria_enable);
	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fg->ftes);
	fg->mask.match_criteria_enable = match_criteria_enable;
	memcpy(&fg->mask.match_criteria, match_criteria,
	       sizeof(fg->mask.match_criteria));
	fg->base.type =  FS_TYPE_FLOW_GROUP;
	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
				   start_flow_index);
	/* end_flow_index is inclusive, hence the +1. */
	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
				end_flow_index) - fg->start_index + 1;
	return fg;
}
400 
401 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
402 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
403 					    struct fs_prio *prio);
404 
/* assumed src_ft and dst_ft can't be freed */
/*
 * Point @src_ft's star (match-all) FTE at @dst_ft. With a non-NULL
 * @dst_ft the FTE is (re)written in firmware and a reference is taken
 * on @dst_ft; with NULL the FTE is deleted, detaching @src_ft from the
 * table chain. Returns 0 or a negative errno.
 */
static int fs_set_star_rule(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *src_ft,
			    struct mlx5_flow_table *dst_ft)
{
	struct mlx5_flow_rule *src_dst;
	struct fs_fte *src_fte;
	int err = 0;
	u32 *match_value;
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	/* Star FTEs have exactly one destination (see alloc_star_ft_entry). */
	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
				   struct mlx5_flow_rule, base.list);
	/* Zeroed match value: the star rule matches every packet. */
	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}
	/*Create match context*/

	fs_get_parent(src_fte, src_dst);

	src_dst->dest_attr.ft = dst_ft;
	if (dst_ft) {
		err = mlx5_cmd_fs_set_fte(dev,
					  src_ft->vport,
					  &src_fte->status,
					  match_value, src_ft->type,
					  src_ft->id, src_fte->index,
					  src_ft->star_rule.fg->id,
					  src_fte->flow_tag,
					  src_fte->action,
					  src_fte->dests_size,
					  &src_fte->dests);
		if (err)
			goto free;

		/* Hold the destination table while we point at it. */
		fs_get(&dst_ft->base);
	} else {
		mlx5_cmd_fs_delete_fte(dev,
				       src_ft->vport,
				       &src_fte->status,
				       src_ft->type, src_ft->id,
				       src_fte->index);
	}

free:
	kvfree(match_value);
	return err;
}
455 
456 static int connect_prev_fts(struct fs_prio *locked_prio,
457 			    struct fs_prio *prev_prio,
458 			    struct mlx5_flow_table *next_ft)
459 {
460 	struct mlx5_flow_table *iter;
461 	int err = 0;
462 	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
463 
464 	if (!dev)
465 		return -ENODEV;
466 
467 	mutex_lock(&prev_prio->base.lock);
468 	fs_for_each_ft(iter, prev_prio) {
469 		struct mlx5_flow_rule *src_dst =
470 			list_first_entry(&iter->star_rule.fte->dests,
471 					 struct mlx5_flow_rule, base.list);
472 		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
473 
474 		if (prev_ft == next_ft)
475 			continue;
476 
477 		err = fs_set_star_rule(dev, iter, next_ft);
478 		if (err) {
479 			mlx5_core_warn(dev,
480 				       "mlx5: flow steering can't connect prev and next\n");
481 			goto unlock;
482 		} else {
483 			/* Assume ft's prio is locked */
484 			if (prev_ft) {
485 				struct fs_prio *prio;
486 
487 				fs_get_parent(prio, prev_ft);
488 				if (prio == locked_prio)
489 					fs_put_parent_locked(&prev_ft->base);
490 				else
491 					fs_put(&prev_ft->base);
492 			}
493 		}
494 	}
495 
496 unlock:
497 	mutex_unlock(&prev_prio->base.lock);
498 	return 0;
499 }
500 
501 static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
502 {
503 	struct mlx5_flow_group *fg;
504 	int err;
505 	u32 *fg_in;
506 	u32 *match_value;
507 	struct mlx5_flow_table *next_ft;
508 	struct mlx5_flow_table *prev_ft;
509 	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
510 	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
511 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
512 
513 	fg_in = mlx5_vzalloc(fg_inlen);
514 	if (!fg_in) {
515 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
516 		return -ENOMEM;
517 	}
518 
519 	match_value = mlx5_vzalloc(match_len);
520 	if (!match_value) {
521 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
522 		kvfree(fg_in);
523 		return -ENOMEM;
524 	}
525 
526 	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
527 	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
528 	fg = fs_alloc_fg(fg_in);
529 	if (IS_ERR(fg)) {
530 		err = PTR_ERR(fg);
531 		goto out;
532 	}
533 	ft->star_rule.fg = fg;
534 	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
535 				     fg_in, ft->vport, ft->type,
536 				     ft->id,
537 				     &fg->id);
538 	if (err)
539 		goto free_fg;
540 
541 	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
542 						      match_value,
543 						      ft->max_fte);
544 	if (IS_ERR(ft->star_rule.fte))
545 		goto free_star_rule;
546 
547 	mutex_lock(&root->fs_chain_lock);
548 	next_ft = find_next_ft(prio);
549 	err = fs_set_star_rule(root->dev, ft, next_ft);
550 	if (err) {
551 		mutex_unlock(&root->fs_chain_lock);
552 		goto free_star_rule;
553 	}
554 	if (next_ft) {
555 		struct fs_prio *parent;
556 
557 		fs_get_parent(parent, next_ft);
558 		fs_put(&next_ft->base);
559 	}
560 	prev_ft = find_prev_ft(ft, prio);
561 	if (prev_ft) {
562 		struct fs_prio *prev_parent;
563 
564 		fs_get_parent(prev_parent, prev_ft);
565 
566 		err = connect_prev_fts(NULL, prev_parent, ft);
567 		if (err) {
568 			mutex_unlock(&root->fs_chain_lock);
569 			goto destroy_chained_star_rule;
570 		}
571 		fs_put(&prev_ft->base);
572 	}
573 	mutex_unlock(&root->fs_chain_lock);
574 	kvfree(fg_in);
575 	kvfree(match_value);
576 
577 	return 0;
578 
579 destroy_chained_star_rule:
580 	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
581 	if (next_ft)
582 		fs_put(&next_ft->base);
583 free_star_rule:
584 	free_star_fte_entry(ft->star_rule.fte);
585 	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
586 			       ft->type, ft->id,
587 			       fg->id);
588 free_fg:
589 	kfree(fg);
590 out:
591 	kvfree(fg_in);
592 	kvfree(match_value);
593 	return err;
594 }
595 
596 static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
597 {
598 	int err;
599 	struct mlx5_flow_root_namespace *root;
600 	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
601 	struct mlx5_flow_table *prev_ft, *next_ft;
602 	struct fs_prio *prev_prio;
603 
604 	WARN_ON(!dev);
605 
606 	root = find_root(&prio->base);
607 	if (!root)
608 		printf("mlx5_core: ERR: ""mlx5: flow steering failed to find root of priority %s", prio->base.name);
609 
610 	/* In order to ensure atomic deletion, first update
611 	 * prev ft to point on the next ft.
612 	 */
613 	mutex_lock(&root->fs_chain_lock);
614 	prev_ft = find_prev_ft(ft, prio);
615 	next_ft = find_next_ft(prio);
616 	if (prev_ft) {
617 		fs_get_parent(prev_prio, prev_ft);
618 		/*Prev is connected to ft, only if ft is the first(last) in the prio*/
619 		err = connect_prev_fts(prio, prev_prio, next_ft);
620 		if (err)
621 			mlx5_core_warn(root->dev,
622 				       "flow steering can't connect prev and next of flow table\n");
623 		fs_put(&prev_ft->base);
624 	}
625 
626 	err = fs_set_star_rule(root->dev, ft, NULL);
627 	/*One put is for fs_get in find next ft*/
628 	if (next_ft) {
629 		fs_put(&next_ft->base);
630 		if (!err)
631 			fs_put(&next_ft->base);
632 	}
633 
634 	mutex_unlock(&root->fs_chain_lock);
635 	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
636 				     ft->star_rule.fg->id);
637 	if (err)
638 		mlx5_core_warn(dev,
639 			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
640 			       ft->base.name);
641 	free_star_fte_entry(ft->star_rule.fte);
642 
643 	kfree(ft->star_rule.fg);
644 	ft->star_rule.fg = NULL;
645 }
646 
647 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
648 				 unsigned int prio)
649 {
650 	struct fs_prio *iter_prio;
651 
652 	fs_for_each_prio(iter_prio, ns) {
653 		if (iter_prio->prio == prio)
654 			return iter_prio;
655 	}
656 
657 	return NULL;
658 }
659 
660 static unsigned int _alloc_new_level(struct fs_prio *prio,
661 				     struct mlx5_flow_namespace *match);
662 
/*
 * Sum the max_ft quotas of all prios in @ns that precede @prio, then
 * keep accumulating upward from @ns's parent prio. Returns the number
 * of levels reserved before @prio within @ns and above.
 */
static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
				      struct fs_prio *prio)
{
	unsigned int level = 0;
	struct fs_prio *p;

	if (!ns)
		return 0;

	mutex_lock(&ns->base.lock);
	fs_for_each_prio(p, ns) {
		if (p != prio)
			level += p->max_ft;
		else
			break;
	}
	mutex_unlock(&ns->base.lock);

	/* Note: @prio is reused to hold ns's parent prio from here on. */
	fs_get_parent(prio, ns);
	if (prio)
		WARN_ON(prio->base.type != FS_TYPE_PRIO);

	return level + _alloc_new_level(prio, ns);
}
687 
/* Called under lock of priority, hence locking all upper objects */
/*
 * Compute the level for a new flow table in @prio: walk @prio's
 * children newest-first; the first flow table found fixes the base
 * level, namespace children contribute their prios' max_ft quotas.
 * @match: the child namespace already accounted for by the caller
 * (recursion stops when it is reached).
 */
static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match)
{
	struct mlx5_flow_namespace *ns;
	struct fs_base *it;
	unsigned int level = 0;

	if (!prio)
		return 0;

	mutex_lock(&prio->base.lock);
	fs_for_each_ns_or_ft_reverse(it, prio) {
		if (it->type == FS_TYPE_NAMESPACE) {
			struct fs_prio *p;

			fs_get_obj(ns, it);

			if (match != ns) {
				mutex_lock(&ns->base.lock);
				fs_for_each_prio(p, ns)
					level += p->max_ft;
				mutex_unlock(&ns->base.lock);
			} else {
				/* Reached the child we recursed up from. */
				break;
			}
		} else {
			struct mlx5_flow_table *ft;

			/* Newest existing table determines the next level. */
			fs_get_obj(ft, it);
			mutex_unlock(&prio->base.lock);
			return level + ft->level + 1;
		}
	}

	fs_get_parent(ns, prio);
	mutex_unlock(&prio->base.lock);
	return __alloc_new_level(ns, prio) + level;
}
727 
/* Pick the level for a new flow table created in @prio. */
static unsigned int alloc_new_level(struct fs_prio *prio)
{
	return _alloc_new_level(prio, NULL);
}
732 
733 static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
734 				    struct mlx5_flow_table *ft)
735 {
736 	int err = 0;
737 	int min_level = INT_MAX;
738 
739 	if (root->root_ft)
740 		min_level = root->root_ft->level;
741 
742 	if (ft->level < min_level)
743 		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
744 					      ft->id);
745 	else
746 		return err;
747 
748 	if (err)
749 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
750 			       ft->id);
751 	else
752 		root->root_ft = ft;
753 
754 	return err;
755 }
756 
/*
 * Create a flow table in @fs_prio of namespace @ns for @vport:
 * allocate the software object, pick a level, create the table in
 * firmware, install its hidden star rule, possibly promote it to
 * hardware root, then link it into the prio. Returns the table or an
 * ERR_PTR.
 */
static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
						 u16 vport,
						 struct fs_prio *fs_prio,
						 int max_fte,
						 const char *name)
{
	struct mlx5_flow_table *ft;
	int err;
	int log_table_sz;
	int ft_size;
	char gen_name[20];
	struct mlx5_flow_root_namespace *root =
		find_root(&ns->base);

	if (!root) {
		printf("mlx5_core: ERR: ""mlx5: flow steering failed to find root of namespace %s", ns->base.name);
		return ERR_PTR(-ENODEV);
	}

	/* Respect the prio's flow-table quota. */
	if (fs_prio->num_ft == fs_prio->max_ft)
		return ERR_PTR(-ENOSPC);

	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	fs_init_node(&ft->base, 1);
	INIT_LIST_HEAD(&ft->fgs);

	/* Temporarily WA until we expose the level set in the API */
	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
		root->table_type == FS_FT_ESW_INGRESS_ACL)
		ft->level = 0;
	else
		ft->level = alloc_new_level(fs_prio);

	ft->base.type = FS_TYPE_FLOW_TABLE;
	ft->vport = vport;
	ft->type = root->table_type;
	/*Two entries are reserved for star rules*/
	ft_size = roundup_pow_of_two(max_fte + 2);
	/*User isn't aware to those rules*/
	ft->max_fte = ft_size - 2;
	log_table_sz = ilog2(ft_size);
	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
				    ft->level, log_table_sz, &ft->id);
	if (err)
		goto free_ft;

	err = create_star_rule(ft, fs_prio);
	if (err)
		goto del_ft;

	/* NIC RX tables may become the hardware root when supported. */
	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
			       flow_table_properties_nic_receive.modify_root)) {
		err = update_root_ft_create(root, ft);
		if (err)
			goto destroy_star_rule;
	}

	/* Fall back to a generated "flow_table_<id>" name if none given. */
	if (!name || !strlen(name)) {
		snprintf(gen_name, 20, "flow_table_%u", ft->id);
		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
	} else {
		_fs_add_node(&ft->base, name, &fs_prio->base);
	}
	list_add_tail(&ft->base.list, &fs_prio->objs);
	fs_prio->num_ft++;

	return ft;

destroy_star_rule:
	destroy_star_rule(ft, fs_prio);
del_ft:
	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
free_ft:
	kfree(ft);
	return ERR_PTR(err);
}
836 
837 static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
838 						u16 vport,
839 						unsigned int prio,
840 						int max_fte,
841 						const char *name)
842 {
843 	struct fs_prio *fs_prio = NULL;
844 	fs_prio = find_prio(ns, prio);
845 	if (!fs_prio)
846 		return ERR_PTR(-EINVAL);
847 
848 	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
849 }
850 
851 
852 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
853 						   struct list_head *start);
854 
855 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
856 						     struct list_head *start);
857 
858 static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
859 {
860 	struct mlx5_flow_table *ft;
861 
862 	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
863 	if (ft) {
864 		ft->shared_refcount++;
865 		return ft;
866 	}
867 
868 	return NULL;
869 }
870 
/*
 * Create an auto-grouped flow table (groups are created on demand as
 * rules are added) in @prio of @ns. For prios flagged
 * MLX5_CORE_FS_PRIO_SHARED an already-existing table is reused with
 * its shared_refcount bumped; creation/reuse is serialized by the
 * prio's shared_lock. Returns the table or an ERR_PTR.
 */
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
							   int prio,
							   const char *name,
							   int num_flow_table_entries,
							   int max_num_groups)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_prio *fs_prio;
	bool is_shared_prio;

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&fs_prio->shared_lock);
		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
	}

	if (ft)
		goto return_ft;

	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
			      name);
	if (IS_ERR(ft))
		goto return_ft;

	ft->autogroup.active = true;
	ft->autogroup.max_types = max_num_groups;
	if (is_shared_prio)
		ft->shared_refcount = 1;

return_ft:
	if (is_shared_prio)
		mutex_unlock(&fs_prio->shared_lock);
	return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
910 
/* Create a flow table bound to a specific @vport. See create_ft_common(). */
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     u16 vport,
						     int prio,
						     const char *name,
						     int num_flow_table_entries)
{
	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_vport_flow_table);
920 
/* Create a flow table on the local vport (0). See create_ft_common(). */
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       int prio,
					       const char *name,
					       int num_flow_table_entries)
{
	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_flow_table);
929 
/*
 * Firmware-side teardown of @ft, invoked from cmd_remove_node() once
 * the node's last reference is gone; updates the prio's table count.
 */
static void _fs_del_ft(struct mlx5_flow_table *ft)
{
	int err;
	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
	struct fs_prio *prio;

	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
	if (err)
		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
			       ft->base.name);

	fs_get_parent(prio, ft);
	prio->num_ft--;
}
944 
/*
 * If @ft is the current hardware root table, repoint the root to the
 * next table in the chain (or clear it) before @ft is destroyed.
 * Returns 0 when @ft is not the root or on success, otherwise the
 * firmware error.
 */
static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
				    struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_table *put_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	fs_get_parent(prio, ft);
	/*Assuming objs containis only flow tables and
	 * flow tables are sorted by level.
	 */
	if (!list_is_last(&ft->base.list, &prio->objs)) {
		next_ft = list_next_entry(ft, base.list);
	} else {
		/* find_next_ft() takes a reference we must drop below. */
		next_ft = find_next_ft(prio);
		put_ft = next_ft;
	}

	if (next_ft) {
		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
					      next_ft->id);
		if (err)
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
	}
	/* On success (or when no table is left) track the new root. */
	if (!err)
		root->root_ft = next_ft;

	if (put_ft)
		fs_put(&put_ft->base);

	return err;
}
982 
/*Objects in the same prio are destroyed in the reverse order they were createrd*/
/*
 * Destroy @ft: for shared prios just drop a share reference while
 * others remain; otherwise demote it from hardware root if needed,
 * remove its hidden star rule and tear the node down. Lock order is
 * prio->base.lock, then ft->base.lock, with the prio's shared_lock
 * (when shared) taken outermost. Returns 0 or a negative errno.
 */
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_root_namespace *root;
	bool is_shared_prio;

	fs_get_parent(prio, ft);
	root = find_root(&prio->base);

	if (!root) {
		printf("mlx5_core: ERR: ""mlx5: flow steering failed to find root of priority %s", prio->base.name);
		return -ENODEV;
	}

	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&prio->shared_lock);
		/* Other users still share the table: only drop our share. */
		if (ft->shared_refcount > 1) {
			--ft->shared_refcount;
			fs_put(&ft->base);
			mutex_unlock(&prio->shared_lock);
			return 0;
		}
	}

	mutex_lock(&prio->base.lock);
	mutex_lock(&ft->base.lock);

	err = update_root_ft_destroy(root, ft);
	if (err)
		goto unlock_ft;

	/* delete two last entries */
	destroy_star_rule(ft, prio);

	mutex_unlock(&ft->base.lock);
	fs_remove_node_parent_locked(&ft->base);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;

unlock_ft:
	mutex_unlock(&ft->base.lock);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);
1037 
1038 static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
1039 					    struct mlx5_flow_table *ft,
1040 					    struct list_head *prev,
1041 					    u32 *fg_in,
1042 					    int refcount)
1043 {
1044 	struct mlx5_flow_group *fg;
1045 	int err;
1046 	unsigned int end_index;
1047 	char name[20];
1048 
1049 	fg = fs_alloc_fg(fg_in);
1050 	if (IS_ERR(fg))
1051 		return fg;
1052 
1053 	end_index = fg->start_index + fg->max_ftes - 1;
1054 	err =  mlx5_cmd_fs_create_fg(dev, fg_in,
1055 				     ft->vport, ft->type, ft->id,
1056 				     &fg->id);
1057 	if (err)
1058 		goto free_fg;
1059 
1060 	mutex_lock(&ft->base.lock);
1061 	if (ft->autogroup.active)
1062 		ft->autogroup.num_types++;
1063 
1064 	snprintf(name, sizeof(name), "group_%u", fg->id);
1065 	/*Add node to tree*/
1066 	fs_add_node(&fg->base, &ft->base, name, refcount);
1067 	/*Add node to group list*/
1068 	list_add(&fg->base.list, prev);
1069 	mutex_unlock(&ft->base.lock);
1070 
1071 	return fg;
1072 
1073 free_fg:
1074 	kfree(fg);
1075 	return ERR_PTR(err);
1076 }
1077 
1078 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1079 					       u32 *in)
1080 {
1081 	struct mlx5_flow_group *fg;
1082 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
1083 
1084 	if (!dev)
1085 		return ERR_PTR(-ENODEV);
1086 
1087 	if (ft->autogroup.active)
1088 		return ERR_PTR(-EPERM);
1089 
1090 	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);
1091 
1092 	return fg;
1093 }
1094 EXPORT_SYMBOL(mlx5_create_flow_group);
1095 
/* A group is destroyed once all rules inside it have been removed. */
/*
 * Firmware-side teardown of a flow group, invoked from
 * cmd_remove_node() when its last reference is dropped; also updates
 * the parent table's autogroup accounting.
 */
static void fs_del_fg(struct mlx5_flow_group *fg)
{
	struct mlx5_flow_table *parent_ft;
	struct mlx5_core_dev *dev;

	fs_get_parent(parent_ft, fg);
	dev = fs_get_dev(&parent_ft->base);
	WARN_ON(!dev);

	if (parent_ft->autogroup.active)
		parent_ft->autogroup.num_types--;

	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
				   parent_ft->type,
				   parent_ft->id, fg->id))
		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
}
1114 
/* Public API: drop the caller's reference on @fg. The group (and its
 * firmware object, via fs_del_fg) is freed once the last reference is
 * released.
 */
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	fs_remove_node(&fg->base);
}
EXPORT_SYMBOL(mlx5_destroy_flow_group);
1120 
1121 static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
1122 {
1123 	unsigned int i;
1124 
1125 	/* TODO: optimize by comparing 64bits when possible */
1126 	for (i = 0; i < size; i++, mask++, val1++, val2++)
1127 		if ((*((u8 *)val1) & (*(u8 *)mask)) !=
1128 		    ((*(u8 *)val2) & (*(u8 *)mask)))
1129 			return false;
1130 
1131 	return true;
1132 }
1133 
/* Compare two fte match values under @mask, checking only the criteria
 * (outer headers, misc parameters, inner headers) the mask declares
 * enabled. Criteria not enabled in the mask are ignored. Returns true
 * when the values are equal everywhere the mask has bits set.
 */
bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
			       void *val1, void *val2)
{
	/* Outer L2-L4 headers */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, outer_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, outer_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, outer_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}

	/* Miscellaneous parameters (vport, vlan tags, gre, etc.) */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, misc_parameters);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, misc_parameters);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					  mask->match_criteria, misc_parameters);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
			return false;
	}
	/* Inner (tunneled) L2-L4 headers */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, inner_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, inner_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					  mask->match_criteria, inner_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}
	return true;
}
1179 
1180 bool fs_match_exact_mask(u8 match_criteria_enable1,
1181 				u8 match_criteria_enable2,
1182 				void *mask1, void *mask2)
1183 {
1184 	return match_criteria_enable1 == match_criteria_enable2 &&
1185 		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
1186 }
1187 
1188 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1189 							   struct list_head *start);
1190 
/* Walk @prio's namespace/flow-table list backwards, continuing from the
 * element before @start, and return the first flow table found — either
 * directly or by recursing into a sub-namespace. The returned table is
 * referenced (fs_get); caller must put it. Assumes prio->base.lock is
 * held (see find_first_ft_in_prio_reverse()).
 */
static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
							      struct list_head *start)
{
	struct fs_base *it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			/* Hold the table for the caller. */
			fs_get(&ft->base);
			return ft;
		}

		/* A prio holds either namespaces or flow tables. */
		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1219 
1220 static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
1221 							     struct list_head *start)
1222 {
1223 	struct mlx5_flow_table *ft;
1224 
1225 	if (!prio)
1226 		return NULL;
1227 
1228 	mutex_lock(&prio->base.lock);
1229 	ft = _find_first_ft_in_prio_reverse(prio, start);
1230 	mutex_unlock(&prio->base.lock);
1231 
1232 	return ft;
1233 }
1234 
1235 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1236 							   struct list_head *start)
1237 {
1238 	struct fs_prio *prio;
1239 
1240 	if (!ns)
1241 		return NULL;
1242 
1243 	fs_get_obj(prio, container_of(start, struct fs_base, list));
1244 	mutex_lock(&ns->base.lock);
1245 	fs_for_each_prio_continue_reverse(prio, ns) {
1246 		struct mlx5_flow_table *ft;
1247 
1248 		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
1249 		if (ft) {
1250 			mutex_unlock(&ns->base.lock);
1251 			return ft;
1252 		}
1253 	}
1254 	mutex_unlock(&ns->base.lock);
1255 
1256 	return NULL;
1257 }
1258 
/* Returned a held ft, assumed curr is protected, assumed curr's parent is
 * locked
 */
/* Find the flow table that precedes @curr in the global steering order,
 * climbing out of the current prio/namespace as needed. Used to fix up
 * table chaining. Returns NULL when @curr is not the first table of its
 * prio (a predecessor inside the same prio needs no fix-up here) or
 * when no earlier table exists.
 */
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	if (!curr)
		return NULL;

	/* prio has either namespace or flow-tables, but not both */
	if (!list_empty(&prio->objs) &&
	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
	    curr)
		return NULL;

	/* Walk upwards: search earlier prios of the parent namespace,
	 * then earlier siblings of that namespace, until a table is
	 * found or the root is reached.
	 */
	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (prio && !ft)
			ft = find_first_ft_in_prio_reverse(prio,
							   &curr_base->list);
	}
	return ft;
}
1291 
/* Forward counterpart of _find_first_ft_in_prio_reverse(): walk @prio's
 * namespace/flow-table list starting after @start and return the first
 * flow table found, recursing into sub-namespaces. The returned table
 * is referenced (fs_get); caller must put it.
 */
static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
						      struct list_head *start)
{
	struct fs_base	*it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			/* Hold the table for the caller. */
			fs_get(&ft->base);
			return ft;
		}

		/* A prio holds either namespaces or flow tables. */
		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1320 
1321 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
1322 						     struct list_head *start)
1323 {
1324 	struct mlx5_flow_table *ft;
1325 
1326 	if (!prio)
1327 		return NULL;
1328 
1329 	mutex_lock(&prio->base.lock);
1330 	ft = _find_first_ft_in_prio(prio, start);
1331 	mutex_unlock(&prio->base.lock);
1332 
1333 	return ft;
1334 }
1335 
1336 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
1337 						   struct list_head *start)
1338 {
1339 	struct fs_prio *prio;
1340 
1341 	if (!ns)
1342 		return NULL;
1343 
1344 	fs_get_obj(prio, container_of(start, struct fs_base, list));
1345 	mutex_lock(&ns->base.lock);
1346 	fs_for_each_prio_continue(prio, ns) {
1347 		struct mlx5_flow_table *ft;
1348 
1349 		ft = find_first_ft_in_prio(prio, &prio->objs);
1350 		if (ft) {
1351 			mutex_unlock(&ns->base.lock);
1352 			return ft;
1353 		}
1354 	}
1355 	mutex_unlock(&ns->base.lock);
1356 
1357 	return NULL;
1358 }
1359 
/* returned a held ft, assumed curr is protected, assumed curr's parent is
 * locked
 */
/* Find the flow table that follows @prio in the global steering order,
 * climbing out of the current namespace when the prio chain is
 * exhausted. NOTE(review): the second lookup uses the unlocked
 * _find_first_ft_in_prio() — presumably the parent prio's lock is
 * already held by the caller, per the comment above; confirm.
 */
static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (!ft && prio)
			ft = _find_first_ft_in_prio(prio, &curr_base->list);
	}
	return ft;
}
1381 
1382 
1383 /* called under ft mutex lock */
1384 static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
1385 						u8 match_criteria_enable,
1386 						u32 *match_criteria)
1387 {
1388 	unsigned int group_size;
1389 	unsigned int candidate_index = 0;
1390 	unsigned int candidate_group_num = 0;
1391 	struct mlx5_flow_group *g;
1392 	struct mlx5_flow_group *ret;
1393 	struct list_head *prev = &ft->fgs;
1394 	struct mlx5_core_dev *dev;
1395 	u32 *in;
1396 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1397 	void *match_criteria_addr;
1398 
1399 	if (!ft->autogroup.active)
1400 		return ERR_PTR(-ENOENT);
1401 
1402 	dev = fs_get_dev(&ft->base);
1403 	if (!dev)
1404 		return ERR_PTR(-ENODEV);
1405 
1406 	in = mlx5_vzalloc(inlen);
1407 	if (!in) {
1408 		mlx5_core_warn(dev, "failed to allocate inbox\n");
1409 		return ERR_PTR(-ENOMEM);
1410 	}
1411 
1412 
1413 	if (ft->autogroup.num_types < ft->autogroup.max_types)
1414 		group_size = ft->max_fte / (ft->autogroup.max_types + 1);
1415 	else
1416 		group_size = 1;
1417 
1418 	if (group_size == 0) {
1419 		mlx5_core_warn(dev,
1420 			       "flow steering can't create group size of 0\n");
1421 		ret = ERR_PTR(-EINVAL);
1422 		goto out;
1423 	}
1424 
1425 	/* sorted by start_index */
1426 	fs_for_each_fg(g, ft) {
1427 		candidate_group_num++;
1428 		if (candidate_index + group_size > g->start_index)
1429 			candidate_index = g->start_index + g->max_ftes;
1430 		else
1431 			break;
1432 		prev = &g->base.list;
1433 	}
1434 
1435 	if (candidate_index + group_size > ft->max_fte) {
1436 		ret = ERR_PTR(-ENOSPC);
1437 		goto out;
1438 	}
1439 
1440 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
1441 		 match_criteria_enable);
1442 	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
1443 	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
1444 		 group_size - 1);
1445 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
1446 					   in, match_criteria);
1447 	memcpy(match_criteria_addr, match_criteria,
1448 	       MLX5_ST_SZ_BYTES(fte_match_param));
1449 
1450 	ret = fs_create_fg(dev, ft, prev, in, 0);
1451 out:
1452 	kvfree(in);
1453 	return ret;
1454 }
1455 
1456 static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
1457 {
1458 	struct mlx5_flow_namespace *ns = NULL;
1459 
1460 	while (node  && (node->type != FS_TYPE_NAMESPACE ||
1461 			      list_empty(&container_of(node, struct
1462 						       mlx5_flow_namespace,
1463 						       base)->list_notifiers)))
1464 		node = node->parent;
1465 
1466 	if (node)
1467 		fs_get_obj(ns, node);
1468 
1469 	return ns;
1470 }
1471 
1472 
/*Assumption- fte is locked*/
/* Invoke every add_dst_cb notifier registered on the nearest ancestor
 * namespace that has notifiers. is_new_rule tells clients whether @dst
 * is the first destination of its fte. Stops at the first callback
 * that returns an error.
 */
static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool is_new_rule = list_first_entry(&fte->dests,
					    struct mlx5_flow_rule,
					    base.list) == dst;
	int err;

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;

	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		if (iter_handler->add_dst_cb) {
			/* Look up this handler's per-rule private data. */
			data = NULL;
			mutex_lock(&dst->clients_lock);
			list_for_each_entry(
				iter_client, &dst->clients_data, list) {
				if (iter_client->fs_handler == iter_handler) {
					data = iter_client->client_dst_data;
					break;
				}
			}
			mutex_unlock(&dst->clients_lock);
			/* NOTE(review): 'data' is looked up above but NULL is
			 * passed to add_dst_cb, unlike the del path which
			 * passes 'data' — confirm whether this is intended.
			 */
			err  = iter_handler->add_dst_cb(dst,
							is_new_rule,
							NULL,
							iter_handler->client_context);
			if (err)
				break;
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1514 
/* Invoke every del_dst_cb notifier registered on the nearest ancestor
 * namespace that has notifiers. ctx_changed tells clients whether the
 * fte itself is going away (no destinations remain).
 */
static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool ctx_changed = (fte->dests_size == 0);

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;
	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		/* Look up this handler's per-rule private data. */
		data = NULL;
		mutex_lock(&dst->clients_lock);
		list_for_each_entry(iter_client, &dst->clients_data, list) {
			if (iter_client->fs_handler == iter_handler) {
				data = iter_client->client_dst_data;
				break;
			}
		}
		mutex_unlock(&dst->clients_lock);
		if (iter_handler->del_dst_cb) {
			iter_handler->del_dst_cb(dst, ctx_changed, data,
						 iter_handler->client_context);
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1546 
/* fte should not be deleted while calling this function */
/* Allocate a rule node for @dest, temporarily link it into @fte->dests
 * so the firmware command programs the full destination list, then
 * unlink it again — the caller re-links it under the tree via
 * add_rule_to_tree() (fs_add_node resets the list head). Assumes the
 * fte's lock is held by the caller.
 */
static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
					      struct mlx5_flow_group *fg,
					      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *dst;
	int err;

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	memcpy(&dst->dest_attr, dest, sizeof(*dest));
	dst->base.type = FS_TYPE_FLOW_DEST;
	INIT_LIST_HEAD(&dst->clients_data);
	mutex_init(&dst->clients_lock);
	fs_get_parent(ft, fg);
	/*Add dest to dests list- added as first element after the head*/
	list_add_tail(&dst->base.list, &fte->dests);
	fte->dests_size++;
	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
				  ft->vport,
				  &fte->status,
				  fte->val, ft->type,
				  ft->id, fte->index, fg->id, fte->flow_tag,
				  fte->action, fte->dests_size, &fte->dests);
	if (err)
		goto free_dst;

	/* Success: unlink; dests_size stays incremented on purpose. */
	list_del(&dst->base.list);

	return dst;

free_dst:
	list_del(&dst->base.list);
	kfree(dst);
	fte->dests_size--;
	return ERR_PTR(err);
}
1587 
1588 static char *get_dest_name(struct mlx5_flow_destination *dest)
1589 {
1590 	char *name = kzalloc(sizeof(char) * 20, GFP_KERNEL);
1591 
1592 	switch (dest->type) {
1593 	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
1594 		snprintf(name, 20, "dest_%s_%u", "flow_table",
1595 			 dest->ft->id);
1596 		return name;
1597 	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
1598 		snprintf(name, 20, "dest_%s_%u", "vport",
1599 			 dest->vport_num);
1600 		return name;
1601 	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
1602 		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
1603 		return name;
1604 	default:
1605 		kfree(name);
1606 		return NULL;
1607 	}
1608 }
1609 
/* assumed fg is locked */
/* Return the lowest free fte index inside @fg, scanning the index-
 * sorted fte list for the first gap. If @prev is non-NULL it is set to
 * the list position after which an fte with the returned index should
 * be inserted to keep the list sorted.
 */
static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
					 struct list_head **prev)
{
	struct fs_fte *fte;
	unsigned int start = fg->start_index;

	if (prev)
		*prev = &fg->ftes;

	/* assumed list is sorted by index */
	fs_for_each_fte(fte, fg) {
		/* First gap found: 'start' is free. */
		if (fte->index != start)
			return start;
		start++;
		if (prev)
			*prev = &fte->base.list;
	}

	return start;
}
1631 
1632 
1633 static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
1634 			     u32 *match_value,
1635 			     u8 action,
1636 			     u32 flow_tag,
1637 			     struct list_head **prev)
1638 {
1639 	struct fs_fte *fte;
1640 	int index = 0;
1641 
1642 	index = fs_get_free_fg_index(fg, prev);
1643 	fte = fs_alloc_fte(action, flow_tag, match_value, index);
1644 	if (IS_ERR(fte))
1645 		return fte;
1646 
1647 	return fte;
1648 }
1649 
/* Link @rule into the tree under @fte (holding one reference) and
 * notify registered clients. Assumes the fte is locked by the caller.
 */
static void add_rule_to_tree(struct mlx5_flow_rule *rule,
			     struct fs_fte *fte)
{
	char *dest_name;

	/* NOTE(review): dest_name may be NULL (unknown dest type);
	 * assumes fs_add_node tolerates a NULL name — confirm.
	 */
	dest_name = get_dest_name(&rule->dest_attr);
	fs_add_node(&rule->base, &fte->base, dest_name, 1);
	/* re-add to list, since fs_add_node reset our list */
	list_add_tail(&rule->base.list, &fte->dests);
	kfree(dest_name);
	call_to_add_rule_notifiers(rule, fte);
}
1662 
/* Remove one destination (@dst) from its fte. If other destinations
 * remain, re-program the fte in firmware with the reduced list; when
 * this was the last destination the fte itself is deleted separately by
 * fs_del_fte. Clients are notified either way (unless the firmware
 * update failed).
 */
static void fs_del_dst(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;

	WARN_ON(!dev);

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return;
	}

	fs_get_parent(fte, dst);
	fs_get_parent(fg, fte);
	mutex_lock(&fg->base.lock);
	memcpy(match_value, fte->val, sizeof(fte->val));
	/* ft can't be changed as fg is locked */
	fs_get_parent(ft, fg);
	list_del(&dst->base.list);
	fte->dests_size--;
	if (fte->dests_size) {
		/* Re-program the fte with the remaining destinations. */
		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
					  &fte->status, match_value, ft->type,
					  ft->id, fte->index, fg->id,
					  fte->flow_tag, fte->action,
					  fte->dests_size, &fte->dests);
		if (err) {
			mlx5_core_warn(dev, "%s can't delete dst %s\n",
				       __func__, dst->base.name);
			goto err;
		}
	}
	call_to_del_rule_notifiers(dst, fte);
err:
	mutex_unlock(&fg->base.lock);
	kvfree(match_value);
}
1706 
1707 static void fs_del_fte(struct fs_fte *fte)
1708 {
1709 	struct mlx5_flow_table *ft;
1710 	struct mlx5_flow_group *fg;
1711 	int err;
1712 	struct mlx5_core_dev *dev;
1713 
1714 	fs_get_parent(fg, fte);
1715 	fs_get_parent(ft, fg);
1716 
1717 	dev = fs_get_dev(&ft->base);
1718 	WARN_ON(!dev);
1719 
1720 	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
1721 				     ft->type, ft->id, fte->index);
1722 	if (err)
1723 		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
1724 			       fte->base.name);
1725 
1726 	fg->num_ftes--;
1727 }
1728 
/* assuming parent fg is locked */
/* Add dst algorithm */
/* Add @dest to @fg: reuse an existing fte whose value/action/flow_tag
 * match exactly, otherwise create a new fte at the first free index.
 * Returns the new rule or an ERR_PTR() (-ENOSPC when the group is
 * full).
 */
static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
						   u32 *match_value,
						   u8 action,
						   u32 flow_tag,
						   struct mlx5_flow_destination *dest)
{
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	char fte_name[20];

	mutex_lock(&fg->base.lock);
	/* First pass: try to piggyback on an identical existing fte. */
	fs_for_each_fte(fte, fg) {
		/* TODO: Check of size against PRM max size */
		mutex_lock(&fte->base.lock);
		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
		    action == fte->action && flow_tag == fte->flow_tag) {
			dst = _fs_add_dst_fte(fte, fg, dest);
			mutex_unlock(&fte->base.lock);
			if (IS_ERR(dst))
				goto unlock_fg;
			goto add_rule;
		}
		mutex_unlock(&fte->base.lock);
	}

	fs_get_parent(ft, fg);
	if (fg->num_ftes == fg->max_ftes) {
		dst = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	/* No match: create a fresh fte for this destination. */
	fte = fs_create_fte(fg, match_value, action, flow_tag, &prev);
	if (IS_ERR(fte)) {
		dst = (void *)fte;
		goto unlock_fg;
	}
	dst = _fs_add_dst_fte(fte, fg, dest);
	if (IS_ERR(dst)) {
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
	/* Add node to tree */
	fs_add_node(&fte->base, &fg->base, fte_name, 0);
	/* Keep the fte list sorted: insert after 'prev'. */
	list_add(&fte->base.list, prev);
add_rule:
	add_rule_to_tree(dst, fte);
unlock_fg:
	mutex_unlock(&fg->base.lock);
	return dst;
}
1787 
/* Add a rule to @ft: first try every existing group whose mask matches
 * @match_criteria exactly; when all matching groups are full (-ENOSPC)
 * fall back to creating a new autogroup. Holds a table reference for
 * the duration.
 */
static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
					    u8 match_criteria_enable,
					    u32 *match_criteria,
					    u32 *match_value,
					    u8 action, u32 flow_tag,
					    struct mlx5_flow_destination *dest)
{
	/*? where dst_entry is allocated*/
	struct mlx5_flow_group *g;
	struct mlx5_flow_rule *dst;

	fs_get(&ft->base);
	mutex_lock(&ft->base.lock);
	fs_for_each_fg(g, ft)
		if (fs_match_exact_mask(g->mask.match_criteria_enable,
					match_criteria_enable,
					g->mask.match_criteria,
					match_criteria)) {
			mutex_unlock(&ft->base.lock);

			dst = fs_add_dst_fg(g, match_value,
					    action, flow_tag, dest);
			/* A valid pointer has a nonzero PTR_ERR value that is
			 * not -ENOSPC, so this exits the loop on success and
			 * on any error except "group full", which keeps
			 * searching. NOTE(review): the loop resumes without
			 * re-taking ft->base.lock after -ENOSPC — confirm the
			 * fg list can't change concurrently here.
			 */
			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
				goto unlock;
		}
	mutex_unlock(&ft->base.lock);

	/* No room in any matching group: grow the table's autogroups. */
	g = create_autogroup(ft, match_criteria_enable, match_criteria);
	if (IS_ERR(g)) {
		dst = (void *)g;
		goto unlock;
	}

	dst = fs_add_dst_fg(g, match_value,
			    action, flow_tag, dest);
	if (IS_ERR(dst)) {
		/* Remove assumes refcount > 0 and autogroup creates a group
		 * with a refcount = 0.
		 */
		fs_get(&g->base);
		fs_remove_node(&g->base);
		goto unlock;
	}

unlock:
	fs_put(&ft->base);
	return dst;
}
1836 
1837 struct mlx5_flow_rule *
1838 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1839 		   u8 match_criteria_enable,
1840 		   u32 *match_criteria,
1841 		   u32 *match_value,
1842 		   u32 action,
1843 		   u32 flow_tag,
1844 		   struct mlx5_flow_destination *dest)
1845 {
1846 	struct mlx5_flow_rule *dst;
1847 	struct mlx5_flow_namespace *ns;
1848 
1849 	ns = get_ns_with_notifiers(&ft->base);
1850 	if (ns)
1851 		down_read(&ns->dests_rw_sem);
1852 	dst =  fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
1853 			     match_value, action, flow_tag, dest);
1854 	if (ns)
1855 		up_read(&ns->dests_rw_sem);
1856 
1857 	return dst;
1858 
1859 
1860 }
1861 EXPORT_SYMBOL(mlx5_add_flow_rule);
1862 
/* Public API: remove a flow rule, under the same namespace read lock
 * that mlx5_add_flow_rule() takes, so deletion is serialized against
 * notifier registration.
 */
void mlx5_del_flow_rule(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_namespace *ns;

	ns = get_ns_with_notifiers(&dst->base);
	if (ns)
		down_read(&ns->dests_rw_sem);
	fs_remove_node(&dst->base);
	if (ns)
		up_read(&ns->dests_rw_sem);
}
EXPORT_SYMBOL(mlx5_del_flow_rule);
1875 
1876 #define MLX5_CORE_FS_ROOT_NS_NAME "root"
1877 #define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
1878 #define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
1879 #define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
1880 #define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
1881 #define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
1882 #define MLX5_CORE_FS_PRIO_MAX_FT 4
1883 #define MLX5_CORE_FS_PRIO_MAX_NS 1
1884 
1885 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
1886 				      unsigned prio, int max_ft,
1887 				      const char *name, u8 flags)
1888 {
1889 	struct fs_prio *fs_prio;
1890 
1891 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
1892 	if (!fs_prio)
1893 		return ERR_PTR(-ENOMEM);
1894 
1895 	fs_prio->base.type = FS_TYPE_PRIO;
1896 	fs_add_node(&fs_prio->base, &ns->base, name, 1);
1897 	fs_prio->max_ft = max_ft;
1898 	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
1899 	fs_prio->prio = prio;
1900 	fs_prio->flags = flags;
1901 	list_add_tail(&fs_prio->base.list, &ns->prios);
1902 	INIT_LIST_HEAD(&fs_prio->objs);
1903 	mutex_init(&fs_prio->shared_lock);
1904 
1905 	return fs_prio;
1906 }
1907 
/* Tear down the NIC RX root namespace bottom-up in three stages:
 * first the prios nested inside sub-namespaces, then the objects
 * (sub-namespaces / tables) inside each top-level prio, and finally
 * the top-level prios and the root itself. Clears dev->root_ns.
 */
static void cleanup_root_ns(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
	struct fs_prio *iter_prio;

	if (!root_ns)
		return;

	/* stage 1 */
	/* Remove the prios nested two levels down (inside sub-namespaces). */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		struct mlx5_flow_namespace *iter_ns;

		fs_for_each_ns(iter_ns, iter_prio) {
			while (!list_empty(&iter_ns->prios)) {
				struct fs_base *iter_prio2 =
					list_first_entry(&iter_ns->prios,
							 struct fs_base,
							 list);

				fs_remove_node(iter_prio2);
			}
		}
	}

	/* stage 2 */
	/* Remove the now-empty objects of each top-level prio. */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		while (!list_empty(&iter_prio->objs)) {
			struct fs_base *iter_ns =
				list_first_entry(&iter_prio->objs,
						 struct fs_base,
						 list);

				fs_remove_node(iter_ns);
		}
	}
	/* stage 3 */
	/* Remove the top-level prios, then the root namespace node. */
	while (!list_empty(&root_ns->ns.prios)) {
		struct fs_base *iter_prio =
			list_first_entry(&root_ns->ns.prios,
					 struct fs_base,
					 list);

		fs_remove_node(iter_prio);
	}

	fs_remove_node(&root_ns->ns.base);
	dev->root_ns = NULL;
}
1956 
1957 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1958 					struct mlx5_flow_root_namespace *root_ns)
1959 {
1960 	struct fs_base *prio;
1961 
1962 	if (!root_ns)
1963 		return;
1964 
1965 	if (!list_empty(&root_ns->ns.prios)) {
1966 		prio = list_first_entry(&root_ns->ns.prios,
1967 					struct fs_base,
1968 				 list);
1969 		fs_remove_node(prio);
1970 	}
1971 	fs_remove_node(&root_ns->ns.base);
1972 	root_ns = NULL;
1973 }
1974 
/* Release all flow-steering namespaces created by the init path:
 * the multi-prio NIC RX root plus the single-prio roots (sniffer RX/TX,
 * FDB, e-switch egress/ingress ACLs).
 */
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
	cleanup_root_ns(dev);
	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
}
1984 
1985 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
1986 						 *ns)
1987 {
1988 	ns->base.type = FS_TYPE_NAMESPACE;
1989 	init_rwsem(&ns->dests_rw_sem);
1990 	init_rwsem(&ns->notifiers_rw_sem);
1991 	INIT_LIST_HEAD(&ns->prios);
1992 	INIT_LIST_HEAD(&ns->list_notifiers);
1993 
1994 	return ns;
1995 }
1996 
1997 static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
1998 							  enum fs_ft_type
1999 							  table_type,
2000 							  char *name)
2001 {
2002 	struct mlx5_flow_root_namespace *root_ns;
2003 	struct mlx5_flow_namespace *ns;
2004 
2005 	/* create the root namespace */
2006 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2007 	if (!root_ns)
2008 		goto err;
2009 
2010 	root_ns->dev = dev;
2011 	root_ns->table_type = table_type;
2012 	mutex_init(&root_ns->fs_chain_lock);
2013 
2014 	ns = &root_ns->ns;
2015 	fs_init_namespace(ns);
2016 	fs_add_node(&ns->base, NULL, name, 1);
2017 
2018 	return root_ns;
2019 err:
2020 	return NULL;
2021 }
2022 
2023 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2024 {
2025 	struct fs_prio *prio;
2026 
2027 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2028 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2029 	if (!dev->fdb_root_ns)
2030 		return -ENOMEM;
2031 
2032 	/* create 1 prio*/
2033 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2034 	if (IS_ERR(prio))
2035 		return PTR_ERR(prio);
2036 	else
2037 		return 0;
2038 }
2039 
2040 #define MAX_VPORTS 128
2041 
2042 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2043 {
2044 	struct fs_prio *prio;
2045 
2046 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2047 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2048 	if (!dev->esw_egress_root_ns)
2049 		return -ENOMEM;
2050 
2051 	/* create 1 prio*/
2052 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2053 			      "esw_egress_prio", 0);
2054 	if (IS_ERR(prio))
2055 		return PTR_ERR(prio);
2056 	else
2057 		return 0;
2058 }
2059 
2060 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2061 {
2062 	struct fs_prio *prio;
2063 
2064 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2065 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2066 	if (!dev->esw_ingress_root_ns)
2067 		return -ENOMEM;
2068 
2069 	/* create 1 prio*/
2070 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2071 			      "esw_ingress_prio", 0);
2072 	if (IS_ERR(prio))
2073 		return PTR_ERR(prio);
2074 	else
2075 		return 0;
2076 }
2077 
2078 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2079 {
2080 	struct fs_prio *prio;
2081 
2082 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2083 				     MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2084 	if (!dev->sniffer_rx_root_ns)
2085 		return  -ENOMEM;
2086 
2087 	/* create 1 prio*/
2088 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2089 			      "sniffer_prio", 0);
2090 	if (IS_ERR(prio))
2091 		return PTR_ERR(prio);
2092 	else
2093 		return 0;
2094 }
2095 
2096 
2097 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2098 {
2099 	struct fs_prio *prio;
2100 
2101 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2102 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2103 	if (!dev->sniffer_tx_root_ns)
2104 		return  -ENOMEM;
2105 
2106 	/* create 1 prio*/
2107 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2108 			      "sniffer_prio", 0);
2109 	if (IS_ERR(prio))
2110 		return PTR_ERR(prio);
2111 	else
2112 		return 0;
2113 }
2114 
2115 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2116 						       const char *name)
2117 {
2118 	struct mlx5_flow_namespace	*ns;
2119 
2120 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2121 	if (!ns)
2122 		return ERR_PTR(-ENOMEM);
2123 
2124 	fs_init_namespace(ns);
2125 	fs_add_node(&ns->base, &prio->base, name, 1);
2126 	list_add_tail(&ns->base.list, &prio->objs);
2127 
2128 	return ns;
2129 }
2130 
#define FLOW_TABLE_BIT_SZ 1
/* Read a single flow-table capability bit located @offset bits into the
 * current MLX5_CAP_FLOW_TABLE capability area (stored as big-endian
 * 32-bit words).
 */
#define GET_FLOW_TABLE_CAP(dev, offset) \
	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
			offset / 32)) >>					\
	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2136 
2137 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2138 {
2139 	int i;
2140 
2141 	for (i = 0; i < caps->arr_sz; i++) {
2142 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2143 			return false;
2144 	}
2145 	return true;
2146 }
2147 
/* Recursively instantiate one node of the static init_tree_node
 * template under @base_parent: a prio node becomes an fs_prio (skipped
 * silently when the device lacks the required level or caps), a
 * namespace node becomes an mlx5_flow_namespace, and the node's
 * children are built beneath it. Returns 0 or a negative errno.
 */
static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
		    struct init_tree_node *node, struct fs_base *base_parent,
		    struct init_tree_node *tree_parent)
{
	struct mlx5_flow_namespace *fs_ns;
	struct fs_prio *fs_prio;
	int priority;
	struct fs_base *base;
	int i;
	int err = 0;

	if (node->type == FS_TYPE_PRIO) {
		/* Unsupported prios are skipped, not treated as errors. */
		if ((node->min_ft_level > max_ft_level) ||
		    !has_required_caps(dev, &node->caps))
			goto out;

		fs_get_obj(fs_ns, base_parent);
		/* Priority value = position within the parent template. */
		priority = node - tree_parent->children;
		fs_prio = fs_create_prio(fs_ns, priority,
					 node->max_ft,
					 node->name, node->flags);
		if (IS_ERR(fs_prio)) {
			err = PTR_ERR(fs_prio);
			goto out;
		}
		base = &fs_prio->base;
	} else if (node->type == FS_TYPE_NAMESPACE) {
		fs_get_obj(fs_prio, base_parent);
		fs_ns = fs_create_namespace(fs_prio, node->name);
		if (IS_ERR(fs_ns)) {
			err = PTR_ERR(fs_ns);
			goto out;
		}
		base = &fs_ns->base;
	} else {
		return -EINVAL;
	}
	/* Build this node's children beneath the object just created. */
	for (i = 0; i < node->ar_size; i++) {
		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
				      node);
		if (err)
			break;
	}
out:
	return err;
}
2194 
2195 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2196 		   struct init_tree_node *node, struct fs_base *parent)
2197 {
2198 	int i;
2199 	struct mlx5_flow_namespace *fs_ns;
2200 	int err = 0;
2201 
2202 	fs_get_obj(fs_ns, parent);
2203 	for (i = 0; i < node->ar_size; i++) {
2204 		err = _init_root_tree(dev, max_ft_level,
2205 				      &node->children[i], &fs_ns->base, node);
2206 		if (err)
2207 			break;
2208 	}
2209 	return err;
2210 }
2211 
2212 static int sum_max_ft_in_prio(struct fs_prio *prio);
2213 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2214 {
2215 	struct fs_prio *prio;
2216 	int sum = 0;
2217 
2218 	fs_for_each_prio(prio, ns) {
2219 		sum += sum_max_ft_in_prio(prio);
2220 	}
2221 	return  sum;
2222 }
2223 
2224 static int sum_max_ft_in_prio(struct fs_prio *prio)
2225 {
2226 	int sum = 0;
2227 	struct fs_base *it;
2228 	struct mlx5_flow_namespace	*ns;
2229 
2230 	if (prio->max_ft)
2231 		return prio->max_ft;
2232 
2233 	fs_for_each_ns_or_ft(it, prio) {
2234 		if (it->type == FS_TYPE_FLOW_TABLE)
2235 			continue;
2236 
2237 		fs_get_obj(ns, it);
2238 		sum += sum_max_ft_in_ns(ns);
2239 	}
2240 	prio->max_ft = sum;
2241 	return  sum;
2242 }
2243 
2244 static void set_max_ft(struct mlx5_flow_namespace *ns)
2245 {
2246 	struct fs_prio *prio;
2247 
2248 	if (!ns)
2249 		return;
2250 
2251 	fs_for_each_prio(prio, ns)
2252 		sum_max_ft_in_prio(prio);
2253 }
2254 
2255 static int init_root_ns(struct mlx5_core_dev *dev)
2256 {
2257 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2258 					      flow_table_properties_nic_receive.
2259 					      max_ft_level);
2260 
2261 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2262 				      MLX5_CORE_FS_ROOT_NS_NAME);
2263 	if (IS_ERR_OR_NULL(dev->root_ns))
2264 		goto err;
2265 
2266 
2267 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2268 		goto err;
2269 
2270 	set_max_ft(&dev->root_ns->ns);
2271 
2272 	return 0;
2273 err:
2274 	return -ENOMEM;
2275 }
2276 
2277 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2278 {
2279 	struct fs_base *pbase;
2280 	struct mlx5_flow_group *fg;
2281 
2282 	pbase = rule->base.parent;
2283 	WARN_ON(!pbase);
2284 	pbase = pbase->parent;
2285 	WARN_ON(!pbase);
2286 
2287 	fs_get_obj(fg, pbase);
2288 	return fg->mask.match_criteria_enable;
2289 }
2290 
2291 void mlx5_get_match_value(u32 *match_value,
2292 			  struct mlx5_flow_rule *rule)
2293 {
2294 	struct fs_base *pbase;
2295 	struct fs_fte *fte;
2296 
2297 	pbase = rule->base.parent;
2298 	WARN_ON(!pbase);
2299 	fs_get_obj(fte, pbase);
2300 
2301 	memcpy(match_value, fte->val, sizeof(fte->val));
2302 }
2303 
2304 void mlx5_get_match_criteria(u32 *match_criteria,
2305 			     struct mlx5_flow_rule *rule)
2306 {
2307 	struct fs_base *pbase;
2308 	struct mlx5_flow_group *fg;
2309 
2310 	pbase = rule->base.parent;
2311 	WARN_ON(!pbase);
2312 	pbase = pbase->parent;
2313 	WARN_ON(!pbase);
2314 
2315 	fs_get_obj(fg, pbase);
2316 	memcpy(match_criteria, &fg->mask.match_criteria,
2317 	       sizeof(fg->mask.match_criteria));
2318 }
2319 
2320 int mlx5_init_fs(struct mlx5_core_dev *dev)
2321 {
2322 	int err;
2323 
2324 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
2325 		err = init_root_ns(dev);
2326 		if (err)
2327 			goto err;
2328 	}
2329 
2330 	err = init_fdb_root_ns(dev);
2331 	if (err)
2332 		goto err;
2333 
2334 	err = init_egress_acl_root_ns(dev);
2335 	if (err)
2336 		goto err;
2337 
2338 	err = init_ingress_acl_root_ns(dev);
2339 	if (err)
2340 		goto err;
2341 
2342 	err = init_sniffer_tx_root_ns(dev);
2343 	if (err)
2344 		goto err;
2345 
2346 	err = init_sniffer_rx_root_ns(dev);
2347 	if (err)
2348 		goto err;
2349 
2350 	return 0;
2351 err:
2352 	mlx5_cleanup_fs(dev);
2353 	return err;
2354 }
2355 
2356 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2357 						  enum mlx5_flow_namespace_type type)
2358 {
2359 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2360 	int prio;
2361 	static struct fs_prio *fs_prio;
2362 	struct mlx5_flow_namespace *ns;
2363 
2364 	switch (type) {
2365 	case MLX5_FLOW_NAMESPACE_BYPASS:
2366 		prio = 0;
2367 		break;
2368 	case MLX5_FLOW_NAMESPACE_KERNEL:
2369 		prio = 1;
2370 		break;
2371 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2372 		prio = 2;
2373 		break;
2374 	case MLX5_FLOW_NAMESPACE_FDB:
2375 		if (dev->fdb_root_ns)
2376 			return &dev->fdb_root_ns->ns;
2377 		else
2378 			return NULL;
2379 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2380 		if (dev->esw_egress_root_ns)
2381 			return &dev->esw_egress_root_ns->ns;
2382 		else
2383 			return NULL;
2384 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2385 		if (dev->esw_ingress_root_ns)
2386 			return &dev->esw_ingress_root_ns->ns;
2387 		else
2388 			return NULL;
2389 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2390 		if (dev->sniffer_rx_root_ns)
2391 			return &dev->sniffer_rx_root_ns->ns;
2392 		else
2393 			return NULL;
2394 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2395 		if (dev->sniffer_tx_root_ns)
2396 			return &dev->sniffer_tx_root_ns->ns;
2397 		else
2398 			return NULL;
2399 	default:
2400 		return NULL;
2401 	}
2402 
2403 	if (!root_ns)
2404 		return NULL;
2405 
2406 	fs_prio = find_prio(&root_ns->ns, prio);
2407 	if (!fs_prio)
2408 		return NULL;
2409 
2410 	ns = list_first_entry(&fs_prio->objs,
2411 			      typeof(*ns),
2412 			      base.list);
2413 
2414 	return ns;
2415 }
2416 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2417 
2418 
2419 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2420 				  struct mlx5_flow_handler *fs_handler,
2421 				  void  *client_data)
2422 {
2423 	struct fs_client_priv_data *priv_data;
2424 
2425 	mutex_lock(&rule->clients_lock);
2426 	/*Check that hanlder isn't exists in the list already*/
2427 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2428 		if (priv_data->fs_handler == fs_handler) {
2429 			priv_data->client_dst_data = client_data;
2430 			goto unlock;
2431 		}
2432 	}
2433 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2434 	if (!priv_data) {
2435 		mutex_unlock(&rule->clients_lock);
2436 		return -ENOMEM;
2437 	}
2438 
2439 	priv_data->client_dst_data = client_data;
2440 	priv_data->fs_handler = fs_handler;
2441 	list_add(&priv_data->list, &rule->clients_data);
2442 
2443 unlock:
2444 	mutex_unlock(&rule->clients_lock);
2445 
2446 	return 0;
2447 }
2448 
2449 static int remove_from_clients(struct mlx5_flow_rule *rule,
2450 			bool ctx_changed,
2451 			void *client_data,
2452 			void *context)
2453 {
2454 	struct fs_client_priv_data *iter_client;
2455 	struct fs_client_priv_data *temp_client;
2456 	struct mlx5_flow_handler *handler = (struct
2457 						mlx5_flow_handler*)context;
2458 
2459 	mutex_lock(&rule->clients_lock);
2460 	list_for_each_entry_safe(iter_client, temp_client,
2461 				 &rule->clients_data, list) {
2462 		if (iter_client->fs_handler == handler) {
2463 			list_del(&iter_client->list);
2464 			kfree(iter_client);
2465 			break;
2466 		}
2467 	}
2468 	mutex_unlock(&rule->clients_lock);
2469 
2470 	return 0;
2471 }
2472 
2473 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2474 								enum mlx5_flow_namespace_type ns_type,
2475 								rule_event_fn add_cb,
2476 								rule_event_fn del_cb,
2477 								void *context)
2478 {
2479 	struct mlx5_flow_namespace *ns;
2480 	struct mlx5_flow_handler *handler;
2481 
2482 	ns = mlx5_get_flow_namespace(dev, ns_type);
2483 	if (!ns)
2484 		return ERR_PTR(-EINVAL);
2485 
2486 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2487 	if (!handler)
2488 		return ERR_PTR(-ENOMEM);
2489 
2490 	handler->add_dst_cb = add_cb;
2491 	handler->del_dst_cb = del_cb;
2492 	handler->client_context = context;
2493 	handler->ns = ns;
2494 	down_write(&ns->notifiers_rw_sem);
2495 	list_add_tail(&handler->list, &ns->list_notifiers);
2496 	up_write(&ns->notifiers_rw_sem);
2497 
2498 	return handler;
2499 }
2500 
2501 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2502 				rule_event_fn add_rule_cb,
2503 				void *context);
2504 
/*
 * Unregister 'handler': purge its private data from every rule in its
 * namespace, unlink it from the notifier list and free it.  Both the
 * dests and notifiers semaphores are taken for write (in that order) so
 * that no rule add/del and no notification can run concurrently with the
 * teardown.
 */
void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
{
	struct mlx5_flow_namespace *ns = handler->ns;

	/*Remove from dst's clients*/
	down_write(&ns->dests_rw_sem);
	down_write(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, remove_from_clients, handler);
	list_del(&handler->list);
	up_write(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
	kfree(handler);
}
2518 
/*
 * Invoke 'add_rule_cb' on every rule in 'ft', walking group -> fte ->
 * rule with the corresponding base lock held at each level.  The first
 * rule of each fte is reported with is_new_rule == true, subsequent ones
 * with false.  Each rule is referenced across the callback (fs_get /
 * fs_put_parent_locked — the parent-locked put because the fte lock is
 * already held here).  A non-zero callback return aborts the whole walk.
 */
static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
				rule_event_fn add_rule_cb,
				void *context)
{
	struct mlx5_flow_group *iter_fg;
	struct fs_fte *iter_fte;
	struct mlx5_flow_rule *iter_rule;
	int err = 0;
	bool is_new_rule;

	mutex_lock(&ft->base.lock);
	fs_for_each_fg(iter_fg, ft) {
		mutex_lock(&iter_fg->base.lock);
		fs_for_each_fte(iter_fte, iter_fg) {
			mutex_lock(&iter_fte->base.lock);
			is_new_rule = true;
			fs_for_each_dst(iter_rule, iter_fte) {
				fs_get(&iter_rule->base);
				err = add_rule_cb(iter_rule,
						 is_new_rule,
						 NULL,
						 context);
				fs_put_parent_locked(&iter_rule->base);
				if (err)
					break;
				is_new_rule = false;
			}
			mutex_unlock(&iter_fte->base.lock);
			if (err)
				break;
		}
		mutex_unlock(&iter_fg->base.lock);
		if (err)
			break;
	}
	mutex_unlock(&ft->base.lock);
}
2556 
2557 static void iterate_rules_in_prio(struct fs_prio *prio,
2558 				  rule_event_fn add_rule_cb,
2559 				  void *context)
2560 {
2561 	struct fs_base *it;
2562 
2563 	mutex_lock(&prio->base.lock);
2564 	fs_for_each_ns_or_ft(it, prio) {
2565 		if (it->type == FS_TYPE_FLOW_TABLE) {
2566 			struct mlx5_flow_table	      *ft;
2567 
2568 			fs_get_obj(ft, it);
2569 			iterate_rules_in_ft(ft, add_rule_cb, context);
2570 		} else {
2571 			struct mlx5_flow_namespace *ns;
2572 
2573 			fs_get_obj(ns, it);
2574 			iterate_rules_in_ns(ns, add_rule_cb, context);
2575 		}
2576 	}
2577 	mutex_unlock(&prio->base.lock);
2578 }
2579 
2580 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2581 				rule_event_fn add_rule_cb,
2582 				void *context)
2583 {
2584 	struct fs_prio *iter_prio;
2585 
2586 	mutex_lock(&ns->base.lock);
2587 	fs_for_each_prio(iter_prio, ns) {
2588 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2589 	}
2590 	mutex_unlock(&ns->base.lock);
2591 }
2592 
/*
 * Replay all existing rules in 'ns' through 'add_rule_cb'.  dests_rw_sem
 * is taken for write to freeze rule addition/removal for the duration;
 * notifiers_rw_sem is taken for read so handler registration/teardown
 * (which takes it for write) is excluded.
 */
void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
					 rule_event_fn add_rule_cb,
					 void *context)
{
	down_write(&ns->dests_rw_sem);
	down_read(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, add_rule_cb, context);
	up_read(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
}
2603 
2604 
2605 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2606 {
2607 	struct mlx5_flow_rule_node *iter_node;
2608 	struct mlx5_flow_rule_node *temp_node;
2609 
2610 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2611 		list_del(&iter_node->list);
2612 		kfree(iter_node);
2613 	}
2614 
2615 	kfree(rules_list);
2616 }
2617 
2618 #define ROCEV1_ETHERTYPE 0x8915
2619 static int set_rocev1_rules(struct list_head *rules_list)
2620 {
2621 	struct mlx5_flow_rule_node *rocev1_rule;
2622 
2623 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2624 	if (!rocev1_rule)
2625 		return -ENOMEM;
2626 
2627 	rocev1_rule->match_criteria_enable =
2628 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2629 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2630 		 0xffff);
2631 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2632 		 ROCEV1_ETHERTYPE);
2633 
2634 	list_add_tail(&rocev1_rule->list, rules_list);
2635 
2636 	return 0;
2637 }
2638 
2639 #define ROCEV2_UDP_PORT 4791
2640 static int set_rocev2_rules(struct list_head *rules_list)
2641 {
2642 	struct mlx5_flow_rule_node *ipv4_rule;
2643 	struct mlx5_flow_rule_node *ipv6_rule;
2644 
2645 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2646 	if (!ipv4_rule)
2647 		return -ENOMEM;
2648 
2649 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2650 	if (!ipv6_rule) {
2651 		kfree(ipv4_rule);
2652 		return -ENOMEM;
2653 	}
2654 
2655 	ipv4_rule->match_criteria_enable =
2656 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2657 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2658 		 0xffff);
2659 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2660 		 0x0800);
2661 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2662 		 0xff);
2663 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2664 		 IPPROTO_UDP);
2665 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2666 		 0xffff);
2667 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2668 		 ROCEV2_UDP_PORT);
2669 
2670 	ipv6_rule->match_criteria_enable =
2671 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2672 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2673 		 0xffff);
2674 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2675 		 0x86dd);
2676 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2677 		 0xff);
2678 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2679 		 IPPROTO_UDP);
2680 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2681 		 0xffff);
2682 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2683 		 ROCEV2_UDP_PORT);
2684 
2685 	list_add_tail(&ipv4_rule->list, rules_list);
2686 	list_add_tail(&ipv6_rule->list, rules_list);
2687 
2688 	return 0;
2689 }
2690 
2691 
2692 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2693 {
2694 	int err = 0;
2695 	struct mlx5_flow_rules_list *rules_list =
2696 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2697 
2698 	if (!rules_list)
2699 		return NULL;
2700 
2701 	INIT_LIST_HEAD(&rules_list->head);
2702 
2703 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2704 		err = set_rocev1_rules(&rules_list->head);
2705 		if (err)
2706 			goto free_list;
2707 	}
2708 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2709 		err = set_rocev2_rules(&rules_list->head);
2710 	if (err)
2711 		goto free_list;
2712 
2713 	return rules_list;
2714 
2715 free_list:
2716 	mlx5_del_flow_rules_list(rules_list);
2717 	return NULL;
2718 }
2719