xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision bc56a8f9)
1 /*-
2  * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include <linux/module.h>
29 #include <dev/mlx5/driver.h>
30 #include "mlx5_core.h"
31 #include "fs_core.h"
32 #include <linux/string.h>
33 #include <linux/compiler.h>
34 
/*
 * Number of elements in a brace-enclosed list of struct init_tree_node
 * initializers; used to size the .children arrays built below.
 */
#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

/*
 * Build a FS_TYPE_PRIO node with the given name, flags, minimum flow
 * table level, flow table budget, required capabilities and children.
 */
#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
		 ...) {.type = FS_TYPE_PRIO,\
	.name = name_val,\
	.min_ft_level = min_level_val,\
	.flags = flags_val,\
	.max_ft = max_ft_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* Shorthand for a priority with no level or capability requirements. */
#define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
		 __VA_ARGS__)\

/* Build a FS_TYPE_NAMESPACE node containing the given child nodes. */
#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
	.name = name_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* Number of elements in a brace-enclosed capability-bit list. */
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

/* Bit offset of a field inside the NIC flow table capability layout. */
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

/* Capability list a priority requires before it is instantiated. */
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__}}
67 #define BYPASS_MAX_FT 5
68 #define BYPASS_PRIO_MAX_FT 1
69 #define KERNEL_MAX_FT 5
70 #define LEFTOVER_MAX_FT 1
71 #define KENREL_MIN_LEVEL 3
72 #define LEFTOVER_MIN_LEVEL KENREL_MIN_LEVEL + 1
73 #define BYPASS_MIN_LEVEL MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL
74 struct node_caps {
75 	size_t	arr_sz;
76 	long	*caps;
77 };
78 
79 struct init_tree_node {
80 	enum fs_type	type;
81 	const char	*name;
82 	struct init_tree_node *children;
83 	int ar_size;
84 	struct node_caps caps;
85 	u8  flags;
86 	int min_ft_level;
87 	int prio;
88 	int max_ft;
89 } root_fs = {
90 	.type = FS_TYPE_NAMESPACE,
91 	.name = "root",
92 	.ar_size = 3,
93 	.children = (struct init_tree_node[]) {
94 		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
95 			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
96 					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
97 			 ADD_NS("by_pass_ns",
98 				ADD_FT_PRIO("prio0", 0,
99 					    BYPASS_PRIO_MAX_FT),
100 				ADD_FT_PRIO("prio1", 0,
101 					    BYPASS_PRIO_MAX_FT),
102 				ADD_FT_PRIO("prio2", 0,
103 					    BYPASS_PRIO_MAX_FT),
104 				ADD_FT_PRIO("prio3", 0,
105 					    BYPASS_PRIO_MAX_FT),
106 				ADD_FT_PRIO("prio4", 0,
107 					    BYPASS_PRIO_MAX_FT),
108 				ADD_FT_PRIO("prio5", 0,
109 					    BYPASS_PRIO_MAX_FT),
110 				ADD_FT_PRIO("prio6", 0,
111 					    BYPASS_PRIO_MAX_FT),
112 				ADD_FT_PRIO("prio7", 0,
113 					    BYPASS_PRIO_MAX_FT),
114 				ADD_FT_PRIO("prio-mcast", 0,
115 					    BYPASS_PRIO_MAX_FT))),
116 		ADD_PRIO("kernel_prio", 0, KENREL_MIN_LEVEL, 0, {},
117 			 ADD_NS("kernel_ns",
118 				ADD_FT_PRIO("prio_kernel-0", 0,
119 					    KERNEL_MAX_FT))),
120 		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
121 			 LEFTOVER_MIN_LEVEL, 0,
122 			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
123 					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
124 			 ADD_NS("leftover_ns",
125 				ADD_FT_PRIO("leftovers_prio-0",
126 					MLX5_CORE_FS_PRIO_SHARED,
127 					LEFTOVER_MAX_FT)))
128 	}
129 };
130 
131 /* Tree creation functions */
132 
133 static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
134 {
135 	struct fs_base *parent;
136 
137 	/* Make sure we only read it once while we go up the tree */
138 	while ((parent = node->parent))
139 		node = parent;
140 
141 	if (node->type != FS_TYPE_NAMESPACE) {
142 		return NULL;
143 	}
144 
145 	return container_of(container_of(node,
146 					 struct mlx5_flow_namespace,
147 					 base),
148 			    struct mlx5_flow_root_namespace,
149 			    ns);
150 }
151 
152 static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
153 {
154 	struct mlx5_flow_root_namespace *root = find_root(node);
155 
156 	if (root)
157 		return root->dev;
158 	return NULL;
159 }
160 
161 static void fs_init_node(struct fs_base *node,
162 			 unsigned int refcount)
163 {
164 	kref_init(&node->refcount);
165 	atomic_set(&node->users_refcount, refcount);
166 	init_completion(&node->complete);
167 	INIT_LIST_HEAD(&node->list);
168 	mutex_init(&node->lock);
169 }
170 
171 static void _fs_add_node(struct fs_base *node,
172 			 const char *name,
173 			 struct fs_base *parent)
174 {
175 	if (parent)
176 		atomic_inc(&parent->users_refcount);
177 	node->name = kstrdup_const(name, GFP_KERNEL);
178 	node->parent = parent;
179 }
180 
/* Initialize a node and link it under its parent in one step. */
static void fs_add_node(struct fs_base *node,
			struct fs_base *parent, const char *name,
			unsigned int refcount)
{
	fs_init_node(node, refcount);
	_fs_add_node(node, name, parent);
}
188 
189 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
190 		    bool parent_locked);
191 
192 static void fs_del_dst(struct mlx5_flow_rule *dst);
193 static void _fs_del_ft(struct mlx5_flow_table *ft);
194 static void fs_del_fg(struct mlx5_flow_group *fg);
195 static void fs_del_fte(struct fs_fte *fte);
196 
/*
 * Dispatch type-specific teardown for a node being removed.  Called
 * from __fs_remove_node() with the node's (and parent's) lock held.
 */
static void cmd_remove_node(struct fs_base *base)
{
	switch (base->type) {
	case FS_TYPE_FLOW_DEST:
		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
		break;
	case FS_TYPE_FLOW_TABLE:
		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
		break;
	case FS_TYPE_FLOW_GROUP:
		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
		break;
	case FS_TYPE_FLOW_ENTRY:
		fs_del_fte(container_of(base, struct fs_fte, base));
		break;
	default:
		/* Other node types need no type-specific teardown here. */
		break;
	}
}
216 
/*
 * kref release callback: tear the node down once the last reference is
 * gone.  The parent's lock (if any) is taken around the teardown so
 * concurrent list walks over siblings stay consistent; waiters are then
 * signalled via node->complete, and the reference the node held on its
 * parent is dropped.  Note: the node's memory is NOT freed here — see
 * fs_remove_node() and _fs_remove_node().
 */
static void __fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	if (node->parent)
		mutex_lock(&node->parent->lock);
	mutex_lock(&node->lock);
	cmd_remove_node(node);
	mutex_unlock(&node->lock);
	complete(&node->complete);
	if (node->parent) {
		mutex_unlock(&node->parent->lock);
		_fs_put(node->parent, _fs_remove_node, false);
	}
}
232 
/*
 * Release callback that both tears the node down and frees its memory.
 * Used when dropping a parent reference, where nobody waits on the
 * node's completion and the memory can be reclaimed immediately.
 */
void _fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	__fs_remove_node(kref);
	kfree_const(node->name);
	kfree(node);
}
241 
/* Take an additional user reference on a node. */
static void fs_get(struct fs_base *node)
{
	atomic_inc(&node->users_refcount);
}
246 
/*
 * Drop a user reference on 'node'.  When the count reaches zero the
 * node is unlinked from its parent's child list and the underlying
 * kref is released through 'kref_cb'.  'parent_locked' says whether the
 * caller already holds the parent's lock; the original lock state is
 * restored before returning.
 */
static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked)
{
	struct fs_base *parent_node = node->parent;

	if (parent_node && !parent_locked)
		mutex_lock(&parent_node->lock);
	if (atomic_dec_and_test(&node->users_refcount)) {
		if (parent_node) {
			/*remove from parent's list*/
			list_del_init(&node->list);
			/* kref_cb may take the parent lock itself; drop it. */
			mutex_unlock(&parent_node->lock);
		}
		kref_put(&node->refcount, kref_cb);
		/* Re-acquire for callers that expect the lock held. */
		if (parent_node && parent_locked)
			mutex_lock(&parent_node->lock);
	} else if (parent_node && !parent_locked) {
		mutex_unlock(&parent_node->lock);
	}
}
267 
/*
 * Drop a user reference.  The release callback (__fs_remove_node) tears
 * the node down but does not free its memory; see fs_remove_node().
 */
static void fs_put(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, false);
}
272 
/* Same as fs_put(), for callers already holding the parent's lock. */
static void fs_put_parent_locked(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, true);
}
277 
/*
 * Drop a reference, block until the node's teardown has completed
 * (i.e. until every outstanding reference is gone and __fs_remove_node
 * has signalled node->complete), then free the node's memory.
 */
static void fs_remove_node(struct fs_base *node)
{
	fs_put(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
285 
/* fs_remove_node() variant for callers holding the parent's lock. */
static void fs_remove_node_parent_locked(struct fs_base *node)
{
	fs_put_parent_locked(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
293 
294 static struct fs_fte *fs_alloc_fte(u8 action,
295 				   u32 flow_tag,
296 				   u32 *match_value,
297 				   unsigned int index)
298 {
299 	struct fs_fte *fte;
300 
301 
302 	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
303 	if (!fte)
304 		return ERR_PTR(-ENOMEM);
305 
306 	memcpy(fte->val, match_value, sizeof(fte->val));
307 	fte->base.type =  FS_TYPE_FLOW_ENTRY;
308 	fte->dests_size = 0;
309 	fte->flow_tag = flow_tag;
310 	fte->index = index;
311 	INIT_LIST_HEAD(&fte->dests);
312 	fte->action = action;
313 
314 	return fte;
315 }
316 
/*
 * Allocate the star (catch-all forwarding) FTE at 'index' in group fg
 * of table ft, together with its single flow-table destination, and
 * append it to the group's FTE list.  The destination's target table is
 * filled in later by fs_set_star_rule().  Returns the FTE, or an
 * ERR_PTR() if the group is full or allocation fails.
 */
static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
					  struct mlx5_flow_group *fg,
					  u32 *match_value,
					  unsigned int index)
{
	int err;
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;

	if (fg->num_ftes == fg->max_ftes)
		return ERR_PTR(-ENOSPC);

	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
	if (IS_ERR(fte))
		return fte;

	/*create dst*/
	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_fte;
	}

	fte->base.parent = &fg->base;
	fte->dests_size = 1;
	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
	dst->base.parent = &fte->base;
	list_add(&dst->base.list, &fte->dests);
	/* assumed that the callee creates the star rules sorted by index */
	list_add_tail(&fte->base.list, &fg->ftes);
	fg->num_ftes++;

	return fte;

free_fte:
	kfree(fte);
	return ERR_PTR(err);
}
356 
/* assume that fte can't be changed */
/*
 * Free a star FTE allocated by alloc_star_ft_entry(): release all of
 * its destinations, unlink it from its flow group and free it.
 */
static void free_star_fte_entry(struct fs_fte *fte)
{
	struct mlx5_flow_group	*fg;
	struct mlx5_flow_rule	*dst, *temp;

	fs_get_parent(fg, fte);

	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
		fte->dests_size--;
		list_del(&dst->base.list);
		kfree(dst);
	}

	list_del(&fte->base.list);
	fg->num_ftes--;
	kfree(fte);
}
375 
376 static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
377 {
378 	struct mlx5_flow_group *fg;
379 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
380 					    create_fg_in, match_criteria);
381 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
382 					    create_fg_in,
383 					    match_criteria_enable);
384 	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
385 	if (!fg)
386 		return ERR_PTR(-ENOMEM);
387 
388 	INIT_LIST_HEAD(&fg->ftes);
389 	fg->mask.match_criteria_enable = match_criteria_enable;
390 	memcpy(&fg->mask.match_criteria, match_criteria,
391 	       sizeof(fg->mask.match_criteria));
392 	fg->base.type =  FS_TYPE_FLOW_GROUP;
393 	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
394 				   start_flow_index);
395 	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
396 				end_flow_index) - fg->start_index + 1;
397 	return fg;
398 }
399 
400 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
401 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
402 					    struct fs_prio *prio);
403 
/* assumed src_ft and dst_ft can't be freed */
/*
 * Point src_ft's star rule at dst_ft.  When dst_ft is non-NULL the star
 * FTE is (re)written in firmware and a reference is taken on dst_ft;
 * when dst_ft is NULL the star FTE is deleted from firmware instead.
 * Any reference on the table previously pointed to is dropped by the
 * callers, not here.  Returns 0 or a negative errno.
 */
static int fs_set_star_rule(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *src_ft,
			    struct mlx5_flow_table *dst_ft)
{
	struct mlx5_flow_rule *src_dst;
	struct fs_fte *src_fte;
	int err = 0;
	u32 *match_value;
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	/* The star FTE has exactly one destination (alloc_star_ft_entry). */
	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
				   struct mlx5_flow_rule, base.list);
	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}
	/*Create match context*/

	fs_get_parent(src_fte, src_dst);

	src_dst->dest_attr.ft = dst_ft;
	if (dst_ft) {
		err = mlx5_cmd_fs_set_fte(dev,
					  src_ft->vport,
					  &src_fte->status,
					  match_value, src_ft->type,
					  src_ft->id, src_fte->index,
					  src_ft->star_rule.fg->id,
					  src_fte->flow_tag,
					  src_fte->action,
					  src_fte->dests_size,
					  &src_fte->dests);
		if (err)
			goto free;

		fs_get(&dst_ft->base);
	} else {
		/* NOTE(review): delete failures are ignored here — err
		 * stays 0 on this path; confirm this is intentional. */
		mlx5_cmd_fs_delete_fte(dev,
				       src_ft->vport,
				       &src_fte->status,
				       src_ft->type, src_ft->id,
				       src_fte->index);
	}

free:
	kvfree(match_value);
	return err;
}
454 
455 static int connect_prev_fts(struct fs_prio *locked_prio,
456 			    struct fs_prio *prev_prio,
457 			    struct mlx5_flow_table *next_ft)
458 {
459 	struct mlx5_flow_table *iter;
460 	int err = 0;
461 	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
462 
463 	if (!dev)
464 		return -ENODEV;
465 
466 	mutex_lock(&prev_prio->base.lock);
467 	fs_for_each_ft(iter, prev_prio) {
468 		struct mlx5_flow_rule *src_dst =
469 			list_first_entry(&iter->star_rule.fte->dests,
470 					 struct mlx5_flow_rule, base.list);
471 		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
472 
473 		if (prev_ft == next_ft)
474 			continue;
475 
476 		err = fs_set_star_rule(dev, iter, next_ft);
477 		if (err) {
478 			mlx5_core_warn(dev,
479 			    "mlx5: flow steering can't connect prev and next\n");
480 			goto unlock;
481 		} else {
482 			/* Assume ft's prio is locked */
483 			if (prev_ft) {
484 				struct fs_prio *prio;
485 
486 				fs_get_parent(prio, prev_ft);
487 				if (prio == locked_prio)
488 					fs_put_parent_locked(&prev_ft->base);
489 				else
490 					fs_put(&prev_ft->base);
491 			}
492 		}
493 	}
494 
495 unlock:
496 	mutex_unlock(&prev_prio->base.lock);
497 	return 0;
498 }
499 
500 static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
501 {
502 	struct mlx5_flow_group *fg;
503 	int err;
504 	u32 *fg_in;
505 	u32 *match_value;
506 	struct mlx5_flow_table *next_ft;
507 	struct mlx5_flow_table *prev_ft;
508 	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
509 	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
510 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
511 
512 	fg_in = mlx5_vzalloc(fg_inlen);
513 	if (!fg_in) {
514 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
515 		return -ENOMEM;
516 	}
517 
518 	match_value = mlx5_vzalloc(match_len);
519 	if (!match_value) {
520 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
521 		kvfree(fg_in);
522 		return -ENOMEM;
523 	}
524 
525 	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
526 	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
527 	fg = fs_alloc_fg(fg_in);
528 	if (IS_ERR(fg)) {
529 		err = PTR_ERR(fg);
530 		goto out;
531 	}
532 	ft->star_rule.fg = fg;
533 	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
534 				     fg_in, ft->vport, ft->type,
535 				     ft->id,
536 				     &fg->id);
537 	if (err)
538 		goto free_fg;
539 
540 	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
541 						      match_value,
542 						      ft->max_fte);
543 	if (IS_ERR(ft->star_rule.fte))
544 		goto free_star_rule;
545 
546 	mutex_lock(&root->fs_chain_lock);
547 	next_ft = find_next_ft(prio);
548 	err = fs_set_star_rule(root->dev, ft, next_ft);
549 	if (err) {
550 		mutex_unlock(&root->fs_chain_lock);
551 		goto free_star_rule;
552 	}
553 	if (next_ft) {
554 		struct fs_prio *parent;
555 
556 		fs_get_parent(parent, next_ft);
557 		fs_put(&next_ft->base);
558 	}
559 	prev_ft = find_prev_ft(ft, prio);
560 	if (prev_ft) {
561 		struct fs_prio *prev_parent;
562 
563 		fs_get_parent(prev_parent, prev_ft);
564 
565 		err = connect_prev_fts(NULL, prev_parent, ft);
566 		if (err) {
567 			mutex_unlock(&root->fs_chain_lock);
568 			goto destroy_chained_star_rule;
569 		}
570 		fs_put(&prev_ft->base);
571 	}
572 	mutex_unlock(&root->fs_chain_lock);
573 	kvfree(fg_in);
574 	kvfree(match_value);
575 
576 	return 0;
577 
578 destroy_chained_star_rule:
579 	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
580 	if (next_ft)
581 		fs_put(&next_ft->base);
582 free_star_rule:
583 	free_star_fte_entry(ft->star_rule.fte);
584 	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
585 			       ft->type, ft->id,
586 			       fg->id);
587 free_fg:
588 	kfree(fg);
589 out:
590 	kvfree(fg_in);
591 	kvfree(match_value);
592 	return err;
593 }
594 
595 static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
596 {
597 	int err;
598 	struct mlx5_flow_root_namespace *root;
599 	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
600 	struct mlx5_flow_table *prev_ft, *next_ft;
601 	struct fs_prio *prev_prio;
602 
603 	WARN_ON(!dev);
604 
605 	root = find_root(&prio->base);
606 	if (!root)
607 		mlx5_core_err(dev,
608 		    "flow steering failed to find root of priority %s",
609 		    prio->base.name);
610 
611 	/* In order to ensure atomic deletion, first update
612 	 * prev ft to point on the next ft.
613 	 */
614 	mutex_lock(&root->fs_chain_lock);
615 	prev_ft = find_prev_ft(ft, prio);
616 	next_ft = find_next_ft(prio);
617 	if (prev_ft) {
618 		fs_get_parent(prev_prio, prev_ft);
619 		/*Prev is connected to ft, only if ft is the first(last) in the prio*/
620 		err = connect_prev_fts(prio, prev_prio, next_ft);
621 		if (err)
622 			mlx5_core_warn(root->dev,
623 				       "flow steering can't connect prev and next of flow table\n");
624 		fs_put(&prev_ft->base);
625 	}
626 
627 	err = fs_set_star_rule(root->dev, ft, NULL);
628 	/*One put is for fs_get in find next ft*/
629 	if (next_ft) {
630 		fs_put(&next_ft->base);
631 		if (!err)
632 			fs_put(&next_ft->base);
633 	}
634 
635 	mutex_unlock(&root->fs_chain_lock);
636 	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
637 				     ft->star_rule.fg->id);
638 	if (err)
639 		mlx5_core_warn(dev,
640 			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
641 			       ft->base.name);
642 	free_star_fte_entry(ft->star_rule.fte);
643 
644 	kfree(ft->star_rule.fg);
645 	ft->star_rule.fg = NULL;
646 }
647 
648 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
649 				 unsigned int prio)
650 {
651 	struct fs_prio *iter_prio;
652 
653 	fs_for_each_prio(iter_prio, ns) {
654 		if (iter_prio->prio == prio)
655 			return iter_prio;
656 	}
657 
658 	return NULL;
659 }
660 
661 static unsigned int _alloc_new_level(struct fs_prio *prio,
662 				     struct mlx5_flow_namespace *match);
663 
/*
 * Sum the max_ft budgets of every priority in ns that precedes 'prio',
 * then keep accumulating up the tree through ns's parent priority via
 * _alloc_new_level().  Helper of alloc_new_level().
 */
static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
				      struct fs_prio *prio)
{
	unsigned int level = 0;
	struct fs_prio *p;

	if (!ns)
		return 0;

	mutex_lock(&ns->base.lock);
	fs_for_each_prio(p, ns) {
		if (p != prio)
			level += p->max_ft;
		else
			break;
	}
	mutex_unlock(&ns->base.lock);

	/* 'prio' is reused here to hold ns's parent priority. */
	fs_get_parent(prio, ns);
	if (prio)
		WARN_ON(prio->base.type != FS_TYPE_PRIO);

	return level + _alloc_new_level(prio, ns);
}
688 
/* Called under lock of priority, hence locking all upper objects */
/*
 * Walk prio's children in reverse.  Sibling namespaces that are not
 * 'match' contribute their priorities' whole max_ft budgets; the first
 * flow table found fixes the answer to just below that table's level.
 * If neither terminates the walk, recurse into the parent namespace.
 * Returns the level to assign to a new flow table created in prio.
 */
static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match)
{
	struct mlx5_flow_namespace *ns;
	struct fs_base *it;
	unsigned int level = 0;

	if (!prio)
		return 0;

	mutex_lock(&prio->base.lock);
	fs_for_each_ns_or_ft_reverse(it, prio) {
		if (it->type == FS_TYPE_NAMESPACE) {
			struct fs_prio *p;

			fs_get_obj(ns, it);

			if (match != ns) {
				mutex_lock(&ns->base.lock);
				fs_for_each_prio(p, ns)
					level += p->max_ft;
				mutex_unlock(&ns->base.lock);
			} else {
				/* Reached the namespace we came from. */
				break;
			}
		} else {
			struct mlx5_flow_table *ft;

			fs_get_obj(ft, it);
			mutex_unlock(&prio->base.lock);
			/* New table goes right below the last one. */
			return level + ft->level + 1;
		}
	}

	fs_get_parent(ns, prio);
	mutex_unlock(&prio->base.lock);
	return __alloc_new_level(ns, prio) + level;
}
728 
/* Compute the level for a new flow table created in 'prio'. */
static unsigned int alloc_new_level(struct fs_prio *prio)
{
	return _alloc_new_level(prio, NULL);
}
733 
734 static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
735 				    struct mlx5_flow_table *ft)
736 {
737 	int err = 0;
738 	int min_level = INT_MAX;
739 
740 	if (root->root_ft)
741 		min_level = root->root_ft->level;
742 
743 	if (ft->level < min_level)
744 		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
745 					      ft->id);
746 	else
747 		return err;
748 
749 	if (err)
750 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
751 			       ft->id);
752 	else
753 		root->root_ft = ft;
754 
755 	return err;
756 }
757 
/*
 * Allocate a flow table with at least max_fte user-visible entries in
 * fs_prio and create it in firmware.  Two extra slots are reserved for
 * the internal star rules and hidden from the user.  On success the
 * table is linked into the priority and may become the new root table
 * when the device supports root modification.  Returns the table or an
 * ERR_PTR().
 */
static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
						 u16 vport,
						 struct fs_prio *fs_prio,
						 int max_fte,
						 const char *name)
{
	struct mlx5_flow_table *ft;
	int err;
	int log_table_sz;
	int ft_size;
	char gen_name[20];
	struct mlx5_flow_root_namespace *root = find_root(&ns->base);
	struct mlx5_core_dev *dev = fs_get_dev(&ns->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of namespace %s",
		    ns->base.name);
		return ERR_PTR(-ENODEV);
	}

	/* Respect the priority's flow table budget. */
	if (fs_prio->num_ft == fs_prio->max_ft)
		return ERR_PTR(-ENOSPC);

	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	fs_init_node(&ft->base, 1);
	INIT_LIST_HEAD(&ft->fgs);

	/* Temporarily WA until we expose the level set in the API */
	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
		root->table_type == FS_FT_ESW_INGRESS_ACL)
		ft->level = 0;
	else
		ft->level = alloc_new_level(fs_prio);

	ft->base.type = FS_TYPE_FLOW_TABLE;
	ft->vport = vport;
	ft->type = root->table_type;
	/*Two entries are reserved for star rules*/
	ft_size = roundup_pow_of_two(max_fte + 2);
	/*User isn't aware to those rules*/
	ft->max_fte = ft_size - 2;
	log_table_sz = ilog2(ft_size);
	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
				    ft->level, log_table_sz, &ft->id);
	if (err)
		goto free_ft;

	err = create_star_rule(ft, fs_prio);
	if (err)
		goto del_ft;

	/* Become the root table if we are now the shallowest one. */
	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
			       flow_table_properties_nic_receive.modify_root)) {
		err = update_root_ft_create(root, ft);
		if (err)
			goto destroy_star_rule;
	}

	/* Fall back to a generated name when none was supplied. */
	if (!name || !strlen(name)) {
		snprintf(gen_name, 20, "flow_table_%u", ft->id);
		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
	} else {
		_fs_add_node(&ft->base, name, &fs_prio->base);
	}
	list_add_tail(&ft->base.list, &fs_prio->objs);
	fs_prio->num_ft++;

	return ft;

destroy_star_rule:
	destroy_star_rule(ft, fs_prio);
del_ft:
	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
free_ft:
	kfree(ft);
	return ERR_PTR(err);
}
839 
840 static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
841 						u16 vport,
842 						unsigned int prio,
843 						int max_fte,
844 						const char *name)
845 {
846 	struct fs_prio *fs_prio = NULL;
847 	fs_prio = find_prio(ns, prio);
848 	if (!fs_prio)
849 		return ERR_PTR(-EINVAL);
850 
851 	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
852 }
853 
854 
855 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
856 						   struct list_head *start);
857 
858 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
859 						     struct list_head *start);
860 
861 static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
862 {
863 	struct mlx5_flow_table *ft;
864 
865 	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
866 	if (ft) {
867 		ft->shared_refcount++;
868 		return ft;
869 	}
870 
871 	return NULL;
872 }
873 
/*
 * Create a flow table whose flow groups are managed automatically as
 * rules are added, capped at max_num_groups groups.  In a shared
 * priority an existing table is reused (with its shared refcount
 * bumped) instead of creating a new one.  Returns the table or an
 * ERR_PTR().
 */
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
							   int prio,
							   const char *name,
							   int num_flow_table_entries,
							   int max_num_groups)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_prio *fs_prio;
	bool is_shared_prio;

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	/* Shared priorities serialize create/destroy on shared_lock. */
	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&fs_prio->shared_lock);
		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
	}

	if (ft)
		goto return_ft;

	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
			      name);
	if (IS_ERR(ft))
		goto return_ft;

	ft->autogroup.active = true;
	ft->autogroup.max_types = max_num_groups;
	if (is_shared_prio)
		ft->shared_refcount = 1;

return_ft:
	if (is_shared_prio)
		mutex_unlock(&fs_prio->shared_lock);
	return ft;
}
912 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
913 
/* Create a flow table bound to the given vport (no autogrouping). */
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     u16 vport,
						     int prio,
						     const char *name,
						     int num_flow_table_entries)
{
	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
}
922 EXPORT_SYMBOL(mlx5_create_vport_flow_table);
923 
/* Create a flow table on vport 0 (no autogrouping). */
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       int prio,
					       const char *name,
					       int num_flow_table_entries)
{
	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
}
931 EXPORT_SYMBOL(mlx5_create_flow_table);
932 
933 static void _fs_del_ft(struct mlx5_flow_table *ft)
934 {
935 	int err;
936 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
937 	struct fs_prio *prio;
938 
939 	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
940 	if (err)
941 		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
942 			       ft->base.name);
943 
944 	fs_get_parent(prio, ft);
945 	prio->num_ft--;
946 }
947 
/*
 * If ft is the current root flow table, switch the root to the next
 * table in the chain (or clear it when none exists) before ft is
 * destroyed.  Returns 0, or the error from the firmware root update.
 */
static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
				    struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_table *put_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	fs_get_parent(prio, ft);
	/*Assuming objs contains only flow tables and
	 * flow tables are sorted by level.
	 */
	if (!list_is_last(&ft->base.list, &prio->objs)) {
		next_ft = list_next_entry(ft, base.list);
	} else {
		/* Last in its prio: search later prios (takes a ref). */
		next_ft = find_next_ft(prio);
		put_ft = next_ft;
	}

	if (next_ft) {
		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
					      next_ft->id);
		if (err)
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
	}
	if (!err)
		root->root_ft = next_ft;

	/* Drop the reference taken by find_next_ft(), if any. */
	if (put_ft)
		fs_put(&put_ft->base);

	return err;
}
985 
/*Objects in the same prio are destroyed in the reverse order they were created*/
/*
 * Destroy a flow table: demote it from root if needed, remove its star
 * rule and release its tree node.  In a shared priority the table is
 * only truly destroyed when its shared reference count drops to one.
 * Returns 0 on success or a negative errno.
 */
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_root_namespace *root;
	bool is_shared_prio;
	struct mlx5_core_dev *dev;

	fs_get_parent(prio, ft);
	root = find_root(&prio->base);
	dev = fs_get_dev(&prio->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		return -ENODEV;
	}

	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&prio->shared_lock);
		/* Other sharers remain: just drop our references. */
		if (ft->shared_refcount > 1) {
			--ft->shared_refcount;
			fs_put(&ft->base);
			mutex_unlock(&prio->shared_lock);
			return 0;
		}
	}

	mutex_lock(&prio->base.lock);
	mutex_lock(&ft->base.lock);

	err = update_root_ft_destroy(root, ft);
	if (err)
		goto unlock_ft;

	/* delete two last entries */
	destroy_star_rule(ft, prio);

	mutex_unlock(&ft->base.lock);
	fs_remove_node_parent_locked(&ft->base);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;

unlock_ft:
	mutex_unlock(&ft->base.lock);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;
}
1043 EXPORT_SYMBOL(mlx5_destroy_flow_table);
1044 
1045 static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
1046 					    struct mlx5_flow_table *ft,
1047 					    struct list_head *prev,
1048 					    u32 *fg_in,
1049 					    int refcount)
1050 {
1051 	struct mlx5_flow_group *fg;
1052 	int err;
1053 	unsigned int end_index;
1054 	char name[20];
1055 
1056 	fg = fs_alloc_fg(fg_in);
1057 	if (IS_ERR(fg))
1058 		return fg;
1059 
1060 	end_index = fg->start_index + fg->max_ftes - 1;
1061 	err =  mlx5_cmd_fs_create_fg(dev, fg_in,
1062 				     ft->vport, ft->type, ft->id,
1063 				     &fg->id);
1064 	if (err)
1065 		goto free_fg;
1066 
1067 	mutex_lock(&ft->base.lock);
1068 	if (ft->autogroup.active)
1069 		ft->autogroup.num_types++;
1070 
1071 	snprintf(name, sizeof(name), "group_%u", fg->id);
1072 	/*Add node to tree*/
1073 	fs_add_node(&fg->base, &ft->base, name, refcount);
1074 	/*Add node to group list*/
1075 	list_add(&fg->base.list, prev);
1076 	mutex_unlock(&ft->base.lock);
1077 
1078 	return fg;
1079 
1080 free_fg:
1081 	kfree(fg);
1082 	return ERR_PTR(err);
1083 }
1084 
1085 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1086 					       u32 *in)
1087 {
1088 	struct mlx5_flow_group *fg;
1089 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
1090 
1091 	if (!dev)
1092 		return ERR_PTR(-ENODEV);
1093 
1094 	if (ft->autogroup.active)
1095 		return ERR_PTR(-EPERM);
1096 
1097 	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);
1098 
1099 	return fg;
1100 }
1101 EXPORT_SYMBOL(mlx5_create_flow_group);
1102 
1103 /*Group is destoyed when all the rules in the group were removed*/
1104 static void fs_del_fg(struct mlx5_flow_group *fg)
1105 {
1106 	struct mlx5_flow_table *parent_ft;
1107 	struct mlx5_core_dev *dev;
1108 
1109 	fs_get_parent(parent_ft, fg);
1110 	dev = fs_get_dev(&parent_ft->base);
1111 	WARN_ON(!dev);
1112 
1113 	if (parent_ft->autogroup.active)
1114 		parent_ft->autogroup.num_types--;
1115 
1116 	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
1117 				   parent_ft->type,
1118 				   parent_ft->id, fg->id))
1119 		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
1120 }
1121 
/*
 * Public API: remove @fg from the steering tree.  Actual teardown
 * (fs_del_fg) runs through the node-removal machinery once the last
 * reference is dropped.
 */
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	fs_remove_node(&fg->base);
}
EXPORT_SYMBOL(mlx5_destroy_flow_group);
1127 
/*
 * Byte-wise masked comparison: returns true iff @val1 and @val2 agree on
 * every bit set in @mask, over @size bytes.
 *
 * Fix: the original advanced `void *` pointers directly, which is a GNU
 * extension (arithmetic on void * is not standard C) and needed a cast on
 * every dereference.  Iterate with unsigned char pointers instead.
 */
static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
{
	const unsigned char *m = mask;
	const unsigned char *v1 = val1;
	const unsigned char *v2 = val2;
	size_t i;

	/* TODO: optimize by comparing 64bits when possible */
	for (i = 0; i < size; i++) {
		if ((v1[i] & m[i]) != (v2[i] & m[i]))
			return false;
	}

	return true;
}
1140 
1141 bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
1142 			       void *val1, void *val2)
1143 {
1144 	if (mask->match_criteria_enable &
1145 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
1146 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1147 						val1, outer_headers);
1148 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1149 						val2, outer_headers);
1150 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1151 					      mask->match_criteria, outer_headers);
1152 
1153 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1154 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1155 			return false;
1156 	}
1157 
1158 	if (mask->match_criteria_enable &
1159 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
1160 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1161 						val1, misc_parameters);
1162 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1163 						val2, misc_parameters);
1164 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1165 					  mask->match_criteria, misc_parameters);
1166 
1167 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1168 					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
1169 			return false;
1170 	}
1171 	if (mask->match_criteria_enable &
1172 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
1173 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1174 						val1, inner_headers);
1175 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1176 						val2, inner_headers);
1177 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1178 					  mask->match_criteria, inner_headers);
1179 
1180 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1181 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1182 			return false;
1183 	}
1184 	return true;
1185 }
1186 
1187 bool fs_match_exact_mask(u8 match_criteria_enable1,
1188 				u8 match_criteria_enable2,
1189 				void *mask1, void *mask2)
1190 {
1191 	return match_criteria_enable1 == match_criteria_enable2 &&
1192 		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
1193 }
1194 
1195 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1196 							   struct list_head *start);
1197 
/*
 * Walk @prio's children (flow tables and sub-namespaces) in reverse,
 * starting from the entry *before* @start, and return the first flow
 * table found, with a reference taken (fs_get).  Sub-namespaces are
 * searched recursively via find_first_ft_in_ns_reverse().
 *
 * Caller must hold prio->base.lock (see the locked wrapper below).
 */
static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
							      struct list_head *start)
{
	/* @start is a list head embedded in an fs_base node. */
	struct fs_base *it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			/* Return the table held; caller must release. */
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		/* Recurse into the namespace, scanning all its prios. */
		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1226 
1227 static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
1228 							     struct list_head *start)
1229 {
1230 	struct mlx5_flow_table *ft;
1231 
1232 	if (!prio)
1233 		return NULL;
1234 
1235 	mutex_lock(&prio->base.lock);
1236 	ft = _find_first_ft_in_prio_reverse(prio, start);
1237 	mutex_unlock(&prio->base.lock);
1238 
1239 	return ft;
1240 }
1241 
/*
 * Scan @ns's prios in reverse, continuing from the prio embedding @start,
 * and return the first flow table found (held) or NULL.
 * Takes ns->base.lock for the duration of the scan.
 */
static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	/* Recover the prio whose list node is @start. */
	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue_reverse(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1265 
/* Find the flow table preceding @curr in steering order by walking
 * backwards through @prio and then up through the enclosing namespaces.
 * Returns a held ft (caller releases) or NULL.
 * Assumptions: @curr is protected and @curr's parent is locked.
 */
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	if (!curr)
		return NULL;

	/* prio has either namespace or flow-tables, but not both.  Only
	 * when @curr is the first object of its prio can a predecessor
	 * exist outside the prio.
	 */
	if (!list_empty(&prio->objs) &&
	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
	    curr)
		return NULL;

	/* Climb: search the parent namespace before this prio, then the
	 * grandparent prio before that namespace, until a table is found
	 * or the tree root is reached.
	 */
	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (prio && !ft)
			ft = find_first_ft_in_prio_reverse(prio,
							   &curr_base->list);
	}
	return ft;
}
1298 
/*
 * Forward counterpart of _find_first_ft_in_prio_reverse(): walk @prio's
 * children starting after @start and return the first flow table found,
 * held via fs_get().  Caller must hold prio->base.lock.
 */
static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
						      struct list_head *start)
{
	struct fs_base	*it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			/* Held table; caller must release. */
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		/* Recurse into the namespace from its first prio. */
		ft = find_first_ft_in_ns(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1327 
1328 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
1329 						     struct list_head *start)
1330 {
1331 	struct mlx5_flow_table *ft;
1332 
1333 	if (!prio)
1334 		return NULL;
1335 
1336 	mutex_lock(&prio->base.lock);
1337 	ft = _find_first_ft_in_prio(prio, start);
1338 	mutex_unlock(&prio->base.lock);
1339 
1340 	return ft;
1341 }
1342 
/*
 * Scan @ns's prios forward, continuing from the prio embedding @start,
 * and return the first flow table found (held) or NULL.
 * Takes ns->base.lock for the duration of the scan.
 */
static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
						   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	/* Recover the prio whose list node is @start. */
	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1366 
/* Find the flow table following @prio in steering order by climbing the
 * tree forward.  Returns a held ft (caller releases) or NULL.
 * Assumptions: curr is protected, curr's parent is locked.
 */
static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	/* Mirror of find_prev_ft(): search the parent namespace after
	 * this prio, then the grandparent prio after that namespace.
	 */
	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		/* NOTE: unlocked variant used here, unlike find_prev_ft —
		 * presumably because the caller already holds the relevant
		 * lock; verify against call sites.
		 */
		if (!ft && prio)
			ft = _find_first_ft_in_prio(prio, &curr_base->list);
	}
	return ft;
}
1388 
1389 
/* called under ft mutex lock */
/*
 * Create an automatically-sized flow group for a table in autogroup
 * mode: pick a free window of @group_size FTE indices by scanning the
 * (start_index-sorted) existing groups, then create a group covering it
 * with the given match criteria.  Returns the new group (refcount 0) or
 * an ERR_PTR.
 */
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
						u8 match_criteria_enable,
						u32 *match_criteria)
{
	unsigned int group_size;
	unsigned int candidate_index = 0;
	unsigned int candidate_group_num = 0;
	struct mlx5_flow_group *g;
	struct mlx5_flow_group *ret;
	struct list_head *prev = &ft->fgs;
	struct mlx5_core_dev *dev;
	u32 *in;
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	void *match_criteria_addr;

	if (!ft->autogroup.active)
		return ERR_PTR(-ENOENT);

	dev = fs_get_dev(&ft->base);
	if (!dev)
		return ERR_PTR(-ENODEV);

	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return ERR_PTR(-ENOMEM);
	}


	/* Divide the table evenly among max_types+1 groups while slots
	 * remain; once the budget is exhausted fall back to size-1 groups.
	 */
	if (ft->autogroup.num_types < ft->autogroup.max_types)
		group_size = ft->max_fte / (ft->autogroup.max_types + 1);
	else
		group_size = 1;

	if (group_size == 0) {
		mlx5_core_warn(dev,
			       "flow steering can't create group size of 0\n");
		ret = ERR_PTR(-EINVAL);
		goto out;
	}

	/* sorted by start_index */
	fs_for_each_fg(g, ft) {
		candidate_group_num++;
		/* Window overlaps group @g: restart the search just past
		 * it; otherwise the gap before @g fits and we stop.
		 */
		if (candidate_index + group_size > g->start_index)
			candidate_index = g->start_index + g->max_ftes;
		else
			break;
		prev = &g->base.list;
	}

	if (candidate_index + group_size > ft->max_fte) {
		ret = ERR_PTR(-ENOSPC);
		goto out;
	}

	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
		 match_criteria_enable);
	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
		 group_size - 1);
	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
					   in, match_criteria);
	memcpy(match_criteria_addr, match_criteria,
	       MLX5_ST_SZ_BYTES(fte_match_param));

	/* refcount 0: the group lives only as long as its rules do. */
	ret = fs_create_fg(dev, ft, prev, in, 0);
out:
	kvfree(in);
	return ret;
}
1462 
1463 static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
1464 {
1465 	struct mlx5_flow_namespace *ns = NULL;
1466 
1467 	while (node  && (node->type != FS_TYPE_NAMESPACE ||
1468 			      list_empty(&container_of(node, struct
1469 						       mlx5_flow_namespace,
1470 						       base)->list_notifiers)))
1471 		node = node->parent;
1472 
1473 	if (node)
1474 		fs_get_obj(ns, node);
1475 
1476 	return ns;
1477 }
1478 
1479 
/* Assumption: fte is locked. */
/*
 * Invoke every registered add_dst_cb notifier on the nearest ancestor
 * namespace after @dst was added to @fte.  Stops at the first callback
 * that returns an error.
 */
static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	/* @dst is "new" if it became the first destination of the FTE. */
	bool is_new_rule = list_first_entry(&fte->dests,
					    struct mlx5_flow_rule,
					    base.list) == dst;
	int err;

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;

	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		if (iter_handler->add_dst_cb) {
			/* Look up this handler's per-rule client data. */
			data = NULL;
			mutex_lock(&dst->clients_lock);
			list_for_each_entry(
				iter_client, &dst->clients_data, list) {
				if (iter_client->fs_handler == iter_handler) {
					data = iter_client->client_dst_data;
					break;
				}
			}
			mutex_unlock(&dst->clients_lock);
			/* NOTE(review): `data` is looked up above but NULL is
			 * passed here, while call_to_del_rule_notifiers passes
			 * the looked-up data.  Possibly a bug — verify intent.
			 */
			err  = iter_handler->add_dst_cb(dst,
							is_new_rule,
							NULL,
							iter_handler->client_context);
			if (err)
				break;
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1521 
1522 static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
1523 				      struct fs_fte *fte)
1524 {
1525 	struct mlx5_flow_namespace *ns;
1526 	struct mlx5_flow_handler *iter_handler;
1527 	struct fs_client_priv_data *iter_client;
1528 	void *data;
1529 	bool ctx_changed = (fte->dests_size == 0);
1530 
1531 	ns = get_ns_with_notifiers(&fte->base);
1532 	if (!ns)
1533 		return;
1534 	down_read(&ns->notifiers_rw_sem);
1535 	list_for_each_entry(iter_handler, &ns->list_notifiers,
1536 			    list) {
1537 		data = NULL;
1538 		mutex_lock(&dst->clients_lock);
1539 		list_for_each_entry(iter_client, &dst->clients_data, list) {
1540 			if (iter_client->fs_handler == iter_handler) {
1541 				data = iter_client->client_dst_data;
1542 				break;
1543 			}
1544 		}
1545 		mutex_unlock(&dst->clients_lock);
1546 		if (iter_handler->del_dst_cb) {
1547 			iter_handler->del_dst_cb(dst, ctx_changed, data,
1548 						 iter_handler->client_context);
1549 		}
1550 	}
1551 	up_read(&ns->notifiers_rw_sem);
1552 }
1553 
/* fte should not be deleted while calling this function */
/*
 * Allocate a new rule (destination) for @fte and push the updated FTE
 * to firmware.  The rule is temporarily linked into fte->dests so the
 * set_fte command sees the full destination list, then unlinked again on
 * success — the caller re-links it when adding the rule to the tree
 * (see add_rule_to_tree).  Returns the rule or an ERR_PTR.
 */
static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
					      struct mlx5_flow_group *fg,
					      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *dst;
	int err;

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	memcpy(&dst->dest_attr, dest, sizeof(*dest));
	dst->base.type = FS_TYPE_FLOW_DEST;
	INIT_LIST_HEAD(&dst->clients_data);
	mutex_init(&dst->clients_lock);
	fs_get_parent(ft, fg);
	/*Add dest to dests list- added as first element after the head*/
	list_add_tail(&dst->base.list, &fte->dests);
	fte->dests_size++;
	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
				  ft->vport,
				  &fte->status,
				  fte->val, ft->type,
				  ft->id, fte->index, fg->id, fte->flow_tag,
				  fte->action, fte->dests_size, &fte->dests);
	if (err)
		goto free_dst;

	/* Unlink again; dests_size stays incremented for the new rule. */
	list_del(&dst->base.list);

	return dst;

free_dst:
	list_del(&dst->base.list);
	kfree(dst);
	fte->dests_size--;
	return ERR_PTR(err);
}
1594 
1595 static char *get_dest_name(struct mlx5_flow_destination *dest)
1596 {
1597 	char *name = kzalloc(sizeof(char) * 20, GFP_KERNEL);
1598 
1599 	switch (dest->type) {
1600 	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
1601 		snprintf(name, 20, "dest_%s_%u", "flow_table",
1602 			 dest->ft->id);
1603 		return name;
1604 	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
1605 		snprintf(name, 20, "dest_%s_%u", "vport",
1606 			 dest->vport_num);
1607 		return name;
1608 	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
1609 		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
1610 		return name;
1611 	default:
1612 		kfree(name);
1613 		return NULL;
1614 	}
1615 }
1616 
/* assumed fg is locked */
/*
 * Return the first unused FTE index inside @fg; the FTE list is kept
 * sorted by index, so the first gap (or the slot after the last FTE)
 * is free.  If @prev is non-NULL it is set to the list position after
 * which a new FTE at the returned index should be inserted.
 */
static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
					 struct list_head **prev)
{
	struct fs_fte *fte;
	unsigned int start = fg->start_index;

	if (prev)
		*prev = &fg->ftes;

	/* assumed list is sorted by index */
	fs_for_each_fte(fte, fg) {
		/* First gap in the sorted sequence is the free slot. */
		if (fte->index != start)
			return start;
		start++;
		if (prev)
			*prev = &fte->base.list;
	}

	return start;
}
1638 
1639 
1640 static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
1641 			     u32 *match_value,
1642 			     u8 action,
1643 			     u32 flow_tag,
1644 			     struct list_head **prev)
1645 {
1646 	struct fs_fte *fte;
1647 	int index = 0;
1648 
1649 	index = fs_get_free_fg_index(fg, prev);
1650 	fte = fs_alloc_fte(action, flow_tag, match_value, index);
1651 	if (IS_ERR(fte))
1652 		return fte;
1653 
1654 	return fte;
1655 }
1656 
/*
 * Attach @rule to the tree under @fte and notify registered handlers.
 * NOTE(review): get_dest_name() can return NULL (unknown dest type or
 * allocation failure); fs_add_node is then called with a NULL name —
 * verify fs_add_node tolerates that.
 */
static void add_rule_to_tree(struct mlx5_flow_rule *rule,
			     struct fs_fte *fte)
{
	char *dest_name;

	dest_name = get_dest_name(&rule->dest_attr);
	fs_add_node(&rule->base, &fte->base, dest_name, 1);
	/* re-add to list, since fs_add_node reset our list */
	list_add_tail(&rule->base.list, &fte->dests);
	kfree(dest_name);
	call_to_add_rule_notifiers(rule, fte);
}
1669 
/*
 * Remove destination @dst from its FTE and push the shrunken destination
 * list to firmware (only while destinations remain; an FTE with zero
 * destinations is deleted separately via fs_del_fte).  Runs under the
 * parent group's lock.
 */
static void fs_del_dst(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;

	WARN_ON(!dev);

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return;
	}

	fs_get_parent(fte, dst);
	fs_get_parent(fg, fte);
	mutex_lock(&fg->base.lock);
	memcpy(match_value, fte->val, sizeof(fte->val));
	/* ft can't be changed as fg is locked */
	fs_get_parent(ft, fg);
	list_del(&dst->base.list);
	fte->dests_size--;
	if (fte->dests_size) {
		/* Re-program the FTE with the remaining destinations. */
		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
					  &fte->status, match_value, ft->type,
					  ft->id, fte->index, fg->id,
					  fte->flow_tag, fte->action,
					  fte->dests_size, &fte->dests);
		if (err) {
			mlx5_core_warn(dev, "%s can't delete dst %s\n",
				       __func__, dst->base.name);
			/* On command failure, del notifiers are skipped. */
			goto err;
		}
	}
	call_to_del_rule_notifiers(dst, fte);
err:
	mutex_unlock(&fg->base.lock);
	kvfree(match_value);
}
1713 
1714 static void fs_del_fte(struct fs_fte *fte)
1715 {
1716 	struct mlx5_flow_table *ft;
1717 	struct mlx5_flow_group *fg;
1718 	int err;
1719 	struct mlx5_core_dev *dev;
1720 
1721 	fs_get_parent(fg, fte);
1722 	fs_get_parent(ft, fg);
1723 
1724 	dev = fs_get_dev(&ft->base);
1725 	WARN_ON(!dev);
1726 
1727 	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
1728 				     ft->type, ft->id, fte->index);
1729 	if (err)
1730 		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
1731 			       fte->base.name);
1732 
1733 	fg->num_ftes--;
1734 }
1735 
/* assuming parent fg is locked */
/* Add dst algorithm */
/*
 * Add a destination to group @fg: if an existing FTE matches exactly
 * (masked value, action, flow tag), append the destination to it;
 * otherwise allocate a new FTE at a free index and program it.
 * Returns the new rule or an ERR_PTR (-ENOSPC when the group is full).
 */
static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
						   u32 *match_value,
						   u8 action,
						   u32 flow_tag,
						   struct mlx5_flow_destination *dest)
{
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	char fte_name[20];

	mutex_lock(&fg->base.lock);
	fs_for_each_fte(fte, fg) {
		/* TODO: Check of size against PRM max size */
		mutex_lock(&fte->base.lock);
		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
		    action == fte->action && flow_tag == fte->flow_tag) {
			/* Existing FTE matches: just append the dest. */
			dst = _fs_add_dst_fte(fte, fg, dest);
			mutex_unlock(&fte->base.lock);
			if (IS_ERR(dst))
				goto unlock_fg;
			goto add_rule;
		}
		mutex_unlock(&fte->base.lock);
	}

	fs_get_parent(ft, fg);
	if (fg->num_ftes == fg->max_ftes) {
		dst = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	fte = fs_create_fte(fg, match_value, action, flow_tag, &prev);
	if (IS_ERR(fte)) {
		dst = (void *)fte;
		goto unlock_fg;
	}
	dst = _fs_add_dst_fte(fte, fg, dest);
	if (IS_ERR(dst)) {
		/* New FTE never reached firmware; plain kfree suffices. */
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
	/* Add node to tree */
	fs_add_node(&fte->base, &fg->base, fte_name, 0);
	/* Keep the FTE list sorted: insert after @prev. */
	list_add(&fte->base.list, prev);
add_rule:
	add_rule_to_tree(dst, fte);
unlock_fg:
	mutex_unlock(&fg->base.lock);
	return dst;
}
1794 
/*
 * Add a destination to table @ft: try each group whose mask matches the
 * requested criteria exactly; if all are full (-ENOSPC) or none match,
 * create an autogroup and add the rule there.  Returns the rule or an
 * ERR_PTR.
 */
static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
					    u8 match_criteria_enable,
					    u32 *match_criteria,
					    u32 *match_value,
					    u8 action, u32 flow_tag,
					    struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_group *g;
	struct mlx5_flow_rule *dst;

	fs_get(&ft->base);
	mutex_lock(&ft->base.lock);
	fs_for_each_fg(g, ft)
		if (fs_match_exact_mask(g->mask.match_criteria_enable,
					match_criteria_enable,
					g->mask.match_criteria,
					match_criteria)) {
			mutex_unlock(&ft->base.lock);

			dst = fs_add_dst_fg(g, match_value,
					    action, flow_tag, dest);
			/* PTR_ERR of a valid pointer is also non-zero, so
			 * this branch is taken on success AND on any error
			 * other than -ENOSPC; only -ENOSPC keeps scanning.
			 */
			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
				goto unlock;
			/* NOTE(review): on -ENOSPC the loop continues
			 * WITHOUT re-taking ft->base.lock, and the lock is
			 * unlocked again after the loop — the locking looks
			 * unbalanced on that path; verify against upstream.
			 */
		}
	mutex_unlock(&ft->base.lock);

	g = create_autogroup(ft, match_criteria_enable, match_criteria);
	if (IS_ERR(g)) {
		dst = (void *)g;
		goto unlock;
	}

	dst = fs_add_dst_fg(g, match_value,
			    action, flow_tag, dest);
	if (IS_ERR(dst)) {
		/* Remove assumes refcount > 0 and autogroup creates a group
		 * with a refcount = 0.
		 */
		fs_get(&g->base);
		fs_remove_node(&g->base);
		goto unlock;
	}

unlock:
	fs_put(&ft->base);
	return dst;
}
1843 
1844 struct mlx5_flow_rule *
1845 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1846 		   u8 match_criteria_enable,
1847 		   u32 *match_criteria,
1848 		   u32 *match_value,
1849 		   u32 action,
1850 		   u32 flow_tag,
1851 		   struct mlx5_flow_destination *dest)
1852 {
1853 	struct mlx5_flow_rule *dst;
1854 	struct mlx5_flow_namespace *ns;
1855 
1856 	ns = get_ns_with_notifiers(&ft->base);
1857 	if (ns)
1858 		down_read(&ns->dests_rw_sem);
1859 	dst =  fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
1860 			     match_value, action, flow_tag, dest);
1861 	if (ns)
1862 		up_read(&ns->dests_rw_sem);
1863 
1864 	return dst;
1865 
1866 
1867 }
1868 EXPORT_SYMBOL(mlx5_add_flow_rule);
1869 
1870 void mlx5_del_flow_rule(struct mlx5_flow_rule *dst)
1871 {
1872 	struct mlx5_flow_namespace *ns;
1873 
1874 	ns = get_ns_with_notifiers(&dst->base);
1875 	if (ns)
1876 		down_read(&ns->dests_rw_sem);
1877 	fs_remove_node(&dst->base);
1878 	if (ns)
1879 		up_read(&ns->dests_rw_sem);
1880 }
1881 EXPORT_SYMBOL(mlx5_del_flow_rule);
1882 
/* Tree-node names of the various root namespaces. */
#define MLX5_CORE_FS_ROOT_NS_NAME "root"
#define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
#define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
#define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
#define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
#define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
/* Per-prio defaults: max flow tables / max sub-namespaces. */
#define MLX5_CORE_FS_PRIO_MAX_FT 4
#define MLX5_CORE_FS_PRIO_MAX_NS 1
1891 
1892 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
1893 				      unsigned prio, int max_ft,
1894 				      const char *name, u8 flags)
1895 {
1896 	struct fs_prio *fs_prio;
1897 
1898 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
1899 	if (!fs_prio)
1900 		return ERR_PTR(-ENOMEM);
1901 
1902 	fs_prio->base.type = FS_TYPE_PRIO;
1903 	fs_add_node(&fs_prio->base, &ns->base, name, 1);
1904 	fs_prio->max_ft = max_ft;
1905 	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
1906 	fs_prio->prio = prio;
1907 	fs_prio->flags = flags;
1908 	list_add_tail(&fs_prio->base.list, &ns->prios);
1909 	INIT_LIST_HEAD(&fs_prio->objs);
1910 	mutex_init(&fs_prio->shared_lock);
1911 
1912 	return fs_prio;
1913 }
1914 
/*
 * Tear down dev->root_ns bottom-up in three stages: (1) the prios of
 * every second-level namespace, (2) the objects (namespaces) of every
 * top-level prio, (3) the top-level prios themselves, then the root
 * namespace node.  The staging removes children before their parents.
 */
static void cleanup_root_ns(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
	struct fs_prio *iter_prio;

	if (!root_ns)
		return;

	/* stage 1 */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		struct mlx5_flow_namespace *iter_ns;

		fs_for_each_ns(iter_ns, iter_prio) {
			while (!list_empty(&iter_ns->prios)) {
				struct fs_base *iter_prio2 =
					list_first_entry(&iter_ns->prios,
							 struct fs_base,
							 list);

				fs_remove_node(iter_prio2);
			}
		}
	}

	/* stage 2 */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		while (!list_empty(&iter_prio->objs)) {
			struct fs_base *iter_ns =
				list_first_entry(&iter_prio->objs,
						 struct fs_base,
						 list);

				fs_remove_node(iter_ns);
		}
	}
	/* stage 3 */
	while (!list_empty(&root_ns->ns.prios)) {
		struct fs_base *iter_prio =
			list_first_entry(&root_ns->ns.prios,
					 struct fs_base,
					 list);

		fs_remove_node(iter_prio);
	}

	fs_remove_node(&root_ns->ns.base);
	dev->root_ns = NULL;
}
1963 
1964 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1965 					struct mlx5_flow_root_namespace *root_ns)
1966 {
1967 	struct fs_base *prio;
1968 
1969 	if (!root_ns)
1970 		return;
1971 
1972 	if (!list_empty(&root_ns->ns.prios)) {
1973 		prio = list_first_entry(&root_ns->ns.prios,
1974 					struct fs_base,
1975 				 list);
1976 		fs_remove_node(prio);
1977 	}
1978 	fs_remove_node(&root_ns->ns.base);
1979 	root_ns = NULL;
1980 }
1981 
/*
 * Tear down all flow-steering root namespaces of @dev: the (multi-prio)
 * NIC RX root first, then each single-prio root.
 */
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
	cleanup_root_ns(dev);
	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
}
1991 
1992 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
1993 						 *ns)
1994 {
1995 	ns->base.type = FS_TYPE_NAMESPACE;
1996 	init_rwsem(&ns->dests_rw_sem);
1997 	init_rwsem(&ns->notifiers_rw_sem);
1998 	INIT_LIST_HEAD(&ns->prios);
1999 	INIT_LIST_HEAD(&ns->list_notifiers);
2000 
2001 	return ns;
2002 }
2003 
2004 static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
2005 							  enum fs_ft_type
2006 							  table_type,
2007 							  char *name)
2008 {
2009 	struct mlx5_flow_root_namespace *root_ns;
2010 	struct mlx5_flow_namespace *ns;
2011 
2012 	/* create the root namespace */
2013 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2014 	if (!root_ns)
2015 		goto err;
2016 
2017 	root_ns->dev = dev;
2018 	root_ns->table_type = table_type;
2019 	mutex_init(&root_ns->fs_chain_lock);
2020 
2021 	ns = &root_ns->ns;
2022 	fs_init_namespace(ns);
2023 	fs_add_node(&ns->base, NULL, name, 1);
2024 
2025 	return root_ns;
2026 err:
2027 	return NULL;
2028 }
2029 
2030 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2031 {
2032 	struct fs_prio *prio;
2033 
2034 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2035 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2036 	if (!dev->fdb_root_ns)
2037 		return -ENOMEM;
2038 
2039 	/* create 1 prio*/
2040 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2041 	if (IS_ERR(prio))
2042 		return PTR_ERR(prio);
2043 	else
2044 		return 0;
2045 }
2046 
2047 #define MAX_VPORTS 128
2048 
2049 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2050 {
2051 	struct fs_prio *prio;
2052 
2053 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2054 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2055 	if (!dev->esw_egress_root_ns)
2056 		return -ENOMEM;
2057 
2058 	/* create 1 prio*/
2059 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2060 			      "esw_egress_prio", 0);
2061 	if (IS_ERR(prio))
2062 		return PTR_ERR(prio);
2063 	else
2064 		return 0;
2065 }
2066 
2067 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2068 {
2069 	struct fs_prio *prio;
2070 
2071 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2072 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2073 	if (!dev->esw_ingress_root_ns)
2074 		return -ENOMEM;
2075 
2076 	/* create 1 prio*/
2077 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2078 			      "esw_ingress_prio", 0);
2079 	if (IS_ERR(prio))
2080 		return PTR_ERR(prio);
2081 	else
2082 		return 0;
2083 }
2084 
2085 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2086 {
2087 	struct fs_prio *prio;
2088 
2089 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2090 				     MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2091 	if (!dev->sniffer_rx_root_ns)
2092 		return  -ENOMEM;
2093 
2094 	/* create 1 prio*/
2095 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2096 			      "sniffer_prio", 0);
2097 	if (IS_ERR(prio))
2098 		return PTR_ERR(prio);
2099 	else
2100 		return 0;
2101 }
2102 
2103 
2104 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2105 {
2106 	struct fs_prio *prio;
2107 
2108 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2109 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2110 	if (!dev->sniffer_tx_root_ns)
2111 		return  -ENOMEM;
2112 
2113 	/* create 1 prio*/
2114 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2115 			      "sniffer_prio", 0);
2116 	if (IS_ERR(prio))
2117 		return PTR_ERR(prio);
2118 	else
2119 		return 0;
2120 }
2121 
2122 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2123 						       const char *name)
2124 {
2125 	struct mlx5_flow_namespace	*ns;
2126 
2127 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2128 	if (!ns)
2129 		return ERR_PTR(-ENOMEM);
2130 
2131 	fs_init_namespace(ns);
2132 	fs_add_node(&ns->base, &prio->base, name, 1);
2133 	list_add_tail(&ns->base.list, &prio->objs);
2134 
2135 	return ns;
2136 }
2137 
#define FLOW_TABLE_BIT_SZ 1
/* Extract a 1-bit capability at bit-offset @offset from the device's
 * current flow-table capability block (array of big-endian 32-bit words).
 */
#define GET_FLOW_TABLE_CAP(dev, offset) \
	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
			offset / 32)) >>					\
	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2143 
2144 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2145 {
2146 	int i;
2147 
2148 	for (i = 0; i < caps->arr_sz; i++) {
2149 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2150 			return false;
2151 	}
2152 	return true;
2153 }
2154 
/*
 * Recursively materialise the static init_tree_node description @node as
 * prios/namespaces under @base_parent.  Prio nodes are silently skipped
 * (err stays 0) when the device lacks the required ft level or caps.
 * A prio's priority number is its index within the parent's children
 * array.  Returns 0 or the first error encountered.
 */
static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
		    struct init_tree_node *node, struct fs_base *base_parent,
		    struct init_tree_node *tree_parent)
{
	struct mlx5_flow_namespace *fs_ns;
	struct fs_prio *fs_prio;
	int priority;
	struct fs_base *base;
	int i;
	int err = 0;

	if (node->type == FS_TYPE_PRIO) {
		/* Skip (not fail) prios the device cannot support. */
		if ((node->min_ft_level > max_ft_level) ||
		    !has_required_caps(dev, &node->caps))
			goto out;

		fs_get_obj(fs_ns, base_parent);
		/* Priority == position in the parent's children array. */
		priority = node - tree_parent->children;
		fs_prio = fs_create_prio(fs_ns, priority,
					 node->max_ft,
					 node->name, node->flags);
		if (IS_ERR(fs_prio)) {
			err = PTR_ERR(fs_prio);
			goto out;
		}
		base = &fs_prio->base;
	} else if (node->type == FS_TYPE_NAMESPACE) {
		fs_get_obj(fs_prio, base_parent);
		fs_ns = fs_create_namespace(fs_prio, node->name);
		if (IS_ERR(fs_ns)) {
			err = PTR_ERR(fs_ns);
			goto out;
		}
		base = &fs_ns->base;
	} else {
		return -EINVAL;
	}
	/* Recurse into children under the node just created. */
	for (i = 0; i < node->ar_size; i++) {
		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
				      node);
		if (err)
			break;
	}
out:
	return err;
}
2201 
2202 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2203 		   struct init_tree_node *node, struct fs_base *parent)
2204 {
2205 	int i;
2206 	struct mlx5_flow_namespace *fs_ns;
2207 	int err = 0;
2208 
2209 	fs_get_obj(fs_ns, parent);
2210 	for (i = 0; i < node->ar_size; i++) {
2211 		err = _init_root_tree(dev, max_ft_level,
2212 				      &node->children[i], &fs_ns->base, node);
2213 		if (err)
2214 			break;
2215 	}
2216 	return err;
2217 }
2218 
2219 static int sum_max_ft_in_prio(struct fs_prio *prio);
2220 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2221 {
2222 	struct fs_prio *prio;
2223 	int sum = 0;
2224 
2225 	fs_for_each_prio(prio, ns) {
2226 		sum += sum_max_ft_in_prio(prio);
2227 	}
2228 	return  sum;
2229 }
2230 
2231 static int sum_max_ft_in_prio(struct fs_prio *prio)
2232 {
2233 	int sum = 0;
2234 	struct fs_base *it;
2235 	struct mlx5_flow_namespace	*ns;
2236 
2237 	if (prio->max_ft)
2238 		return prio->max_ft;
2239 
2240 	fs_for_each_ns_or_ft(it, prio) {
2241 		if (it->type == FS_TYPE_FLOW_TABLE)
2242 			continue;
2243 
2244 		fs_get_obj(ns, it);
2245 		sum += sum_max_ft_in_ns(ns);
2246 	}
2247 	prio->max_ft = sum;
2248 	return  sum;
2249 }
2250 
2251 static void set_max_ft(struct mlx5_flow_namespace *ns)
2252 {
2253 	struct fs_prio *prio;
2254 
2255 	if (!ns)
2256 		return;
2257 
2258 	fs_for_each_prio(prio, ns)
2259 		sum_max_ft_in_prio(prio);
2260 }
2261 
2262 static int init_root_ns(struct mlx5_core_dev *dev)
2263 {
2264 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2265 					      flow_table_properties_nic_receive.
2266 					      max_ft_level);
2267 
2268 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2269 				      MLX5_CORE_FS_ROOT_NS_NAME);
2270 	if (IS_ERR_OR_NULL(dev->root_ns))
2271 		goto err;
2272 
2273 
2274 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2275 		goto err;
2276 
2277 	set_max_ft(&dev->root_ns->ns);
2278 
2279 	return 0;
2280 err:
2281 	return -ENOMEM;
2282 }
2283 
2284 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2285 {
2286 	struct fs_base *pbase;
2287 	struct mlx5_flow_group *fg;
2288 
2289 	pbase = rule->base.parent;
2290 	WARN_ON(!pbase);
2291 	pbase = pbase->parent;
2292 	WARN_ON(!pbase);
2293 
2294 	fs_get_obj(fg, pbase);
2295 	return fg->mask.match_criteria_enable;
2296 }
2297 
2298 void mlx5_get_match_value(u32 *match_value,
2299 			  struct mlx5_flow_rule *rule)
2300 {
2301 	struct fs_base *pbase;
2302 	struct fs_fte *fte;
2303 
2304 	pbase = rule->base.parent;
2305 	WARN_ON(!pbase);
2306 	fs_get_obj(fte, pbase);
2307 
2308 	memcpy(match_value, fte->val, sizeof(fte->val));
2309 }
2310 
2311 void mlx5_get_match_criteria(u32 *match_criteria,
2312 			     struct mlx5_flow_rule *rule)
2313 {
2314 	struct fs_base *pbase;
2315 	struct mlx5_flow_group *fg;
2316 
2317 	pbase = rule->base.parent;
2318 	WARN_ON(!pbase);
2319 	pbase = pbase->parent;
2320 	WARN_ON(!pbase);
2321 
2322 	fs_get_obj(fg, pbase);
2323 	memcpy(match_criteria, &fg->mask.match_criteria,
2324 	       sizeof(fg->mask.match_criteria));
2325 }
2326 
2327 int mlx5_init_fs(struct mlx5_core_dev *dev)
2328 {
2329 	int err;
2330 
2331 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
2332 		err = init_root_ns(dev);
2333 		if (err)
2334 			goto err;
2335 	}
2336 
2337 	err = init_fdb_root_ns(dev);
2338 	if (err)
2339 		goto err;
2340 
2341 	err = init_egress_acl_root_ns(dev);
2342 	if (err)
2343 		goto err;
2344 
2345 	err = init_ingress_acl_root_ns(dev);
2346 	if (err)
2347 		goto err;
2348 
2349 	err = init_sniffer_tx_root_ns(dev);
2350 	if (err)
2351 		goto err;
2352 
2353 	err = init_sniffer_rx_root_ns(dev);
2354 	if (err)
2355 		goto err;
2356 
2357 	return 0;
2358 err:
2359 	mlx5_cleanup_fs(dev);
2360 	return err;
2361 }
2362 
2363 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2364 						  enum mlx5_flow_namespace_type type)
2365 {
2366 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2367 	int prio;
2368 	static struct fs_prio *fs_prio;
2369 	struct mlx5_flow_namespace *ns;
2370 
2371 	switch (type) {
2372 	case MLX5_FLOW_NAMESPACE_BYPASS:
2373 		prio = 0;
2374 		break;
2375 	case MLX5_FLOW_NAMESPACE_KERNEL:
2376 		prio = 1;
2377 		break;
2378 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2379 		prio = 2;
2380 		break;
2381 	case MLX5_FLOW_NAMESPACE_FDB:
2382 		if (dev->fdb_root_ns)
2383 			return &dev->fdb_root_ns->ns;
2384 		else
2385 			return NULL;
2386 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2387 		if (dev->esw_egress_root_ns)
2388 			return &dev->esw_egress_root_ns->ns;
2389 		else
2390 			return NULL;
2391 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2392 		if (dev->esw_ingress_root_ns)
2393 			return &dev->esw_ingress_root_ns->ns;
2394 		else
2395 			return NULL;
2396 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2397 		if (dev->sniffer_rx_root_ns)
2398 			return &dev->sniffer_rx_root_ns->ns;
2399 		else
2400 			return NULL;
2401 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2402 		if (dev->sniffer_tx_root_ns)
2403 			return &dev->sniffer_tx_root_ns->ns;
2404 		else
2405 			return NULL;
2406 	default:
2407 		return NULL;
2408 	}
2409 
2410 	if (!root_ns)
2411 		return NULL;
2412 
2413 	fs_prio = find_prio(&root_ns->ns, prio);
2414 	if (!fs_prio)
2415 		return NULL;
2416 
2417 	ns = list_first_entry(&fs_prio->objs,
2418 			      typeof(*ns),
2419 			      base.list);
2420 
2421 	return ns;
2422 }
2423 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2424 
2425 
2426 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2427 				  struct mlx5_flow_handler *fs_handler,
2428 				  void  *client_data)
2429 {
2430 	struct fs_client_priv_data *priv_data;
2431 
2432 	mutex_lock(&rule->clients_lock);
2433 	/*Check that hanlder isn't exists in the list already*/
2434 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2435 		if (priv_data->fs_handler == fs_handler) {
2436 			priv_data->client_dst_data = client_data;
2437 			goto unlock;
2438 		}
2439 	}
2440 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2441 	if (!priv_data) {
2442 		mutex_unlock(&rule->clients_lock);
2443 		return -ENOMEM;
2444 	}
2445 
2446 	priv_data->client_dst_data = client_data;
2447 	priv_data->fs_handler = fs_handler;
2448 	list_add(&priv_data->list, &rule->clients_data);
2449 
2450 unlock:
2451 	mutex_unlock(&rule->clients_lock);
2452 
2453 	return 0;
2454 }
2455 
2456 static int remove_from_clients(struct mlx5_flow_rule *rule,
2457 			bool ctx_changed,
2458 			void *client_data,
2459 			void *context)
2460 {
2461 	struct fs_client_priv_data *iter_client;
2462 	struct fs_client_priv_data *temp_client;
2463 	struct mlx5_flow_handler *handler = (struct
2464 						mlx5_flow_handler*)context;
2465 
2466 	mutex_lock(&rule->clients_lock);
2467 	list_for_each_entry_safe(iter_client, temp_client,
2468 				 &rule->clients_data, list) {
2469 		if (iter_client->fs_handler == handler) {
2470 			list_del(&iter_client->list);
2471 			kfree(iter_client);
2472 			break;
2473 		}
2474 	}
2475 	mutex_unlock(&rule->clients_lock);
2476 
2477 	return 0;
2478 }
2479 
2480 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2481 								enum mlx5_flow_namespace_type ns_type,
2482 								rule_event_fn add_cb,
2483 								rule_event_fn del_cb,
2484 								void *context)
2485 {
2486 	struct mlx5_flow_namespace *ns;
2487 	struct mlx5_flow_handler *handler;
2488 
2489 	ns = mlx5_get_flow_namespace(dev, ns_type);
2490 	if (!ns)
2491 		return ERR_PTR(-EINVAL);
2492 
2493 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2494 	if (!handler)
2495 		return ERR_PTR(-ENOMEM);
2496 
2497 	handler->add_dst_cb = add_cb;
2498 	handler->del_dst_cb = del_cb;
2499 	handler->client_context = context;
2500 	handler->ns = ns;
2501 	down_write(&ns->notifiers_rw_sem);
2502 	list_add_tail(&handler->list, &ns->list_notifiers);
2503 	up_write(&ns->notifiers_rw_sem);
2504 
2505 	return handler;
2506 }
2507 
static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
				rule_event_fn add_rule_cb,
				void *context);

/*
 * Tear down a rule notifier: remove the handler's private data from
 * every existing rule in the namespace, unlink it from the notifier
 * list and free it.  Both semaphores are taken for write so neither
 * rule changes nor notifier traversals can run concurrently;
 * dests_rw_sem is taken first, matching the ordering used by
 * mlx5_flow_iterate_existing_rules().
 */
void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
{
	struct mlx5_flow_namespace *ns = handler->ns;

	/*Remove from dst's clients*/
	down_write(&ns->dests_rw_sem);
	down_write(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, remove_from_clients, handler);
	list_del(&handler->list);
	up_write(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
	kfree(handler);
}
2525 
/*
 * Invoke add_rule_cb on every rule in the flow table, walking
 * table -> group -> fte -> rule with each level's base lock held.  The
 * callback is told via is_new_rule whether the rule is the first one of
 * its fte.  A non-zero return from the callback aborts the iteration;
 * the error itself is not propagated to the caller.
 */
static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
				rule_event_fn add_rule_cb,
				void *context)
{
	struct mlx5_flow_group *iter_fg;
	struct fs_fte *iter_fte;
	struct mlx5_flow_rule *iter_rule;
	int err = 0;
	bool is_new_rule;

	mutex_lock(&ft->base.lock);
	fs_for_each_fg(iter_fg, ft) {
		mutex_lock(&iter_fg->base.lock);
		fs_for_each_fte(iter_fte, iter_fg) {
			mutex_lock(&iter_fte->base.lock);
			is_new_rule = true;
			fs_for_each_dst(iter_rule, iter_fte) {
				/* Hold a reference across the callback. */
				fs_get(&iter_rule->base);
				err = add_rule_cb(iter_rule,
						 is_new_rule,
						 NULL,
						 context);
				/* Parent (fte) lock is already held here. */
				fs_put_parent_locked(&iter_rule->base);
				if (err)
					break;
				is_new_rule = false;
			}
			mutex_unlock(&iter_fte->base.lock);
			if (err)
				break;
		}
		mutex_unlock(&iter_fg->base.lock);
		if (err)
			break;
	}
	mutex_unlock(&ft->base.lock);
}
2563 
2564 static void iterate_rules_in_prio(struct fs_prio *prio,
2565 				  rule_event_fn add_rule_cb,
2566 				  void *context)
2567 {
2568 	struct fs_base *it;
2569 
2570 	mutex_lock(&prio->base.lock);
2571 	fs_for_each_ns_or_ft(it, prio) {
2572 		if (it->type == FS_TYPE_FLOW_TABLE) {
2573 			struct mlx5_flow_table	      *ft;
2574 
2575 			fs_get_obj(ft, it);
2576 			iterate_rules_in_ft(ft, add_rule_cb, context);
2577 		} else {
2578 			struct mlx5_flow_namespace *ns;
2579 
2580 			fs_get_obj(ns, it);
2581 			iterate_rules_in_ns(ns, add_rule_cb, context);
2582 		}
2583 	}
2584 	mutex_unlock(&prio->base.lock);
2585 }
2586 
2587 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2588 				rule_event_fn add_rule_cb,
2589 				void *context)
2590 {
2591 	struct fs_prio *iter_prio;
2592 
2593 	mutex_lock(&ns->base.lock);
2594 	fs_for_each_prio(iter_prio, ns) {
2595 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2596 	}
2597 	mutex_unlock(&ns->base.lock);
2598 }
2599 
/*
 * Replay every existing rule in the namespace through add_rule_cb.
 * dests_rw_sem is taken for write to freeze rule additions/removals and
 * notifiers_rw_sem for read; the write-before-read order matches
 * mlx5_unregister_rule_notifier() and must not change.
 */
void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
					 rule_event_fn add_rule_cb,
					 void *context)
{
	down_write(&ns->dests_rw_sem);
	down_read(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, add_rule_cb, context);
	up_read(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
}
2610 
2611 
2612 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2613 {
2614 	struct mlx5_flow_rule_node *iter_node;
2615 	struct mlx5_flow_rule_node *temp_node;
2616 
2617 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2618 		list_del(&iter_node->list);
2619 		kfree(iter_node);
2620 	}
2621 
2622 	kfree(rules_list);
2623 }
2624 
2625 #define ROCEV1_ETHERTYPE 0x8915
2626 static int set_rocev1_rules(struct list_head *rules_list)
2627 {
2628 	struct mlx5_flow_rule_node *rocev1_rule;
2629 
2630 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2631 	if (!rocev1_rule)
2632 		return -ENOMEM;
2633 
2634 	rocev1_rule->match_criteria_enable =
2635 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2636 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2637 		 0xffff);
2638 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2639 		 ROCEV1_ETHERTYPE);
2640 
2641 	list_add_tail(&rocev1_rule->list, rules_list);
2642 
2643 	return 0;
2644 }
2645 
2646 #define ROCEV2_UDP_PORT 4791
2647 static int set_rocev2_rules(struct list_head *rules_list)
2648 {
2649 	struct mlx5_flow_rule_node *ipv4_rule;
2650 	struct mlx5_flow_rule_node *ipv6_rule;
2651 
2652 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2653 	if (!ipv4_rule)
2654 		return -ENOMEM;
2655 
2656 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2657 	if (!ipv6_rule) {
2658 		kfree(ipv4_rule);
2659 		return -ENOMEM;
2660 	}
2661 
2662 	ipv4_rule->match_criteria_enable =
2663 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2664 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2665 		 0xffff);
2666 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2667 		 0x0800);
2668 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2669 		 0xff);
2670 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2671 		 IPPROTO_UDP);
2672 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2673 		 0xffff);
2674 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2675 		 ROCEV2_UDP_PORT);
2676 
2677 	ipv6_rule->match_criteria_enable =
2678 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2679 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2680 		 0xffff);
2681 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2682 		 0x86dd);
2683 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2684 		 0xff);
2685 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2686 		 IPPROTO_UDP);
2687 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2688 		 0xffff);
2689 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2690 		 ROCEV2_UDP_PORT);
2691 
2692 	list_add_tail(&ipv4_rule->list, rules_list);
2693 	list_add_tail(&ipv6_rule->list, rules_list);
2694 
2695 	return 0;
2696 }
2697 
2698 
2699 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2700 {
2701 	int err = 0;
2702 	struct mlx5_flow_rules_list *rules_list =
2703 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2704 
2705 	if (!rules_list)
2706 		return NULL;
2707 
2708 	INIT_LIST_HEAD(&rules_list->head);
2709 
2710 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2711 		err = set_rocev1_rules(&rules_list->head);
2712 		if (err)
2713 			goto free_list;
2714 	}
2715 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2716 		err = set_rocev2_rules(&rules_list->head);
2717 	if (err)
2718 		goto free_list;
2719 
2720 	return rules_list;
2721 
2722 free_list:
2723 	mlx5_del_flow_rules_list(rules_list);
2724 	return NULL;
2725 }
2726