/* xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision bb4645b9) */
/*-
 * Copyright (c) 2013-2021, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/module.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/mlx5_core/mlx5_core.h>
#include <dev/mlx5/mlx5_core/fs_core.h>
#include <linux/string.h>
#include <linux/compiler.h>

#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
		 ...) {.type = FS_TYPE_PRIO,\
	.name = name_val,\
	.min_ft_level = min_level_val,\
	.flags = flags_val,\
	.max_ft = max_ft_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

#define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
		 __VA_ARGS__)\

#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
	.name = name_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__}}

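/*
 * The ADD_PRIO/ADD_FT_PRIO/ADD_NS macros build the static init_tree_node
 * tree below at compile time.  As a rough sketch, an invocation such as
 *
 *	ADD_FT_PRIO("prio0", 0, BYPASS_PRIO_MAX_FT)
 *
 * expands to a designated initializer along the lines of
 *
 *	{ .type = FS_TYPE_PRIO, .name = "prio0", .min_ft_level = 0,
 *	  .flags = 0, .max_ft = BYPASS_PRIO_MAX_FT,
 *	  .children = (struct init_tree_node[]) {}, .ar_size = 0 }
 *
 * where INIT_TREE_NODE_ARRAY_SIZE derives ar_size from the variadic
 * children by dividing the compound literal's size by the element size.
 */
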
/* Flowtable sizes: */
#define	BYPASS_MAX_FT 5
#define	BYPASS_PRIO_MAX_FT 1
#define	OFFLOADS_MAX_FT 2
#define	KERNEL_MAX_FT 5
#define	LEFTOVER_MAX_FT 1

/* Flowtable levels: */
#define	OFFLOADS_MIN_LEVEL 3
#define	KERNEL_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
#define	LEFTOVER_MIN_LEVEL (KERNEL_MIN_LEVEL + 1)
#define	BYPASS_MIN_LEVEL (MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL)

struct node_caps {
	size_t	arr_sz;
	long	*caps;
};

struct init_tree_node {
	enum fs_type	type;
	const char	*name;
	struct init_tree_node *children;
	int ar_size;
	struct node_caps caps;
	u8  flags;
	int min_ft_level;
	int prio;
	int max_ft;
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.name = "root",
	.ar_size = 4,
	.children = (struct init_tree_node[]) {
		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("by_pass_ns",
				ADD_FT_PRIO("prio0", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio1", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio2", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio3", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio4", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio5", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio6", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio7", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio-mcast", 0,
					    BYPASS_PRIO_MAX_FT))),
		ADD_PRIO("offloads_prio", 0, OFFLOADS_MIN_LEVEL, 0, {},
			 ADD_NS("offloads_ns",
				ADD_FT_PRIO("prio_offloads-0", 0,
					    OFFLOADS_MAX_FT))),
		ADD_PRIO("kernel_prio", 0, KERNEL_MIN_LEVEL, 0, {},
			 ADD_NS("kernel_ns",
				ADD_FT_PRIO("prio_kernel-0", 0,
					    KERNEL_MAX_FT))),
		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
			 LEFTOVER_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("leftover_ns",
				ADD_FT_PRIO("leftovers_prio-0",
					MLX5_CORE_FS_PRIO_SHARED,
					LEFTOVER_MAX_FT)))
	}
};
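
/*
 * root_fs above describes the default NIC receive steering layout: a
 * bypass priority with nine sub-priorities of one table each, an
 * offloads priority, a kernel priority and a shared leftovers priority.
 * Bypass and leftovers additionally require the flow_modify_en and
 * modify_root capabilities, presumably because tables in those
 * priorities come and go at runtime and the chain (and possibly the
 * root table) must be rewired around them.
 */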

/* Tree creation functions */

static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
{
	struct fs_base *parent;

	/* Make sure we only read it once while we go up the tree */
	while ((parent = node->parent))
		node = parent;

	if (node->type != FS_TYPE_NAMESPACE) {
		return NULL;
	}

	return container_of(container_of(node,
					 struct mlx5_flow_namespace,
					 base),
			    struct mlx5_flow_root_namespace,
			    ns);
}

static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
{
	struct mlx5_flow_root_namespace *root = find_root(node);

	if (root)
		return root->dev;
	return NULL;
}

static void fs_init_node(struct fs_base *node,
			 unsigned int refcount)
{
	kref_init(&node->refcount);
	atomic_set(&node->users_refcount, refcount);
	init_completion(&node->complete);
	INIT_LIST_HEAD(&node->list);
	mutex_init(&node->lock);
}

static void _fs_add_node(struct fs_base *node,
			 const char *name,
			 struct fs_base *parent)
{
	if (parent)
		atomic_inc(&parent->users_refcount);
	node->name = kstrdup_const(name, GFP_KERNEL);
	node->parent = parent;
}

static void fs_add_node(struct fs_base *node,
			struct fs_base *parent, const char *name,
			unsigned int refcount)
{
	fs_init_node(node, refcount);
	_fs_add_node(node, name, parent);
}

static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked);

static void fs_del_dst(struct mlx5_flow_rule *dst);
static void _fs_del_ft(struct mlx5_flow_table *ft);
static void fs_del_fg(struct mlx5_flow_group *fg);
static void fs_del_fte(struct fs_fte *fte);

static void cmd_remove_node(struct fs_base *base)
{
	switch (base->type) {
	case FS_TYPE_FLOW_DEST:
		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
		break;
	case FS_TYPE_FLOW_TABLE:
		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
		break;
	case FS_TYPE_FLOW_GROUP:
		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
		break;
	case FS_TYPE_FLOW_ENTRY:
		fs_del_fte(container_of(base, struct fs_fte, base));
		break;
	default:
		break;
	}
}

static void __fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	if (node->parent)
		mutex_lock(&node->parent->lock);
	mutex_lock(&node->lock);
	cmd_remove_node(node);
	mutex_unlock(&node->lock);
	complete(&node->complete);
	if (node->parent) {
		mutex_unlock(&node->parent->lock);
		_fs_put(node->parent, _fs_remove_node, false);
	}
}

void _fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	__fs_remove_node(kref);
	kfree_const(node->name);
	kfree(node);
}

static void fs_get(struct fs_base *node)
{
	atomic_inc(&node->users_refcount);
}

static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked)
{
	struct fs_base *parent_node = node->parent;

	if (parent_node && !parent_locked)
		mutex_lock(&parent_node->lock);
	if (atomic_dec_and_test(&node->users_refcount)) {
		if (parent_node) {
			/* remove from parent's list */
			list_del_init(&node->list);
			mutex_unlock(&parent_node->lock);
		}
		kref_put(&node->refcount, kref_cb);
		if (parent_node && parent_locked)
			mutex_lock(&parent_node->lock);
	} else if (parent_node && !parent_locked) {
		mutex_unlock(&parent_node->lock);
	}
}

static void fs_put(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, false);
}

static void fs_put_parent_locked(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, true);
}

static void fs_remove_node(struct fs_base *node)
{
	fs_put(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}

static void fs_remove_node_parent_locked(struct fs_base *node)
{
	fs_put_parent_locked(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
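
/*
 * Node lifetime here is two-stage: users_refcount counts active users
 * while refcount (a kref) keeps the memory alive.  fs_put() drops a
 * user reference; when the last one goes, the node is unlinked from its
 * parent and the kref callback (__fs_remove_node) issues the firmware
 * removal via cmd_remove_node() and signals ->complete.
 * fs_remove_node() then waits on that completion before freeing the
 * name and the node itself.
 */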

static struct fs_fte *fs_alloc_fte(u8 action,
				   u32 flow_tag,
				   u32 *match_value,
				   unsigned int index)
{
	struct fs_fte *fte;

	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
	if (!fte)
		return ERR_PTR(-ENOMEM);

	memcpy(fte->val, match_value, sizeof(fte->val));
	fte->base.type = FS_TYPE_FLOW_ENTRY;
	fte->dests_size = 0;
	fte->flow_tag = flow_tag;
	fte->index = index;
	INIT_LIST_HEAD(&fte->dests);
	fte->action = action;

	return fte;
}

static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
					  struct mlx5_flow_group *fg,
					  u32 *match_value,
					  unsigned int index)
{
	int err;
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;

	if (fg->num_ftes == fg->max_ftes)
		return ERR_PTR(-ENOSPC);

	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
	if (IS_ERR(fte))
		return fte;

	/* create the dst */
	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_fte;
	}

	fte->base.parent = &fg->base;
	fte->dests_size = 1;
	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
	dst->base.parent = &fte->base;
	list_add(&dst->base.list, &fte->dests);
	/* assume the caller creates the star rules sorted by index */
	list_add_tail(&fte->base.list, &fg->ftes);
	fg->num_ftes++;

	return fte;

free_fte:
	kfree(fte);
	return ERR_PTR(err);
}

/* assume that fte can't be changed */
static void free_star_fte_entry(struct fs_fte *fte)
{
	struct mlx5_flow_group	*fg;
	struct mlx5_flow_rule	*dst, *temp;

	fs_get_parent(fg, fte);

	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
		fte->dests_size--;
		list_del(&dst->base.list);
		kfree(dst);
	}

	list_del(&fte->base.list);
	fg->num_ftes--;
	kfree(fte);
}

static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
{
	struct mlx5_flow_group *fg;
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    create_fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    create_fg_in,
					    match_criteria_enable);

	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fg->ftes);
	fg->mask.match_criteria_enable = match_criteria_enable;
	memcpy(&fg->mask.match_criteria, match_criteria,
	       sizeof(fg->mask.match_criteria));
	fg->base.type = FS_TYPE_FLOW_GROUP;
	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
				   start_flow_index);
	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
				end_flow_index) - fg->start_index + 1;
	return fg;
}

static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio);

/* assumed src_ft and dst_ft can't be freed */
static int fs_set_star_rule(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *src_ft,
			    struct mlx5_flow_table *dst_ft)
{
	struct mlx5_flow_rule *src_dst;
	struct fs_fte *src_fte;
	int err = 0;
	u32 *match_value;
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
				   struct mlx5_flow_rule, base.list);
	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}

	/* Create the match context */
	fs_get_parent(src_fte, src_dst);

	src_dst->dest_attr.ft = dst_ft;
	if (dst_ft) {
		err = mlx5_cmd_fs_set_fte(dev,
					  src_ft->vport,
					  &src_fte->status,
					  match_value, src_ft->type,
					  src_ft->id, src_fte->index,
					  src_ft->star_rule.fg->id,
					  src_fte->flow_tag,
					  src_fte->action,
					  src_fte->dests_size,
					  &src_fte->dests);
		if (err)
			goto free;

		fs_get(&dst_ft->base);
	} else {
		mlx5_cmd_fs_delete_fte(dev,
				       src_ft->vport,
				       &src_fte->status,
				       src_ft->type, src_ft->id,
				       src_fte->index);
	}

free:
	kvfree(match_value);
	return err;
}
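
/*
 * Star ("catch-all") rules chain flow tables together: every table
 * reserves entries past max_fte for a flow group with an empty match
 * (built in create_star_rule() below), whose FTE forwards anything that
 * missed the regular groups to the next table in the chain.
 * fs_set_star_rule() repoints that forward destination; a NULL dst_ft
 * deletes the FTE instead, unhooking the table from the chain.
 */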

static int connect_prev_fts(struct fs_prio *locked_prio,
			    struct fs_prio *prev_prio,
			    struct mlx5_flow_table *next_ft)
{
	struct mlx5_flow_table *iter;
	int err = 0;
	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);

	if (!dev)
		return -ENODEV;

	mutex_lock(&prev_prio->base.lock);
	fs_for_each_ft(iter, prev_prio) {
		struct mlx5_flow_rule *src_dst =
			list_first_entry(&iter->star_rule.fte->dests,
					 struct mlx5_flow_rule, base.list);
		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;

		if (prev_ft == next_ft)
			continue;

		err = fs_set_star_rule(dev, iter, next_ft);
		if (err) {
			mlx5_core_warn(dev,
			    "mlx5: flow steering can't connect prev and next\n");
			goto unlock;
		} else {
			/* Assume ft's prio is locked */
			if (prev_ft) {
				struct fs_prio *prio;

				fs_get_parent(prio, prev_ft);
				if (prio == locked_prio)
					fs_put_parent_locked(&prev_ft->base);
				else
					fs_put(&prev_ft->base);
			}
		}
	}

unlock:
	mutex_unlock(&prev_prio->base.lock);
	/* propagate a chaining failure to the caller */
	return err;
}

static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
{
	struct mlx5_flow_group *fg;
	int err;
	u32 *fg_in;
	u32 *match_value;
	struct mlx5_flow_table *next_ft;
	struct mlx5_flow_table *prev_ft;
	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	fg_in = mlx5_vzalloc(fg_inlen);
	if (!fg_in) {
		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
		kvfree(fg_in);
		return -ENOMEM;
	}

	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
	fg = fs_alloc_fg(fg_in);
	if (IS_ERR(fg)) {
		err = PTR_ERR(fg);
		goto out;
	}
	ft->star_rule.fg = fg;
	err = mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
				    fg_in, ft->vport, ft->type,
				    ft->id,
				    &fg->id);
	if (err)
		goto free_fg;

	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
						match_value,
						ft->max_fte);
	if (IS_ERR(ft->star_rule.fte)) {
		/* don't hand the ERR_PTR value to free_star_fte_entry() */
		err = PTR_ERR(ft->star_rule.fte);
		goto destroy_fg;
	}

	mutex_lock(&root->fs_chain_lock);
	next_ft = find_next_ft(prio);
	err = fs_set_star_rule(root->dev, ft, next_ft);
	if (err) {
		mutex_unlock(&root->fs_chain_lock);
		goto free_star_rule;
	}
	if (next_ft) {
		struct fs_prio *parent;

		fs_get_parent(parent, next_ft);
		fs_put(&next_ft->base);
	}
	prev_ft = find_prev_ft(ft, prio);
	if (prev_ft) {
		struct fs_prio *prev_parent;

		fs_get_parent(prev_parent, prev_ft);

		err = connect_prev_fts(NULL, prev_parent, ft);
		if (err) {
			mutex_unlock(&root->fs_chain_lock);
			goto destroy_chained_star_rule;
		}
		fs_put(&prev_ft->base);
	}
	mutex_unlock(&root->fs_chain_lock);
	kvfree(fg_in);
	kvfree(match_value);

	return 0;

destroy_chained_star_rule:
	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
	if (next_ft)
		fs_put(&next_ft->base);
free_star_rule:
	free_star_fte_entry(ft->star_rule.fte);
destroy_fg:
	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
			       ft->type, ft->id,
			       fg->id);
free_fg:
	kfree(fg);
out:
	kvfree(fg_in);
	kvfree(match_value);
	return err;
}

static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
{
	int err;
	struct mlx5_flow_root_namespace *root;
	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
	struct mlx5_flow_table *prev_ft, *next_ft;
	struct fs_prio *prev_prio;

	WARN_ON(!dev);

	root = find_root(&prio->base);
	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		/* without a root there is no chain to unhook from */
		return;
	}

	/* In order to ensure atomic deletion, first update
	 * the prev ft to point to the next ft.
	 */
	mutex_lock(&root->fs_chain_lock);
	prev_ft = find_prev_ft(ft, prio);
	next_ft = find_next_ft(prio);
	if (prev_ft) {
		fs_get_parent(prev_prio, prev_ft);
		/* Prev is connected to ft only if ft is the first (last) in the prio */
		err = connect_prev_fts(prio, prev_prio, next_ft);
		if (err)
			mlx5_core_warn(root->dev,
				       "flow steering can't connect prev and next of flow table\n");
		fs_put(&prev_ft->base);
	}

	err = fs_set_star_rule(root->dev, ft, NULL);
	/* One put is for the fs_get in find_next_ft */
	if (next_ft) {
		fs_put(&next_ft->base);
		if (!err)
			fs_put(&next_ft->base);
	}

	mutex_unlock(&root->fs_chain_lock);
	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
				     ft->star_rule.fg->id);
	if (err)
		mlx5_core_warn(dev,
			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n",
			       ft->star_rule.fg->start_index, ft->base.name);
	free_star_fte_entry(ft->star_rule.fte);

	kfree(ft->star_rule.fg);
	ft->star_rule.fg = NULL;
}

static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
				 unsigned int prio)
{
	struct fs_prio *iter_prio;

	fs_for_each_prio(iter_prio, ns) {
		if (iter_prio->prio == prio)
			return iter_prio;
	}

	return NULL;
}

static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match);

static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
				      struct fs_prio *prio)
{
	unsigned int level = 0;
	struct fs_prio *p;

	if (!ns)
		return 0;

	mutex_lock(&ns->base.lock);
	fs_for_each_prio(p, ns) {
		if (p != prio)
			level += p->max_ft;
		else
			break;
	}
	mutex_unlock(&ns->base.lock);

	fs_get_parent(prio, ns);
	if (prio)
		WARN_ON(prio->base.type != FS_TYPE_PRIO);

	return level + _alloc_new_level(prio, ns);
}

/* Called under lock of priority, hence locking all upper objects */
static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match)
{
	struct mlx5_flow_namespace *ns;
	struct fs_base *it;
	unsigned int level = 0;

	if (!prio)
		return 0;

	mutex_lock(&prio->base.lock);
	fs_for_each_ns_or_ft_reverse(it, prio) {
		if (it->type == FS_TYPE_NAMESPACE) {
			struct fs_prio *p;

			fs_get_obj(ns, it);

			if (match != ns) {
				mutex_lock(&ns->base.lock);
				fs_for_each_prio(p, ns)
					level += p->max_ft;
				mutex_unlock(&ns->base.lock);
			} else {
				break;
			}
		} else {
			struct mlx5_flow_table *ft;

			fs_get_obj(ft, it);
			mutex_unlock(&prio->base.lock);
			return level + ft->level + 1;
		}
	}

	fs_get_parent(ns, prio);
	mutex_unlock(&prio->base.lock);
	return __alloc_new_level(ns, prio) + level;
}

static unsigned int alloc_new_level(struct fs_prio *prio)
{
	return _alloc_new_level(prio, NULL);
}
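
/*
 * alloc_new_level() computes the level for a new table by summing the
 * max_ft budgets of everything that precedes the target priority, all
 * the way up the tree.  As a hypothetical example, the first table in
 * "prio2" of the bypass namespace sits past the budgets of "prio0" and
 * "prio1", so with BYPASS_PRIO_MAX_FT == 1 those two priorities
 * contribute 2 to the level before the parent priorities are accounted
 * for.
 */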

static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
				 struct mlx5_flow_table *ft)
{
	int err;
	int min_level = INT_MAX;

	if (root->root_ft)
		min_level = root->root_ft->level;

	if (ft->level >= min_level)
		return 0;

	err = mlx5_cmd_update_root_ft(root->dev, ft->type, ft->id);
	if (err)
		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
			       ft->id);
	else
		root->root_ft = ft;

	return err;
}

static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
						 u16 vport,
						 struct fs_prio *fs_prio,
						 int max_fte,
						 const char *name)
{
	struct mlx5_flow_table *ft;
	int err;
	int log_table_sz;
	int ft_size;
	char gen_name[20];
	struct mlx5_flow_root_namespace *root = find_root(&ns->base);
	struct mlx5_core_dev *dev = fs_get_dev(&ns->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of namespace %s",
		    ns->base.name);
		return ERR_PTR(-ENODEV);
	}

	if (fs_prio->num_ft == fs_prio->max_ft)
		return ERR_PTR(-ENOSPC);

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	fs_init_node(&ft->base, 1);
	INIT_LIST_HEAD(&ft->fgs);

	/* Temporary workaround until the level is exposed in the API */
	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
	    root->table_type == FS_FT_ESW_INGRESS_ACL)
		ft->level = 0;
	else
		ft->level = alloc_new_level(fs_prio);

	ft->base.type = FS_TYPE_FLOW_TABLE;
	ft->vport = vport;
	ft->type = root->table_type;
	/* Two entries are reserved for the star rules */
	ft_size = roundup_pow_of_two(max_fte + 2);
	/* The user isn't aware of those rules */
	ft->max_fte = ft_size - 2;
	log_table_sz = ilog2(ft_size);
	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
				    ft->level, log_table_sz, &ft->id);
	if (err)
		goto free_ft;

	err = create_star_rule(ft, fs_prio);
	if (err)
		goto del_ft;

	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
			       flow_table_properties_nic_receive.modify_root)) {
		err = update_root_ft_create(root, ft);
		if (err)
			goto destroy_star_rule;
	}

	if (!name || !strlen(name)) {
		snprintf(gen_name, sizeof(gen_name), "flow_table_%u", ft->id);
		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
	} else {
		_fs_add_node(&ft->base, name, &fs_prio->base);
	}
	list_add_tail(&ft->base.list, &fs_prio->objs);
	fs_prio->num_ft++;

	return ft;

destroy_star_rule:
	destroy_star_rule(ft, fs_prio);
del_ft:
	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
free_ft:
	kfree(ft);
	return ERR_PTR(err);
}

static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
						u16 vport,
						unsigned int prio,
						int max_fte,
						const char *name)
{
	struct fs_prio *fs_prio;

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
}

static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
						   struct list_head *start);

static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
						     struct list_head *start);

static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
{
	struct mlx5_flow_table *ft;

	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
	if (ft) {
		ft->shared_refcount++;
		return ft;
	}

	return NULL;
}

struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
							    int prio,
							    const char *name,
							    int num_flow_table_entries,
							    int max_num_groups)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_prio *fs_prio;
	bool is_shared_prio;

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&fs_prio->shared_lock);
		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
	}

	if (ft)
		goto return_ft;

	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
			      name);
	if (IS_ERR(ft))
		goto return_ft;

	ft->autogroup.active = true;
	ft->autogroup.max_types = max_num_groups;
	if (is_shared_prio)
		ft->shared_refcount = 1;

return_ft:
	if (is_shared_prio)
		mutex_unlock(&fs_prio->shared_lock);
	return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
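
/*
 * Consumer-side sketch (illustrative; assumes the namespace was looked
 * up through this driver's mlx5_get_flow_namespace() helper):
 *
 *	struct mlx5_flow_table *ft;
 *
 *	ft = mlx5_create_auto_grouped_flow_table(ns, 0, "my_table",
 *						 1024, 4);
 *	if (IS_ERR(ft))
 *		return PTR_ERR(ft);
 *
 * In a shared priority (MLX5_CORE_FS_PRIO_SHARED) repeated calls hand
 * back the same table with shared_refcount bumped, so every successful
 * call must be paired with mlx5_destroy_flow_table().
 */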

struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     u16 vport,
						     int prio,
						     const char *name,
						     int num_flow_table_entries)
{
	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_vport_flow_table);

struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       int prio,
					       const char *name,
					       int num_flow_table_entries)
{
	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_flow_table);
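
/*
 * mlx5_create_flow_table() is the non-autogrouped variant: before any
 * rule can be added, the caller must carve the table into groups
 * explicitly with mlx5_create_flow_group() (a sketch, under the same
 * assumptions as above):
 *
 *	ft = mlx5_create_flow_table(ns, 0, "plain_table", 64);
 *	if (IS_ERR(ft))
 *		return PTR_ERR(ft);
 */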

static void _fs_del_ft(struct mlx5_flow_table *ft)
{
	int err;
	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
	struct fs_prio *prio;

	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
	if (err)
		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
			       ft->base.name);

	fs_get_parent(prio, ft);
	prio->num_ft--;
}

static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
				  struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_table *put_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	fs_get_parent(prio, ft);
	/* Assuming objs contains only flow tables and
	 * flow tables are sorted by level.
	 */
	if (!list_is_last(&ft->base.list, &prio->objs)) {
		next_ft = list_next_entry(ft, base.list);
	} else {
		next_ft = find_next_ft(prio);
		put_ft = next_ft;
	}

	if (next_ft) {
		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
					      next_ft->id);
		if (err)
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
	}
	if (!err)
		root->root_ft = next_ft;

	if (put_ft)
		fs_put(&put_ft->base);

	return err;
}

/* Objects in the same prio are destroyed in the reverse order they were created */
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_root_namespace *root;
	bool is_shared_prio;
	struct mlx5_core_dev *dev;

	fs_get_parent(prio, ft);
	root = find_root(&prio->base);
	dev = fs_get_dev(&prio->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		return -ENODEV;
	}

	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&prio->shared_lock);
		if (ft->shared_refcount > 1) {
			--ft->shared_refcount;
			fs_put(&ft->base);
			mutex_unlock(&prio->shared_lock);
			return 0;
		}
	}

	mutex_lock(&prio->base.lock);
	mutex_lock(&ft->base.lock);

	err = update_root_ft_destroy(root, ft);
	if (err)
		goto unlock_ft;

	/* delete the two last entries */
	destroy_star_rule(ft, prio);

	mutex_unlock(&ft->base.lock);
	fs_remove_node_parent_locked(&ft->base);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;

unlock_ft:
	mutex_unlock(&ft->base.lock);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);

static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
					    struct mlx5_flow_table *ft,
					    struct list_head *prev,
					    u32 *fg_in,
					    int refcount)
{
	struct mlx5_flow_group *fg;
	int err;
	char name[20];

	fg = fs_alloc_fg(fg_in);
	if (IS_ERR(fg))
		return fg;

	err = mlx5_cmd_fs_create_fg(dev, fg_in,
				    ft->vport, ft->type, ft->id,
				    &fg->id);
	if (err)
		goto free_fg;

	mutex_lock(&ft->base.lock);
	if (ft->autogroup.active)
		ft->autogroup.num_types++;

	snprintf(name, sizeof(name), "group_%u", fg->id);
	/* Add the node to the tree */
	fs_add_node(&fg->base, &ft->base, name, refcount);
	/* Add the node to the group list */
	list_add(&fg->base.list, prev);
	mutex_unlock(&ft->base.lock);

	return fg;

free_fg:
	kfree(fg);
	return ERR_PTR(err);
}

struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
					       u32 *in)
{
	struct mlx5_flow_group *fg;
	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (ft->autogroup.active)
		return ERR_PTR(-EPERM);

	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);

	return fg;
}
EXPORT_SYMBOL(mlx5_create_flow_group);
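
/*
 * Minimal sketch of creating an explicit group over the first 16
 * entries of a (non-autogrouped) table, matching on outer headers only:
 *
 *	u32 *in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in));
 *	struct mlx5_flow_group *fg;
 *
 *	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
 *	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS);
 *	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
 *	MLX5_SET(create_flow_group_in, in, end_flow_index, 15);
 *	... fill MLX5_ADDR_OF(create_flow_group_in, in, match_criteria) ...
 *	fg = mlx5_create_flow_group(ft, in);
 *	kvfree(in);
 *
 * Autogrouped tables reject this with -EPERM; their groups are managed
 * internally by create_autogroup().
 */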

/* A group is destroyed when all the rules in the group have been removed */
static void fs_del_fg(struct mlx5_flow_group *fg)
{
	struct mlx5_flow_table *parent_ft;
	struct mlx5_core_dev *dev;

	fs_get_parent(parent_ft, fg);
	dev = fs_get_dev(&parent_ft->base);
	WARN_ON(!dev);

	if (parent_ft->autogroup.active)
		parent_ft->autogroup.num_types--;

	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
				   parent_ft->type,
				   parent_ft->id, fg->id))
		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
}

void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	fs_remove_node(&fg->base);
}
EXPORT_SYMBOL(mlx5_destroy_flow_group);

static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
{
	unsigned int i;

	/* TODO: optimize by comparing 64 bits at a time when possible */
	for (i = 0; i < size; i++, mask++, val1++, val2++)
		if ((*((u8 *)val1) & (*(u8 *)mask)) !=
		    ((*(u8 *)val2) & (*(u8 *)mask)))
			return false;

	return true;
}
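
/*
 * The comparison above only considers bits set in the group mask.  For
 * example, with a one-byte mask of 0x0f, values 0x1a and 0x2a compare
 * equal (both are 0x0a after masking) while 0x1a and 0x1b do not.
 */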

bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
			void *val1, void *val2)
{
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, outer_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, outer_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, outer_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}

	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, misc_parameters);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, misc_parameters);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, misc_parameters);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
			return false;
	}

	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, inner_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, inner_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, inner_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}

	return true;
}

bool fs_match_exact_mask(u8 match_criteria_enable1,
			 u8 match_criteria_enable2,
			 void *mask1, void *mask2)
{
	return match_criteria_enable1 == match_criteria_enable2 &&
		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
}

static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start);

static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
							      struct list_head *start)
{
	struct fs_base *it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}

static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
							     struct list_head *start)
{
	struct mlx5_flow_table *ft;

	if (!prio)
		return NULL;

	mutex_lock(&prio->base.lock);
	ft = _find_first_ft_in_prio_reverse(prio, start);
	mutex_unlock(&prio->base.lock);

	return ft;
}

static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue_reverse(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}

/* Returns a held ft; assumes curr is protected and curr's parent is
 * locked.
 */
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	if (!curr)
		return NULL;

	/* a prio has either namespaces or flow tables, but not both */
	if (!list_empty(&prio->objs) &&
	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
	    curr)
		return NULL;

	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (prio && !ft)
			ft = find_first_ft_in_prio_reverse(prio,
							   &curr_base->list);
	}
	return ft;
}

static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
						      struct list_head *start)
{
	struct fs_base	*it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}

static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
						     struct list_head *start)
{
	struct mlx5_flow_table *ft;

	if (!prio)
		return NULL;

	mutex_lock(&prio->base.lock);
	ft = _find_first_ft_in_prio(prio, start);
	mutex_unlock(&prio->base.lock);

	return ft;
}

static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
						   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}

/* Returns a held ft; assumes curr is protected and curr's parent is
 * locked.
 */
static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (!ft && prio)
			ft = _find_first_ft_in_prio(prio, &curr_base->list);
	}
	return ft;
}

/* called under the ft mutex lock */
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
						u8 match_criteria_enable,
						u32 *match_criteria)
{
	unsigned int group_size;
	unsigned int candidate_index = 0;
	struct mlx5_flow_group *g;
	struct mlx5_flow_group *ret;
	struct list_head *prev = &ft->fgs;
	struct mlx5_core_dev *dev;
	u32 *in;
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	void *match_criteria_addr;

	if (!ft->autogroup.active)
		return ERR_PTR(-ENOENT);

	dev = fs_get_dev(&ft->base);
	if (!dev)
		return ERR_PTR(-ENODEV);

	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return ERR_PTR(-ENOMEM);
	}

	if (ft->autogroup.num_types < ft->autogroup.max_types)
		group_size = ft->max_fte / (ft->autogroup.max_types + 1);
	else
		group_size = 1;

	if (group_size == 0) {
		mlx5_core_warn(dev,
			       "flow steering can't create group size of 0\n");
		ret = ERR_PTR(-EINVAL);
		goto out;
	}

	/* sorted by start_index */
	fs_for_each_fg(g, ft) {
		if (candidate_index + group_size > g->start_index)
			candidate_index = g->start_index + g->max_ftes;
		else
			break;
		prev = &g->base.list;
	}

	if (candidate_index + group_size > ft->max_fte) {
		ret = ERR_PTR(-ENOSPC);
		goto out;
	}

	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
		 match_criteria_enable);
	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
	MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index +
		 group_size - 1);
	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
					   in, match_criteria);
	memcpy(match_criteria_addr, match_criteria,
	       MLX5_ST_SZ_BYTES(fte_match_param));

	ret = fs_create_fg(dev, ft, prev, in, 0);
out:
	kvfree(in);
	return ret;
}
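
/*
 * Worked example of the sizing above: an autogrouped table with
 * max_fte = 1024 and max_types = 4 hands out group_size =
 * 1024 / (4 + 1) = 204 entries per new group, leaving roughly a fifth
 * of the table in reserve; once max_types groups exist, further groups
 * fall back to single-entry allocations until the index space runs out.
 */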

static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
{
	struct mlx5_flow_namespace *ns = NULL;

	while (node && (node->type != FS_TYPE_NAMESPACE ||
			list_empty(&container_of(node, struct
						 mlx5_flow_namespace,
						 base)->list_notifiers)))
		node = node->parent;

	if (node)
		fs_get_obj(ns, node);

	return ns;
}

/* Assumption: fte is locked */
static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
				       struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool is_new_rule = list_first_entry(&fte->dests,
					    struct mlx5_flow_rule,
					    base.list) == dst;
	int err;

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;

	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		if (iter_handler->add_dst_cb) {
			data = NULL;
			mutex_lock(&dst->clients_lock);
			list_for_each_entry(
				iter_client, &dst->clients_data, list) {
				if (iter_client->fs_handler == iter_handler) {
					data = iter_client->client_dst_data;
					break;
				}
			}
			mutex_unlock(&dst->clients_lock);
			err = iter_handler->add_dst_cb(dst,
						       is_new_rule,
						       data,
						       iter_handler->client_context);
			if (err)
				break;
		}
	}
	up_read(&ns->notifiers_rw_sem);
}

static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
				       struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool ctx_changed = (fte->dests_size == 0);

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;
	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		data = NULL;
		mutex_lock(&dst->clients_lock);
		list_for_each_entry(iter_client, &dst->clients_data, list) {
			if (iter_client->fs_handler == iter_handler) {
				data = iter_client->client_dst_data;
				break;
			}
		}
		mutex_unlock(&dst->clients_lock);
		if (iter_handler->del_dst_cb) {
			iter_handler->del_dst_cb(dst, ctx_changed, data,
						 iter_handler->client_context);
		}
	}
	up_read(&ns->notifiers_rw_sem);
}

/* fte should not be deleted while calling this function */
static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
					      struct mlx5_flow_group *fg,
					      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *dst;
	int err;

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	memcpy(&dst->dest_attr, dest, sizeof(*dest));
	dst->base.type = FS_TYPE_FLOW_DEST;
	INIT_LIST_HEAD(&dst->clients_data);
	mutex_init(&dst->clients_lock);
	fs_get_parent(ft, fg);
	/* Temporarily add the dst to the dests list so that the firmware
	 * command below sees the complete destination set.
	 */
	list_add_tail(&dst->base.list, &fte->dests);
	fte->dests_size++;
	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
				  ft->vport,
				  &fte->status,
				  fte->val, ft->type,
				  ft->id, fte->index, fg->id, fte->flow_tag,
				  fte->action, fte->dests_size, &fte->dests);
	if (err)
		goto free_dst;

	/* Unlink again; the caller re-adds the dst to the tree and to the
	 * dests list via add_rule_to_tree().
	 */
	list_del(&dst->base.list);

	return dst;

free_dst:
	list_del(&dst->base.list);
	kfree(dst);
	fte->dests_size--;
	return ERR_PTR(err);
}

static char *get_dest_name(struct mlx5_flow_destination *dest)
{
	char *name = kzalloc(sizeof(char) * 20, GFP_KERNEL);

	switch (dest->type) {
	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
		snprintf(name, 20, "dest_%s_%u", "flow_table",
			 dest->ft->id);
		return name;
	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
		snprintf(name, 20, "dest_%s_%u", "vport",
			 dest->vport_num);
		return name;
	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
		return name;
	default:
		kfree(name);
		return NULL;
	}
}

/* assumed fg is locked */
static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
					 struct list_head **prev)
{
	struct fs_fte *fte;
	unsigned int start = fg->start_index;

	if (prev)
		*prev = &fg->ftes;

	/* assumed list is sorted by index */
	fs_for_each_fte(fte, fg) {
		if (fte->index != start)
			return start;
		start++;
		if (prev)
			*prev = &fte->base.list;
	}

	return start;
}

static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
				    u32 *match_value,
				    u8 action,
				    u32 flow_tag,
				    struct list_head **prev)
{
	unsigned int index;

	index = fs_get_free_fg_index(fg, prev);
	return fs_alloc_fte(action, flow_tag, match_value, index);
}

static void add_rule_to_tree(struct mlx5_flow_rule *rule,
			     struct fs_fte *fte)
{
	char *dest_name;

	dest_name = get_dest_name(&rule->dest_attr);
	fs_add_node(&rule->base, &fte->base, dest_name, 1);
	/* re-add to the list, since fs_add_node reset our list */
	list_add_tail(&rule->base.list, &fte->dests);
	kfree(dest_name);
	call_to_add_rule_notifiers(rule, fte);
}

static void fs_del_dst(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;

	WARN_ON(!dev);

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return;
	}

	fs_get_parent(fte, dst);
	fs_get_parent(fg, fte);
	mutex_lock(&fg->base.lock);
	memcpy(match_value, fte->val, sizeof(fte->val));
	/* ft can't be changed as fg is locked */
	fs_get_parent(ft, fg);
	list_del(&dst->base.list);
	fte->dests_size--;
	if (fte->dests_size) {
		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
					  &fte->status, match_value, ft->type,
					  ft->id, fte->index, fg->id,
					  fte->flow_tag, fte->action,
					  fte->dests_size, &fte->dests);
		if (err) {
			mlx5_core_warn(dev, "%s can't delete dst %s\n",
				       __func__, dst->base.name);
			goto err;
		}
	}
	call_to_del_rule_notifiers(dst, fte);
err:
	mutex_unlock(&fg->base.lock);
	kvfree(match_value);
}

static void fs_del_fte(struct fs_fte *fte)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	int err;
	struct mlx5_core_dev *dev;

	fs_get_parent(fg, fte);
	fs_get_parent(ft, fg);

	dev = fs_get_dev(&ft->base);
	WARN_ON(!dev);

	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
				     ft->type, ft->id, fte->index);
	if (err)
		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
			       fte->base.name);

	fg->num_ftes--;
}

/* assuming parent fg is locked */
/* Add dst algorithm */
static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
					    u32 *match_value,
					    u8 action,
					    u32 flow_tag,
					    struct mlx5_flow_destination *dest)
{
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	char fte_name[20];

	mutex_lock(&fg->base.lock);
	fs_for_each_fte(fte, fg) {
		/* TODO: Check the size against the PRM max size */
		mutex_lock(&fte->base.lock);
		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
		    action == fte->action && flow_tag == fte->flow_tag) {
			dst = _fs_add_dst_fte(fte, fg, dest);
			mutex_unlock(&fte->base.lock);
			if (IS_ERR(dst))
				goto unlock_fg;
			goto add_rule;
		}
		mutex_unlock(&fte->base.lock);
	}

	fs_get_parent(ft, fg);
	if (fg->num_ftes == fg->max_ftes) {
		dst = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	fte = fs_create_fte(fg, match_value, action, flow_tag, &prev);
	if (IS_ERR(fte)) {
		dst = (void *)fte;
		goto unlock_fg;
	}
	dst = _fs_add_dst_fte(fte, fg, dest);
	if (IS_ERR(dst)) {
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
	/* Add the node to the tree */
	fs_add_node(&fte->base, &fg->base, fte_name, 0);
	list_add(&fte->base.list, prev);
add_rule:
	add_rule_to_tree(dst, fte);
unlock_fg:
	mutex_unlock(&fg->base.lock);
	return dst;
}

static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
					    u8 match_criteria_enable,
					    u32 *match_criteria,
					    u32 *match_value,
					    u8 action, u32 flow_tag,
					    struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_group *g;
	struct mlx5_flow_rule *dst;

	fs_get(&ft->base);
	mutex_lock(&ft->base.lock);
	fs_for_each_fg(g, ft)
		if (fs_match_exact_mask(g->mask.match_criteria_enable,
					match_criteria_enable,
					g->mask.match_criteria,
					match_criteria)) {
			mutex_unlock(&ft->base.lock);

			dst = fs_add_dst_fg(g, match_value,
					    action, flow_tag, dest);
			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
				goto unlock;
		}
	mutex_unlock(&ft->base.lock);

	g = create_autogroup(ft, match_criteria_enable, match_criteria);
	if (IS_ERR(g)) {
		dst = (void *)g;
		goto unlock;
	}

	dst = fs_add_dst_fg(g, match_value,
			    action, flow_tag, dest);
	if (IS_ERR(dst)) {
		/* Remove assumes refcount > 0 and autogroup creates a group
		 * with a refcount = 0.
		 */
		fs_get(&g->base);
		fs_remove_node(&g->base);
		goto unlock;
	}

unlock:
	fs_put(&ft->base);
	return dst;
}

struct mlx5_flow_rule *
mlx5_add_flow_rule(struct mlx5_flow_table *ft,
		   u8 match_criteria_enable,
		   u32 *match_criteria,
		   u32 *match_value,
		   u32 action,
		   u32 flow_tag,
		   struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_namespace *ns;

	ns = get_ns_with_notifiers(&ft->base);
	if (ns)
		down_read(&ns->dests_rw_sem);
	dst = fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
			    match_value, action, flow_tag, dest);
	if (ns)
		up_read(&ns->dests_rw_sem);

	return dst;
}
EXPORT_SYMBOL(mlx5_add_flow_rule);
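
/*
 * End-to-end rule sketch (illustrative values; assumes a TIR numbered
 * tirn already exists and ft is an autogrouped NIC RX table):
 *
 *	u32 *mc = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
 *	u32 *mv = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
 *	struct mlx5_flow_destination dest = {
 *		.type = MLX5_FLOW_CONTEXT_DEST_TYPE_TIR,
 *		.tir_num = tirn,
 *	};
 *	struct mlx5_flow_rule *rule;
 *
 *	... set the desired fields in mc/mv via MLX5_SET ...
 *	rule = mlx5_add_flow_rule(ft,
 *	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS,
 *	    mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 *	    MLX5_FS_DEFAULT_FLOW_TAG, &dest);
 *	kvfree(mc);
 *	kvfree(mv);
 *	...
 *	mlx5_del_flow_rule(&rule);
 */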

void mlx5_del_flow_rule(struct mlx5_flow_rule **pp)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_rule *dst;

	dst = *pp;
	*pp = NULL;

	if (IS_ERR_OR_NULL(dst))
		return;
	ns = get_ns_with_notifiers(&dst->base);
	if (ns)
		down_read(&ns->dests_rw_sem);
	fs_remove_node(&dst->base);
	if (ns)
		up_read(&ns->dests_rw_sem);
}
EXPORT_SYMBOL(mlx5_del_flow_rule);

#define MLX5_CORE_FS_ROOT_NS_NAME "root"
#define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
#define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
#define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
#define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
#define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
#define MLX5_CORE_FS_PRIO_MAX_FT 4
#define MLX5_CORE_FS_PRIO_MAX_NS 1

static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
				      unsigned int prio, int max_ft,
				      const char *name, u8 flags)
{
	struct fs_prio *fs_prio;

	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
	if (!fs_prio)
		return ERR_PTR(-ENOMEM);

	fs_prio->base.type = FS_TYPE_PRIO;
	fs_add_node(&fs_prio->base, &ns->base, name, 1);
	fs_prio->max_ft = max_ft;
	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
	fs_prio->prio = prio;
	fs_prio->flags = flags;
	list_add_tail(&fs_prio->base.list, &ns->prios);
	INIT_LIST_HEAD(&fs_prio->objs);
	mutex_init(&fs_prio->shared_lock);

	return fs_prio;
}
1927 
1928 static void cleanup_root_ns(struct mlx5_core_dev *dev)
1929 {
1930 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
1931 	struct fs_prio *iter_prio;
1932 
1933 	if (!root_ns)
1934 		return;
1935 
1936 	/* stage 1 */
1937 	fs_for_each_prio(iter_prio, &root_ns->ns) {
1938 		struct mlx5_flow_namespace *iter_ns;
1939 
1940 		fs_for_each_ns(iter_ns, iter_prio) {
1941 			while (!list_empty(&iter_ns->prios)) {
1942 				struct fs_base *iter_prio2 =
1943 					list_first_entry(&iter_ns->prios,
1944 							 struct fs_base,
1945 							 list);
1946 
1947 				fs_remove_node(iter_prio2);
1948 			}
1949 		}
1950 	}
1951 
1952 	/* stage 2 */
1953 	fs_for_each_prio(iter_prio, &root_ns->ns) {
1954 		while (!list_empty(&iter_prio->objs)) {
1955 			struct fs_base *iter_ns =
1956 				list_first_entry(&iter_prio->objs,
1957 						 struct fs_base,
1958 						 list);
1959 
1960 			fs_remove_node(iter_ns);
1961 		}
1962 	}
1963 	/* stage 3 */
1964 	while (!list_empty(&root_ns->ns.prios)) {
1965 		struct fs_base *iter_prio =
1966 			list_first_entry(&root_ns->ns.prios,
1967 					 struct fs_base,
1968 					 list);
1969 
1970 		fs_remove_node(iter_prio);
1971 	}
1972 
1973 	fs_remove_node(&root_ns->ns.base);
1974 	dev->root_ns = NULL;
1975 }
1976 
1977 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1978 					struct mlx5_flow_root_namespace **root_ns)
1979 {
1980 	struct fs_base *prio;
1981 
1982 	if (!*root_ns)
1983 		return;
1984 
1985 	if (!list_empty(&(*root_ns)->ns.prios)) {
1986 		prio = list_first_entry(&(*root_ns)->ns.prios,
1987 					struct fs_base,
1988 					list);
1989 		fs_remove_node(prio);
1990 	}
1991 	fs_remove_node(&(*root_ns)->ns.base);
1992 	*root_ns = NULL;	/* clear the caller's pointer, not a local copy */
1993 }
1994 
1995 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
1996 {
1997 	cleanup_root_ns(dev);
1998 	cleanup_single_prio_root_ns(dev, &dev->sniffer_rx_root_ns);
1999 	cleanup_single_prio_root_ns(dev, &dev->sniffer_tx_root_ns);
2000 	cleanup_single_prio_root_ns(dev, &dev->fdb_root_ns);
2001 	cleanup_single_prio_root_ns(dev, &dev->esw_egress_root_ns);
2002 	cleanup_single_prio_root_ns(dev, &dev->esw_ingress_root_ns);
2003 }
2004 
2005 static struct mlx5_flow_namespace *
2006 fs_init_namespace(struct mlx5_flow_namespace *ns)
2007 {
2008 	ns->base.type = FS_TYPE_NAMESPACE;
2009 	init_rwsem(&ns->dests_rw_sem);
2010 	init_rwsem(&ns->notifiers_rw_sem);
2011 	INIT_LIST_HEAD(&ns->prios);
2012 	INIT_LIST_HEAD(&ns->list_notifiers);
2013 
2014 	return ns;
2015 }
2016 
2017 static struct mlx5_flow_root_namespace *
2018 create_root_ns(struct mlx5_core_dev *dev,
2019 	       enum fs_ft_type table_type,
2020 	       char *name)
2021 {
2022 	struct mlx5_flow_root_namespace *root_ns;
2023 	struct mlx5_flow_namespace *ns;
2024 
2025 	/* create the root namespace */
2026 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2027 	if (!root_ns)
2028 		goto err;
2029 
2030 	root_ns->dev = dev;
2031 	root_ns->table_type = table_type;
2032 	mutex_init(&root_ns->fs_chain_lock);
2033 
2034 	ns = &root_ns->ns;
2035 	fs_init_namespace(ns);
2036 	fs_add_node(&ns->base, NULL, name, 1);
2037 
2038 	return root_ns;
2039 err:
2040 	return NULL;
2041 }
2042 
2043 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2044 {
2045 	struct fs_prio *prio;
2046 
2047 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2048 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2049 	if (!dev->fdb_root_ns)
2050 		return -ENOMEM;
2051 
2052 	/* create 1 prio */
2053 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2054 	if (IS_ERR(prio))
2055 		return PTR_ERR(prio);
2056 	else
2057 		return 0;
2058 }
2059 
2060 #define MAX_VPORTS 128
2061 
2062 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2063 {
2064 	struct fs_prio *prio;
2065 
2066 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2067 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2068 	if (!dev->esw_egress_root_ns)
2069 		return -ENOMEM;
2070 
2071 	/* create 1 prio */
2072 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2073 			      "esw_egress_prio", 0);
2074 	if (IS_ERR(prio))
2075 		return PTR_ERR(prio);
2076 	else
2077 		return 0;
2078 }
2079 
2080 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2081 {
2082 	struct fs_prio *prio;
2083 
2084 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2085 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2086 	if (!dev->esw_ingress_root_ns)
2087 		return -ENOMEM;
2088 
2089 	/* create 1 prio */
2090 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2091 			      "esw_ingress_prio", 0);
2092 	if (IS_ERR(prio))
2093 		return PTR_ERR(prio);
2094 	else
2095 		return 0;
2096 }
2097 
2098 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2099 {
2100 	struct fs_prio *prio;
2101 
2102 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2103 						 MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2104 	if (!dev->sniffer_rx_root_ns)
2105 		return -ENOMEM;
2106 
2107 	/* create 1 prio */
2108 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2109 			      "sniffer_prio", 0);
2110 	if (IS_ERR(prio))
2111 		return PTR_ERR(prio);
2112 	else
2113 		return 0;
2114 }
2115 
2117 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2118 {
2119 	struct fs_prio *prio;
2120 
2121 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2122 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2123 	if (!dev->sniffer_tx_root_ns)
2124 		return -ENOMEM;
2125 
2126 	/* create 1 prio */
2127 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2128 			      "sniffer_prio", 0);
2129 	if (IS_ERR(prio))
2130 		return PTR_ERR(prio);
2131 	else
2132 		return 0;
2133 }
2134 
2135 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2136 						       const char *name)
2137 {
2138 	struct mlx5_flow_namespace	*ns;
2139 
2140 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2141 	if (!ns)
2142 		return ERR_PTR(-ENOMEM);
2143 
2144 	fs_init_namespace(ns);
2145 	fs_add_node(&ns->base, &prio->base, name, 1);
2146 	list_add_tail(&ns->base.list, &prio->objs);
2147 
2148 	return ns;
2149 }
2150 
2151 #define FLOW_TABLE_BIT_SZ 1
2152 #define GET_FLOW_TABLE_CAP(dev, offset) \
2153 	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
2154 			offset / 32)) >>					\
2155 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
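
/*
 * Worked example of the extraction above: for a capability whose FS_CAP()
 * bit offset is 35, the macro reads dword 35 / 32 = 1 of the flow-table
 * capability area, byte-swaps it, and shifts right by
 * 32 - FLOW_TABLE_BIT_SZ - (35 & 0x1f) = 32 - 1 - 3 = 28, moving
 * big-endian bit 3 of that dword into the low bit, where the
 * FLOW_TABLE_BIT_SZ (0x1) mask isolates it.
 */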
2156 
2157 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2158 {
2159 	int i;
2160 
2161 	for (i = 0; i < caps->arr_sz; i++) {
2162 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2163 			return false;
2164 	}
2165 	return true;
2166 }
2167 
2168 static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2169 		    struct init_tree_node *node, struct fs_base *base_parent,
2170 		    struct init_tree_node *tree_parent)
2171 {
2172 	struct mlx5_flow_namespace *fs_ns;
2173 	struct fs_prio *fs_prio;
2174 	int priority;
2175 	struct fs_base *base;
2176 	int i;
2177 	int err = 0;
2178 
2179 	if (node->type == FS_TYPE_PRIO) {
2180 		if ((node->min_ft_level > max_ft_level) ||
2181 		    !has_required_caps(dev, &node->caps))
2182 			goto out;
2183 
2184 		fs_get_obj(fs_ns, base_parent);
2185 		priority = node - tree_parent->children;
2186 		fs_prio = fs_create_prio(fs_ns, priority,
2187 					 node->max_ft,
2188 					 node->name, node->flags);
2189 		if (IS_ERR(fs_prio)) {
2190 			err = PTR_ERR(fs_prio);
2191 			goto out;
2192 		}
2193 		base = &fs_prio->base;
2194 	} else if (node->type == FS_TYPE_NAMESPACE) {
2195 		fs_get_obj(fs_prio, base_parent);
2196 		fs_ns = fs_create_namespace(fs_prio, node->name);
2197 		if (IS_ERR(fs_ns)) {
2198 			err = PTR_ERR(fs_ns);
2199 			goto out;
2200 		}
2201 		base = &fs_ns->base;
2202 	} else {
2203 		return -EINVAL;
2204 	}
2205 	for (i = 0; i < node->ar_size; i++) {
2206 		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
2207 				      node);
2208 		if (err)
2209 			break;
2210 	}
2211 out:
2212 	return err;
2213 }
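
/*
 * Note on the priority computation above: "node - tree_parent->children" is
 * pointer arithmetic over the parent's children[] array, so a node's
 * priority is simply its index among its siblings.  For example, with
 * children[] = { bypass, offloads, kernel, leftovers }, the "kernel" node
 * is created with priority 2.
 */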
2214 
2215 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2216 		   struct init_tree_node *node, struct fs_base *parent)
2217 {
2218 	int i;
2219 	struct mlx5_flow_namespace *fs_ns;
2220 	int err = 0;
2221 
2222 	fs_get_obj(fs_ns, parent);
2223 	for (i = 0; i < node->ar_size; i++) {
2224 		err = _init_root_tree(dev, max_ft_level,
2225 				      &node->children[i], &fs_ns->base, node);
2226 		if (err)
2227 			break;
2228 	}
2229 	return err;
2230 }
2231 
2232 static int sum_max_ft_in_prio(struct fs_prio *prio);
2233 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2234 {
2235 	struct fs_prio *prio;
2236 	int sum = 0;
2237 
2238 	fs_for_each_prio(prio, ns) {
2239 		sum += sum_max_ft_in_prio(prio);
2240 	}
2241 	return sum;
2242 }
2243 
2244 static int sum_max_ft_in_prio(struct fs_prio *prio)
2245 {
2246 	int sum = 0;
2247 	struct fs_base *it;
2248 	struct mlx5_flow_namespace	*ns;
2249 
2250 	if (prio->max_ft)
2251 		return prio->max_ft;
2252 
2253 	fs_for_each_ns_or_ft(it, prio) {
2254 		if (it->type == FS_TYPE_FLOW_TABLE)
2255 			continue;
2256 
2257 		fs_get_obj(ns, it);
2258 		sum += sum_max_ft_in_ns(ns);
2259 	}
2260 	prio->max_ft = sum;
2261 	return sum;
2262 }
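
/*
 * Worked example of the mutual recursion above: a prio created with
 * max_ft == 0 that holds two sub-namespaces whose own prios declare
 * max_ft values of 5 and 3 ends up with max_ft = 5 + 3 = 8.  The result
 * is memoized in prio->max_ft, so later calls return it directly.
 */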
2263 
2264 static void set_max_ft(struct mlx5_flow_namespace *ns)
2265 {
2266 	struct fs_prio *prio;
2267 
2268 	if (!ns)
2269 		return;
2270 
2271 	fs_for_each_prio(prio, ns)
2272 		sum_max_ft_in_prio(prio);
2273 }
2274 
2275 static int init_root_ns(struct mlx5_core_dev *dev)
2276 {
2277 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2278 					      flow_table_properties_nic_receive.
2279 					      max_ft_level);
2280 
2281 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2282 				      MLX5_CORE_FS_ROOT_NS_NAME);
2283 	if (IS_ERR_OR_NULL(dev->root_ns))
2284 		goto err;
2285 
2287 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2288 		goto err;
2289 
2290 	set_max_ft(&dev->root_ns->ns);
2291 
2292 	return 0;
2293 err:
2294 	return -ENOMEM;
2295 }
2296 
2297 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2298 {
2299 	struct fs_base *pbase;
2300 	struct mlx5_flow_group *fg;
2301 
2302 	pbase = rule->base.parent;
2303 	WARN_ON(!pbase);
2304 	pbase = pbase->parent;
2305 	WARN_ON(!pbase);
2306 
2307 	fs_get_obj(fg, pbase);
2308 	return fg->mask.match_criteria_enable;
2309 }
2310 
2311 void mlx5_get_match_value(u32 *match_value,
2312 			  struct mlx5_flow_rule *rule)
2313 {
2314 	struct fs_base *pbase;
2315 	struct fs_fte *fte;
2316 
2317 	pbase = rule->base.parent;
2318 	WARN_ON(!pbase);
2319 	fs_get_obj(fte, pbase);
2320 
2321 	memcpy(match_value, fte->val, sizeof(fte->val));
2322 }
2323 
2324 void mlx5_get_match_criteria(u32 *match_criteria,
2325 			     struct mlx5_flow_rule *rule)
2326 {
2327 	struct fs_base *pbase;
2328 	struct mlx5_flow_group *fg;
2329 
2330 	pbase = rule->base.parent;
2331 	WARN_ON(!pbase);
2332 	pbase = pbase->parent;
2333 	WARN_ON(!pbase);
2334 
2335 	fs_get_obj(fg, pbase);
2336 	memcpy(match_criteria, &fg->mask.match_criteria,
2337 	       sizeof(fg->mask.match_criteria));
2338 }
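
/*
 * Usage sketch (illustrative): read back what a rule matches on, assuming,
 * as for the match buffers elsewhere in this file, that both the criteria
 * and the value use the fte_match_param layout that the memcpy sizes in
 * the two getters above copy out.
 *
 *	u32 criteria[MLX5_ST_SZ_DW(fte_match_param)];
 *	u32 value[MLX5_ST_SZ_DW(fte_match_param)];
 *	u8 enable;
 *
 *	enable = mlx5_get_match_criteria_enable(rule);
 *	mlx5_get_match_criteria(criteria, rule);
 *	mlx5_get_match_value(value, rule);
 */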
2339 
2340 int mlx5_init_fs(struct mlx5_core_dev *dev)
2341 {
2342 	int err;
2343 
2344 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
2345 		err = init_root_ns(dev);
2346 		if (err)
2347 			goto err;
2348 	}
2349 
2350 	err = init_fdb_root_ns(dev);
2351 	if (err)
2352 		goto err;
2353 
2354 	err = init_egress_acl_root_ns(dev);
2355 	if (err)
2356 		goto err;
2357 
2358 	err = init_ingress_acl_root_ns(dev);
2359 	if (err)
2360 		goto err;
2361 
2362 	err = init_sniffer_tx_root_ns(dev);
2363 	if (err)
2364 		goto err;
2365 
2366 	err = init_sniffer_rx_root_ns(dev);
2367 	if (err)
2368 		goto err;
2369 
2370 	return 0;
2371 err:
2372 	mlx5_cleanup_fs(dev);
2373 	return err;
2374 }
2375 
2376 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2377 						  enum mlx5_flow_namespace_type type)
2378 {
2379 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2380 	int prio;
2381 	struct fs_prio *fs_prio;
2382 	struct mlx5_flow_namespace *ns;
2383 
2384 	switch (type) {
2385 	case MLX5_FLOW_NAMESPACE_BYPASS:
2386 		prio = 0;
2387 		break;
2388 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
2389 		prio = 1;
2390 		break;
2391 	case MLX5_FLOW_NAMESPACE_KERNEL:
2392 		prio = 2;
2393 		break;
2394 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2395 		prio = 3;
2396 		break;
2397 	case MLX5_FLOW_NAMESPACE_FDB:
2398 		if (dev->fdb_root_ns)
2399 			return &dev->fdb_root_ns->ns;
2400 		else
2401 			return NULL;
2402 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2403 		if (dev->esw_egress_root_ns)
2404 			return &dev->esw_egress_root_ns->ns;
2405 		else
2406 			return NULL;
2407 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2408 		if (dev->esw_ingress_root_ns)
2409 			return &dev->esw_ingress_root_ns->ns;
2410 		else
2411 			return NULL;
2412 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2413 		if (dev->sniffer_rx_root_ns)
2414 			return &dev->sniffer_rx_root_ns->ns;
2415 		else
2416 			return NULL;
2417 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2418 		if (dev->sniffer_tx_root_ns)
2419 			return &dev->sniffer_tx_root_ns->ns;
2420 		else
2421 			return NULL;
2422 	default:
2423 		return NULL;
2424 	}
2425 
2426 	if (!root_ns)
2427 		return NULL;
2428 
2429 	fs_prio = find_prio(&root_ns->ns, prio);
2430 	if (!fs_prio)
2431 		return NULL;
2432 
2433 	ns = list_first_entry(&fs_prio->objs,
2434 			      typeof(*ns),
2435 			      base.list);
2436 
2437 	return ns;
2438 }
2439 EXPORT_SYMBOL(mlx5_get_flow_namespace);
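
/*
 * Usage sketch (illustrative): resolve a namespace before creating tables
 * or rules in it.  The NIC-RX types map onto prios 0..3 of the root
 * namespace as in the switch above; the other types return their own
 * dedicated root namespaces.
 *
 *	struct mlx5_flow_namespace *ns;
 *
 *	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_KERNEL);
 *	if (!ns)
 *		return -EOPNOTSUPP;
 */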
2440 
2442 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2443 				  struct mlx5_flow_handler *fs_handler,
2444 				  void  *client_data)
2445 {
2446 	struct fs_client_priv_data *priv_data;
2447 
2448 	mutex_lock(&rule->clients_lock);
2449 	/* Check that the handler is not already in the list */
2450 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2451 		if (priv_data->fs_handler == fs_handler) {
2452 			priv_data->client_dst_data = client_data;
2453 			goto unlock;
2454 		}
2455 	}
2456 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2457 	if (!priv_data) {
2458 		mutex_unlock(&rule->clients_lock);
2459 		return -ENOMEM;
2460 	}
2461 
2462 	priv_data->client_dst_data = client_data;
2463 	priv_data->fs_handler = fs_handler;
2464 	list_add(&priv_data->list, &rule->clients_data);
2465 
2466 unlock:
2467 	mutex_unlock(&rule->clients_lock);
2468 
2469 	return 0;
2470 }
2471 
2472 static int remove_from_clients(struct mlx5_flow_rule *rule,
2473 			bool ctx_changed,
2474 			void *client_data,
2475 			void *context)
2476 {
2477 	struct fs_client_priv_data *iter_client;
2478 	struct fs_client_priv_data *temp_client;
2479 	struct mlx5_flow_handler *handler =
2480 		(struct mlx5_flow_handler *)context;
2481 
2482 	mutex_lock(&rule->clients_lock);
2483 	list_for_each_entry_safe(iter_client, temp_client,
2484 				 &rule->clients_data, list) {
2485 		if (iter_client->fs_handler == handler) {
2486 			list_del(&iter_client->list);
2487 			kfree(iter_client);
2488 			break;
2489 		}
2490 	}
2491 	mutex_unlock(&rule->clients_lock);
2492 
2493 	return 0;
2494 }
2495 
2496 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2497 								enum mlx5_flow_namespace_type ns_type,
2498 								rule_event_fn add_cb,
2499 								rule_event_fn del_cb,
2500 								void *context)
2501 {
2502 	struct mlx5_flow_namespace *ns;
2503 	struct mlx5_flow_handler *handler;
2504 
2505 	ns = mlx5_get_flow_namespace(dev, ns_type);
2506 	if (!ns)
2507 		return ERR_PTR(-EINVAL);
2508 
2509 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2510 	if (!handler)
2511 		return ERR_PTR(-ENOMEM);
2512 
2513 	handler->add_dst_cb = add_cb;
2514 	handler->del_dst_cb = del_cb;
2515 	handler->client_context = context;
2516 	handler->ns = ns;
2517 	down_write(&ns->notifiers_rw_sem);
2518 	list_add_tail(&handler->list, &ns->list_notifiers);
2519 	up_write(&ns->notifiers_rw_sem);
2520 
2521 	return handler;
2522 }
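
/*
 * Usage sketch (illustrative): the add/del callbacks follow rule_event_fn,
 * which, judging by remove_from_clients() below, receives the rule, a flag
 * telling whether the rule is new, optional per-rule client data, and the
 * context passed at registration.  "my_driver", "my_rule_deleted" and "drv"
 * are hypothetical.  mlx5_flow_iterate_existing_rules() below can replay
 * rules that existed before registration.
 *
 *	static int my_rule_added(struct mlx5_flow_rule *rule, bool is_new_rule,
 *				 void *client_data, void *context)
 *	{
 *		struct my_driver *drv = context;
 *
 *		... inspect the rule, e.g. mlx5_get_match_criteria_enable() ...
 *		return 0;
 *	}
 *
 *	handler = mlx5_register_rule_notifier(dev, MLX5_FLOW_NAMESPACE_BYPASS,
 *					      my_rule_added, my_rule_deleted,
 *					      drv);
 *	if (IS_ERR(handler))
 *		return PTR_ERR(handler);
 */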
2523 
2524 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2525 				rule_event_fn add_rule_cb,
2526 				void *context);
2527 
2528 void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
2529 {
2530 	struct mlx5_flow_namespace *ns = handler->ns;
2531 
2532 	/* Remove this handler from every rule's client data */
2533 	down_write(&ns->dests_rw_sem);
2534 	down_write(&ns->notifiers_rw_sem);
2535 	iterate_rules_in_ns(ns, remove_from_clients, handler);
2536 	list_del(&handler->list);
2537 	up_write(&ns->notifiers_rw_sem);
2538 	up_write(&ns->dests_rw_sem);
2539 	kfree(handler);
2540 }
2541 
2542 static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
2543 				rule_event_fn add_rule_cb,
2544 				void *context)
2545 {
2546 	struct mlx5_flow_group *iter_fg;
2547 	struct fs_fte *iter_fte;
2548 	struct mlx5_flow_rule *iter_rule;
2549 	int err = 0;
2550 	bool is_new_rule;
2551 
2552 	mutex_lock(&ft->base.lock);
2553 	fs_for_each_fg(iter_fg, ft) {
2554 		mutex_lock(&iter_fg->base.lock);
2555 		fs_for_each_fte(iter_fte, iter_fg) {
2556 			mutex_lock(&iter_fte->base.lock);
2557 			is_new_rule = true;
2558 			fs_for_each_dst(iter_rule, iter_fte) {
2559 				fs_get(&iter_rule->base);
2560 				err = add_rule_cb(iter_rule,
2561 						 is_new_rule,
2562 						 NULL,
2563 						 context);
2564 				fs_put_parent_locked(&iter_rule->base);
2565 				if (err)
2566 					break;
2567 				is_new_rule = false;
2568 			}
2569 			mutex_unlock(&iter_fte->base.lock);
2570 			if (err)
2571 				break;
2572 		}
2573 		mutex_unlock(&iter_fg->base.lock);
2574 		if (err)
2575 			break;
2576 	}
2577 	mutex_unlock(&ft->base.lock);
2578 }
2579 
2580 static void iterate_rules_in_prio(struct fs_prio *prio,
2581 				  rule_event_fn add_rule_cb,
2582 				  void *context)
2583 {
2584 	struct fs_base *it;
2585 
2586 	mutex_lock(&prio->base.lock);
2587 	fs_for_each_ns_or_ft(it, prio) {
2588 		if (it->type == FS_TYPE_FLOW_TABLE) {
2589 			struct mlx5_flow_table	      *ft;
2590 
2591 			fs_get_obj(ft, it);
2592 			iterate_rules_in_ft(ft, add_rule_cb, context);
2593 		} else {
2594 			struct mlx5_flow_namespace *ns;
2595 
2596 			fs_get_obj(ns, it);
2597 			iterate_rules_in_ns(ns, add_rule_cb, context);
2598 		}
2599 	}
2600 	mutex_unlock(&prio->base.lock);
2601 }
2602 
2603 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2604 				rule_event_fn add_rule_cb,
2605 				void *context)
2606 {
2607 	struct fs_prio *iter_prio;
2608 
2609 	mutex_lock(&ns->base.lock);
2610 	fs_for_each_prio(iter_prio, ns) {
2611 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2612 	}
2613 	mutex_unlock(&ns->base.lock);
2614 }
2615 
2616 void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
2617 					 rule_event_fn add_rule_cb,
2618 					 void *context)
2619 {
2620 	down_write(&ns->dests_rw_sem);
2621 	down_read(&ns->notifiers_rw_sem);
2622 	iterate_rules_in_ns(ns, add_rule_cb, context);
2623 	up_read(&ns->notifiers_rw_sem);
2624 	up_write(&ns->dests_rw_sem);
2625 }
2626 
2628 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2629 {
2630 	struct mlx5_flow_rule_node *iter_node;
2631 	struct mlx5_flow_rule_node *temp_node;
2632 
2633 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2634 		list_del(&iter_node->list);
2635 		kfree(iter_node);
2636 	}
2637 
2638 	kfree(rules_list);
2639 }
2640 
2641 #define ROCEV1_ETHERTYPE 0x8915
2642 static int set_rocev1_rules(struct list_head *rules_list)
2643 {
2644 	struct mlx5_flow_rule_node *rocev1_rule;
2645 
2646 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2647 	if (!rocev1_rule)
2648 		return -ENOMEM;
2649 
2650 	rocev1_rule->match_criteria_enable =
2651 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2652 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2653 		 0xffff);
2654 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2655 		 ROCEV1_ETHERTYPE);
2656 
2657 	list_add_tail(&rocev1_rule->list, rules_list);
2658 
2659 	return 0;
2660 }
2661 
2662 #define ROCEV2_UDP_PORT 4791
2663 static int set_rocev2_rules(struct list_head *rules_list)
2664 {
2665 	struct mlx5_flow_rule_node *ipv4_rule;
2666 	struct mlx5_flow_rule_node *ipv6_rule;
2667 
2668 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2669 	if (!ipv4_rule)
2670 		return -ENOMEM;
2671 
2672 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2673 	if (!ipv6_rule) {
2674 		kfree(ipv4_rule);
2675 		return -ENOMEM;
2676 	}
2677 
2678 	ipv4_rule->match_criteria_enable =
2679 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2680 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2681 		 0xffff);
2682 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2683 		 0x0800);
2684 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2685 		 0xff);
2686 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2687 		 IPPROTO_UDP);
2688 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2689 		 0xffff);
2690 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2691 		 ROCEV2_UDP_PORT);
2692 
2693 	ipv6_rule->match_criteria_enable =
2694 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2695 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2696 		 0xffff);
2697 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2698 		 0x86dd);
2699 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2700 		 0xff);
2701 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2702 		 IPPROTO_UDP);
2703 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2704 		 0xffff);
2705 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2706 		 ROCEV2_UDP_PORT);
2707 
2708 	list_add_tail(&ipv4_rule->list, rules_list);
2709 	list_add_tail(&ipv6_rule->list, rules_list);
2710 
2711 	return 0;
2712 }
2713 
2715 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2716 {
2717 	int err = 0;
2718 	struct mlx5_flow_rules_list *rules_list =
2719 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2720 
2721 	if (!rules_list)
2722 		return NULL;
2723 
2724 	INIT_LIST_HEAD(&rules_list->head);
2725 
2726 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2727 		err = set_rocev1_rules(&rules_list->head);
2728 		if (err)
2729 			goto free_list;
2730 	}
2731 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2732 		err = set_rocev2_rules(&rules_list->head);
2733 	if (err)
2734 		goto free_list;
2735 
2736 	return rules_list;
2737 
2738 free_list:
2739 	mlx5_del_flow_rules_list(rules_list);
2740 	return NULL;
2741 }
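
/*
 * Usage sketch (illustrative): build the RoCE steering descriptions for
 * both protocol versions, walk them, and release the list.  Each node
 * carries one match criteria/value pair set up above (Ethertype 0x8915
 * for v1; UDP destination port 4791 over IPv4/IPv6 for v2).
 *
 *	struct mlx5_flow_rules_list *rules;
 *	struct mlx5_flow_rule_node *node;
 *
 *	rules = get_roce_flow_rules(MLX5_ROCE_VERSION_1_CAP |
 *				    MLX5_ROCE_VERSION_2_CAP);
 *	if (!rules)
 *		return -ENOMEM;
 *	list_for_each_entry(node, &rules->head, list) {
 *		... install one steering rule per node ...
 *	}
 *	mlx5_del_flow_rules_list(rules);
 */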
2742 
2743 struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
2744 						 enum mlx5_flow_namespace_type ns_type,
2745 						 u8 num_actions,
2746 						 void *modify_actions)
2747 {
2748 	struct mlx5_modify_hdr *modify_hdr;
2749 	int err;
2750 
2751 	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
2752 	if (!modify_hdr)
2753 		return ERR_PTR(-ENOMEM);
2754 
2755 	modify_hdr->ns_type = ns_type;
2756 	err = mlx5_cmd_modify_header_alloc(dev, ns_type, num_actions,
2757 					   modify_actions, modify_hdr);
2758 	if (err) {
2759 		kfree(modify_hdr);
2760 		return ERR_PTR(err);
2761 	}
2762 
2763 	return modify_hdr;
2764 }
2765 EXPORT_SYMBOL(mlx5_modify_header_alloc);
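
/*
 * Usage sketch (illustrative): allocate a modify-header context holding a
 * single SET action.  The set_action_in layout and the MLX5_ACTION_*
 * enums are assumed to be available from mlx5_ifc.h, as in the Linux
 * driver; the field choice (outer IP TTL) is only an example.
 *
 *	u32 action[MLX5_ST_SZ_DW(set_action_in)] = {};
 *	struct mlx5_modify_hdr *mh;
 *
 *	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
 *	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
 *	MLX5_SET(set_action_in, action, data, 64);
 *
 *	mh = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL,
 *				      1, action);
 *	if (IS_ERR(mh))
 *		return PTR_ERR(mh);
 *	...
 *	mlx5_modify_header_dealloc(dev, mh);
 */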
2766 
2767 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
2768 				struct mlx5_modify_hdr *modify_hdr)
2769 {
2770 	mlx5_cmd_modify_header_dealloc(dev, modify_hdr);
2771 	kfree(modify_hdr);
2772 }
2773 EXPORT_SYMBOL(mlx5_modify_header_dealloc);
2774 
2775 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
2776 						     struct mlx5_pkt_reformat_params *params,
2777 						     enum mlx5_flow_namespace_type ns_type)
2778 {
2779 	struct mlx5_pkt_reformat *pkt_reformat;
2780 	int err;
2781 
2782 	pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
2783 	if (!pkt_reformat)
2784 		return ERR_PTR(-ENOMEM);
2785 
2786 	pkt_reformat->ns_type = ns_type;
2787 	pkt_reformat->reformat_type = params->type;
2788 	err = mlx5_cmd_packet_reformat_alloc(dev, params, ns_type,
2789 					     pkt_reformat);
2790 	if (err) {
2791 		kfree(pkt_reformat);
2792 		return ERR_PTR(err);
2793 	}
2794 
2795 	return pkt_reformat;
2796 }
2797 EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
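
/*
 * Usage sketch (illustrative): allocate an encap context from a prebuilt
 * header blob.  "encap_header" and "encap_header_len" are hypothetical,
 * the params structure is assumed to carry type/size/data as in the Linux
 * driver, and the reformat-type constant is assumed to come from the
 * mlx5_ifc.h enum consumed by mlx5_cmd_packet_reformat_alloc().
 *
 *	struct mlx5_pkt_reformat_params params = {};
 *	struct mlx5_pkt_reformat *pr;
 *
 *	params.type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
 *	params.size = encap_header_len;
 *	params.data = encap_header;
 *
 *	pr = mlx5_packet_reformat_alloc(dev, &params, MLX5_FLOW_NAMESPACE_FDB);
 *	if (IS_ERR(pr))
 *		return PTR_ERR(pr);
 *	...
 *	mlx5_packet_reformat_dealloc(dev, pr);
 */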
2798 
2799 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
2800 				  struct mlx5_pkt_reformat *pkt_reformat)
2801 {
2802 	mlx5_cmd_packet_reformat_dealloc(dev, pkt_reformat);
2803 	kfree(pkt_reformat);
2804 }
2805 EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
2806 
2807