xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision 12c56d7d)
1 /*-
2  * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <linux/module.h>
32 #include <dev/mlx5/driver.h>
33 #include <dev/mlx5/mlx5_core/mlx5_core.h>
34 #include <dev/mlx5/mlx5_core/fs_core.h>
35 #include <linux/string.h>
36 #include <linux/compiler.h>
37 
/*
 * Number of elements in a brace-enclosed list of init_tree_node
 * initializers, computed at compile time from a compound literal.
 */
#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

/*
 * Build a FS_TYPE_PRIO init_tree_node.  The variadic arguments are the
 * child nodes; ar_size is derived from them automatically.
 */
#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
		 ...) {.type = FS_TYPE_PRIO,\
	.name = name_val,\
	.min_ft_level = min_level_val,\
	.flags = flags_val,\
	.max_ft = max_ft_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
51 
/* Priority with no minimum level and no capability requirements. */
#define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
		 __VA_ARGS__)\

/* Build a FS_TYPE_NAMESPACE init_tree_node with the given children. */
#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
	.name = name_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
61 
/* Number of elements in a brace-enclosed list of capability offsets. */
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

/* Bit offset of a field inside the flow_table_nic_cap layout. */
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

/* Capability list a tree node requires before it is instantiated. */
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__}}
69 
/*
 * Flow-table counts and minimum levels for the static namespace tree
 * below.  The *_MIN_LEVEL expansions are fully parenthesized so they
 * expand safely inside arbitrary expressions (the previous unguarded
 * "KERNEL_MIN_LEVEL + 1" form would bind wrongly next to * or -).
 */
#define BYPASS_MAX_FT 5
#define BYPASS_PRIO_MAX_FT 1
#define KERNEL_MAX_FT 5
#define LEFTOVER_MAX_FT 1
#define KERNEL_MIN_LEVEL 3
#define LEFTOVER_MIN_LEVEL (KERNEL_MIN_LEVEL + 1)
#define BYPASS_MIN_LEVEL (MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL)
/* Set of HCA capability bit offsets a tree node depends on. */
struct node_caps {
	size_t	arr_sz;	/* number of entries in caps[] */
	long	*caps;	/* capability bit offsets, see FS_CAP() */
};
81 
/*
 * Static template of the default flow-steering tree: a root namespace
 * holding the bypass, kernel and leftovers priorities.  The bypass and
 * leftovers priorities are only built when the device supports flow
 * modification and root-table replacement (see FS_REQUIRED_CAPS uses).
 */
struct init_tree_node {
	enum fs_type	type;			/* FS_TYPE_PRIO or FS_TYPE_NAMESPACE */
	const char	*name;
	struct init_tree_node *children;
	int ar_size;				/* number of children */
	struct node_caps caps;			/* required HCA capabilities */
	u8  flags;
	int min_ft_level;			/* minimum flow-table level */
	int prio;
	int max_ft;				/* flow-table budget of this node */
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.name = "root",
	.ar_size = 3,
	.children = (struct init_tree_node[]) {
		/* Bypass: eight user priorities plus a multicast one. */
		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("by_pass_ns",
				ADD_FT_PRIO("prio0", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio1", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio2", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio3", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio4", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio5", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio6", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio7", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio-mcast", 0,
					    BYPASS_PRIO_MAX_FT))),
		/* Kernel: the driver's own steering tables. */
		ADD_PRIO("kernel_prio", 0, KERNEL_MIN_LEVEL, 0, {},
			 ADD_NS("kernel_ns",
				ADD_FT_PRIO("prio_kernel-0", 0,
					    KERNEL_MAX_FT))),
		/* Leftovers: shared catch-all priority. */
		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
			 LEFTOVER_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("leftover_ns",
				ADD_FT_PRIO("leftovers_prio-0",
					MLX5_CORE_FS_PRIO_SHARED,
					LEFTOVER_MAX_FT)))
	}
};
133 
134 /* Tree creation functions */
135 
136 static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
137 {
138 	struct fs_base *parent;
139 
140 	/* Make sure we only read it once while we go up the tree */
141 	while ((parent = node->parent))
142 		node = parent;
143 
144 	if (node->type != FS_TYPE_NAMESPACE) {
145 		return NULL;
146 	}
147 
148 	return container_of(container_of(node,
149 					 struct mlx5_flow_namespace,
150 					 base),
151 			    struct mlx5_flow_root_namespace,
152 			    ns);
153 }
154 
155 static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
156 {
157 	struct mlx5_flow_root_namespace *root = find_root(node);
158 
159 	if (root)
160 		return root->dev;
161 	return NULL;
162 }
163 
/*
 * Initialize the common bookkeeping of a tree node: its krefs, the
 * completion used to wait for teardown, the sibling list and the lock.
 * @refcount is the initial number of user references.
 */
static void fs_init_node(struct fs_base *node,
			 unsigned int refcount)
{
	kref_init(&node->refcount);
	atomic_set(&node->users_refcount, refcount);
	init_completion(&node->complete);
	INIT_LIST_HEAD(&node->list);
	mutex_init(&node->lock);
}
173 
/*
 * Name @node and attach it under @parent, taking a user reference on
 * the parent.
 * NOTE(review): kstrdup_const may fail and leave node->name NULL —
 * downstream users appear to pass the name straight to logging; confirm
 * they tolerate a NULL name.
 */
static void _fs_add_node(struct fs_base *node,
			 const char *name,
			 struct fs_base *parent)
{
	if (parent)
		atomic_inc(&parent->users_refcount);
	node->name = kstrdup_const(name, GFP_KERNEL);
	node->parent = parent;
}
183 
/* Initialize @node with @refcount user references and link it under
 * @parent with the given @name.
 */
static void fs_add_node(struct fs_base *node,
			struct fs_base *parent, const char *name,
			unsigned int refcount)
{
	fs_init_node(node, refcount);
	_fs_add_node(node, name, parent);
}
191 
192 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
193 		    bool parent_locked);
194 
195 static void fs_del_dst(struct mlx5_flow_rule *dst);
196 static void _fs_del_ft(struct mlx5_flow_table *ft);
197 static void fs_del_fg(struct mlx5_flow_group *fg);
198 static void fs_del_fte(struct fs_fte *fte);
199 
200 static void cmd_remove_node(struct fs_base *base)
201 {
202 	switch (base->type) {
203 	case FS_TYPE_FLOW_DEST:
204 		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
205 		break;
206 	case FS_TYPE_FLOW_TABLE:
207 		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
208 		break;
209 	case FS_TYPE_FLOW_GROUP:
210 		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
211 		break;
212 	case FS_TYPE_FLOW_ENTRY:
213 		fs_del_fte(container_of(base, struct fs_fte, base));
214 		break;
215 	default:
216 		break;
217 	}
218 }
219 
/*
 * kref release callback: tear down @node's firmware state while holding
 * the parent's lock (to serialize against sibling operations), signal
 * any waiter through node->complete, then drop the user reference this
 * node held on its parent — which may cascade and free the parent via
 * _fs_remove_node.  The node's memory itself is freed by the waiter
 * (fs_remove_node) or by _fs_remove_node.
 */
static void __fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	if (node->parent)
		mutex_lock(&node->parent->lock);
	mutex_lock(&node->lock);
	cmd_remove_node(node);
	mutex_unlock(&node->lock);
	complete(&node->complete);
	if (node->parent) {
		mutex_unlock(&node->parent->lock);
		_fs_put(node->parent, _fs_remove_node, false);
	}
}
235 
/*
 * Release callback that also frees the node: used when a child drops
 * the last user reference on its parent, so no explicit waiter is
 * responsible for the memory.
 */
void _fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	__fs_remove_node(kref);
	kfree_const(node->name);
	kfree(node);
}
244 
/* Take a user reference on @node. */
static void fs_get(struct fs_base *node)
{
	atomic_inc(&node->users_refcount);
}
249 
/*
 * Drop a user reference on @node.  When the count reaches zero the node
 * is unlinked from its parent's child list and the underlying kref is
 * released through @kref_cb.  @parent_locked says whether the caller
 * already holds the parent's lock; whatever lock state existed on entry
 * is restored before returning.
 */
static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked)
{
	struct fs_base *parent_node = node->parent;

	if (parent_node && !parent_locked)
		mutex_lock(&parent_node->lock);
	if (atomic_dec_and_test(&node->users_refcount)) {
		if (parent_node) {
			/*remove from parent's list*/
			list_del_init(&node->list);
			mutex_unlock(&parent_node->lock);
		}
		kref_put(&node->refcount, kref_cb);
		/* re-take the lock the caller expects to still hold */
		if (parent_node && parent_locked)
			mutex_lock(&parent_node->lock);
	} else if (parent_node && !parent_locked) {
		mutex_unlock(&parent_node->lock);
	}
}
270 
/* Drop a user reference; the caller does not hold the parent's lock. */
static void fs_put(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, false);
}
275 
/* Drop a user reference while the caller holds the parent's lock. */
static void fs_put_parent_locked(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, true);
}
280 
/*
 * Drop the caller's reference, wait until teardown completes (the last
 * user signals node->complete), then free the name and the node.
 */
static void fs_remove_node(struct fs_base *node)
{
	fs_put(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
288 
/*
 * Like fs_remove_node(), but for callers that already hold the parent's
 * lock.
 */
static void fs_remove_node_parent_locked(struct fs_base *node)
{
	fs_put_parent_locked(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
296 
297 static struct fs_fte *fs_alloc_fte(u8 action,
298 				   u32 flow_tag,
299 				   u32 *match_value,
300 				   unsigned int index)
301 {
302 	struct fs_fte *fte;
303 
304 
305 	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
306 	if (!fte)
307 		return ERR_PTR(-ENOMEM);
308 
309 	memcpy(fte->val, match_value, sizeof(fte->val));
310 	fte->base.type =  FS_TYPE_FLOW_ENTRY;
311 	fte->dests_size = 0;
312 	fte->flow_tag = flow_tag;
313 	fte->index = index;
314 	INIT_LIST_HEAD(&fte->dests);
315 	fte->action = action;
316 
317 	return fte;
318 }
319 
/*
 * Allocate the hidden "star" (match-all) entry at @index in group @fg
 * of table @ft, with a single forward-to-flow-table destination whose
 * target table is filled in later by fs_set_star_rule().
 * Returns the entry, ERR_PTR(-ENOSPC) when the group is full, or
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
					  struct mlx5_flow_group *fg,
					  u32 *match_value,
					  unsigned int index)
{
	int err;
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;

	if (fg->num_ftes == fg->max_ftes)
		return ERR_PTR(-ENOSPC);

	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
	if (IS_ERR(fte))
		return fte;

	/*create dst*/
	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_fte;
	}

	fte->base.parent = &fg->base;
	fte->dests_size = 1;
	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
	dst->base.parent = &fte->base;
	list_add(&dst->base.list, &fte->dests);
	/* assumed that the callee creates the star rules sorted by index */
	list_add_tail(&fte->base.list, &fg->ftes);
	fg->num_ftes++;

	return fte;

free_fte:
	kfree(fte);
	return ERR_PTR(err);
}
359 
360 /* assume that fte can't be changed */
361 static void free_star_fte_entry(struct fs_fte *fte)
362 {
363 	struct mlx5_flow_group	*fg;
364 	struct mlx5_flow_rule	*dst, *temp;
365 
366 	fs_get_parent(fg, fte);
367 
368 	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
369 		fte->dests_size--;
370 		list_del(&dst->base.list);
371 		kfree(dst);
372 	}
373 
374 	list_del(&fte->base.list);
375 	fg->num_ftes--;
376 	kfree(fte);
377 }
378 
/*
 * Allocate a flow group object mirroring the firmware command layout in
 * @create_fg_in: copy the match criteria mask and enable bits, and
 * derive the start index and capacity from the start/end flow indices.
 * Returns the group or ERR_PTR(-ENOMEM).
 */
static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
{
	struct mlx5_flow_group *fg;
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    create_fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    create_fg_in,
					    match_criteria_enable);
	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fg->ftes);
	fg->mask.match_criteria_enable = match_criteria_enable;
	memcpy(&fg->mask.match_criteria, match_criteria,
	       sizeof(fg->mask.match_criteria));
	fg->base.type =  FS_TYPE_FLOW_GROUP;
	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
				   start_flow_index);
	/* end_flow_index is inclusive, hence the +1 */
	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
				end_flow_index) - fg->start_index + 1;
	return fg;
}
402 
403 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
404 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
405 					    struct fs_prio *prio);
406 
/* assumed src_ft and dst_ft can't be freed */
/*
 * Point @src_ft's star (match-all) rule at @dst_ft.  With a non-NULL
 * @dst_ft the star FTE is (re)written in firmware and a user reference
 * is taken on the destination table; with a NULL @dst_ft the FTE is
 * deleted, breaking the chain.  Returns 0 on success or a firmware
 * error.
 */
static int fs_set_star_rule(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *src_ft,
			    struct mlx5_flow_table *dst_ft)
{
	struct mlx5_flow_rule *src_dst;
	struct fs_fte *src_fte;
	int err = 0;
	u32 *match_value;
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	/* The star FTE has exactly one destination: the chained table. */
	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
				   struct mlx5_flow_rule, base.list);
	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}
	/*Create match context*/

	fs_get_parent(src_fte, src_dst);

	src_dst->dest_attr.ft = dst_ft;
	if (dst_ft) {
		err = mlx5_cmd_fs_set_fte(dev,
					  src_ft->vport,
					  &src_fte->status,
					  match_value, src_ft->type,
					  src_ft->id, src_fte->index,
					  src_ft->star_rule.fg->id,
					  src_fte->flow_tag,
					  src_fte->action,
					  src_fte->dests_size,
					  &src_fte->dests);
		if (err)
			goto free;

		/* Hold the destination table while we point at it. */
		fs_get(&dst_ft->base);
	} else {
		/* NOTE(review): delete errors are ignored here — looks
		 * like intentional best-effort teardown; confirm.
		 */
		mlx5_cmd_fs_delete_fte(dev,
				       src_ft->vport,
				       &src_fte->status,
				       src_ft->type, src_ft->id,
				       src_fte->index);
	}

free:
	kvfree(match_value);
	return err;
}
457 
458 static int connect_prev_fts(struct fs_prio *locked_prio,
459 			    struct fs_prio *prev_prio,
460 			    struct mlx5_flow_table *next_ft)
461 {
462 	struct mlx5_flow_table *iter;
463 	int err = 0;
464 	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
465 
466 	if (!dev)
467 		return -ENODEV;
468 
469 	mutex_lock(&prev_prio->base.lock);
470 	fs_for_each_ft(iter, prev_prio) {
471 		struct mlx5_flow_rule *src_dst =
472 			list_first_entry(&iter->star_rule.fte->dests,
473 					 struct mlx5_flow_rule, base.list);
474 		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
475 
476 		if (prev_ft == next_ft)
477 			continue;
478 
479 		err = fs_set_star_rule(dev, iter, next_ft);
480 		if (err) {
481 			mlx5_core_warn(dev,
482 			    "mlx5: flow steering can't connect prev and next\n");
483 			goto unlock;
484 		} else {
485 			/* Assume ft's prio is locked */
486 			if (prev_ft) {
487 				struct fs_prio *prio;
488 
489 				fs_get_parent(prio, prev_ft);
490 				if (prio == locked_prio)
491 					fs_put_parent_locked(&prev_ft->base);
492 				else
493 					fs_put(&prev_ft->base);
494 			}
495 		}
496 	}
497 
498 unlock:
499 	mutex_unlock(&prev_prio->base.lock);
500 	return 0;
501 }
502 
503 static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
504 {
505 	struct mlx5_flow_group *fg;
506 	int err;
507 	u32 *fg_in;
508 	u32 *match_value;
509 	struct mlx5_flow_table *next_ft;
510 	struct mlx5_flow_table *prev_ft;
511 	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
512 	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
513 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
514 
515 	fg_in = mlx5_vzalloc(fg_inlen);
516 	if (!fg_in) {
517 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
518 		return -ENOMEM;
519 	}
520 
521 	match_value = mlx5_vzalloc(match_len);
522 	if (!match_value) {
523 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
524 		kvfree(fg_in);
525 		return -ENOMEM;
526 	}
527 
528 	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
529 	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
530 	fg = fs_alloc_fg(fg_in);
531 	if (IS_ERR(fg)) {
532 		err = PTR_ERR(fg);
533 		goto out;
534 	}
535 	ft->star_rule.fg = fg;
536 	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
537 				     fg_in, ft->vport, ft->type,
538 				     ft->id,
539 				     &fg->id);
540 	if (err)
541 		goto free_fg;
542 
543 	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
544 						      match_value,
545 						      ft->max_fte);
546 	if (IS_ERR(ft->star_rule.fte))
547 		goto free_star_rule;
548 
549 	mutex_lock(&root->fs_chain_lock);
550 	next_ft = find_next_ft(prio);
551 	err = fs_set_star_rule(root->dev, ft, next_ft);
552 	if (err) {
553 		mutex_unlock(&root->fs_chain_lock);
554 		goto free_star_rule;
555 	}
556 	if (next_ft) {
557 		struct fs_prio *parent;
558 
559 		fs_get_parent(parent, next_ft);
560 		fs_put(&next_ft->base);
561 	}
562 	prev_ft = find_prev_ft(ft, prio);
563 	if (prev_ft) {
564 		struct fs_prio *prev_parent;
565 
566 		fs_get_parent(prev_parent, prev_ft);
567 
568 		err = connect_prev_fts(NULL, prev_parent, ft);
569 		if (err) {
570 			mutex_unlock(&root->fs_chain_lock);
571 			goto destroy_chained_star_rule;
572 		}
573 		fs_put(&prev_ft->base);
574 	}
575 	mutex_unlock(&root->fs_chain_lock);
576 	kvfree(fg_in);
577 	kvfree(match_value);
578 
579 	return 0;
580 
581 destroy_chained_star_rule:
582 	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
583 	if (next_ft)
584 		fs_put(&next_ft->base);
585 free_star_rule:
586 	free_star_fte_entry(ft->star_rule.fte);
587 	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
588 			       ft->type, ft->id,
589 			       fg->id);
590 free_fg:
591 	kfree(fg);
592 out:
593 	kvfree(fg_in);
594 	kvfree(match_value);
595 	return err;
596 }
597 
598 static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
599 {
600 	int err;
601 	struct mlx5_flow_root_namespace *root;
602 	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
603 	struct mlx5_flow_table *prev_ft, *next_ft;
604 	struct fs_prio *prev_prio;
605 
606 	WARN_ON(!dev);
607 
608 	root = find_root(&prio->base);
609 	if (!root)
610 		mlx5_core_err(dev,
611 		    "flow steering failed to find root of priority %s",
612 		    prio->base.name);
613 
614 	/* In order to ensure atomic deletion, first update
615 	 * prev ft to point on the next ft.
616 	 */
617 	mutex_lock(&root->fs_chain_lock);
618 	prev_ft = find_prev_ft(ft, prio);
619 	next_ft = find_next_ft(prio);
620 	if (prev_ft) {
621 		fs_get_parent(prev_prio, prev_ft);
622 		/*Prev is connected to ft, only if ft is the first(last) in the prio*/
623 		err = connect_prev_fts(prio, prev_prio, next_ft);
624 		if (err)
625 			mlx5_core_warn(root->dev,
626 				       "flow steering can't connect prev and next of flow table\n");
627 		fs_put(&prev_ft->base);
628 	}
629 
630 	err = fs_set_star_rule(root->dev, ft, NULL);
631 	/*One put is for fs_get in find next ft*/
632 	if (next_ft) {
633 		fs_put(&next_ft->base);
634 		if (!err)
635 			fs_put(&next_ft->base);
636 	}
637 
638 	mutex_unlock(&root->fs_chain_lock);
639 	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
640 				     ft->star_rule.fg->id);
641 	if (err)
642 		mlx5_core_warn(dev,
643 			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
644 			       ft->base.name);
645 	free_star_fte_entry(ft->star_rule.fte);
646 
647 	kfree(ft->star_rule.fg);
648 	ft->star_rule.fg = NULL;
649 }
650 
651 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
652 				 unsigned int prio)
653 {
654 	struct fs_prio *iter_prio;
655 
656 	fs_for_each_prio(iter_prio, ns) {
657 		if (iter_prio->prio == prio)
658 			return iter_prio;
659 	}
660 
661 	return NULL;
662 }
663 
664 static unsigned int _alloc_new_level(struct fs_prio *prio,
665 				     struct mlx5_flow_namespace *match);
666 
/*
 * Sum the max_ft budgets of every priority in @ns that precedes @prio,
 * then continue the upward walk through @ns's parent priority.  Half of
 * the mutual recursion with _alloc_new_level() that computes the level
 * for a newly created flow table.
 */
static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
				      struct fs_prio *prio)
{
	unsigned int level = 0;
	struct fs_prio *p;

	if (!ns)
		return 0;

	mutex_lock(&ns->base.lock);
	fs_for_each_prio(p, ns) {
		if (p != prio)
			level += p->max_ft;
		else
			break;
	}
	mutex_unlock(&ns->base.lock);

	/* recurse into the priority that contains this namespace */
	fs_get_parent(prio, ns);
	if (prio)
		WARN_ON(prio->base.type != FS_TYPE_PRIO);

	return level + _alloc_new_level(prio, ns);
}
691 
/* Called under lock of priority, hence locking all upper objects */
/*
 * Scan @prio's children from the tail: the first flow table found fixes
 * the base level (its level + 1); each namespace other than @match
 * contributes the max_ft budgets of all its priorities.  The walk then
 * continues upward via __alloc_new_level().
 */
static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match)
{
	struct mlx5_flow_namespace *ns;
	struct fs_base *it;
	unsigned int level = 0;

	if (!prio)
		return 0;

	mutex_lock(&prio->base.lock);
	fs_for_each_ns_or_ft_reverse(it, prio) {
		if (it->type == FS_TYPE_NAMESPACE) {
			struct fs_prio *p;

			fs_get_obj(ns, it);

			if (match != ns) {
				mutex_lock(&ns->base.lock);
				fs_for_each_prio(p, ns)
					level += p->max_ft;
				mutex_unlock(&ns->base.lock);
			} else {
				/* reached the namespace we came from */
				break;
			}
		} else {
			struct mlx5_flow_table *ft;

			/* Last existing table anchors the new level. */
			fs_get_obj(ft, it);
			mutex_unlock(&prio->base.lock);
			return level + ft->level + 1;
		}
	}

	fs_get_parent(ns, prio);
	mutex_unlock(&prio->base.lock);
	return __alloc_new_level(ns, prio) + level;
}
731 
/* Compute the level for a new flow table appended to @prio. */
static unsigned int alloc_new_level(struct fs_prio *prio)
{
	return _alloc_new_level(prio, NULL);
}
736 
737 static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
738 				    struct mlx5_flow_table *ft)
739 {
740 	int err = 0;
741 	int min_level = INT_MAX;
742 
743 	if (root->root_ft)
744 		min_level = root->root_ft->level;
745 
746 	if (ft->level < min_level)
747 		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
748 					      ft->id);
749 	else
750 		return err;
751 
752 	if (err)
753 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
754 			       ft->id);
755 	else
756 		root->root_ft = ft;
757 
758 	return err;
759 }
760 
/*
 * Allocate a flow table with @max_fte user entries in @fs_prio on
 * @vport, create it in firmware, install its star rule and link it into
 * the priority.  Two extra entries are reserved internally for the star
 * rules, so the firmware table size is the next power of two of
 * max_fte + 2.  Returns the table or an ERR_PTR.
 */
static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
						 u16 vport,
						 struct fs_prio *fs_prio,
						 int max_fte,
						 const char *name)
{
	struct mlx5_flow_table *ft;
	int err;
	int log_table_sz;
	int ft_size;
	char gen_name[20];
	struct mlx5_flow_root_namespace *root = find_root(&ns->base);
	struct mlx5_core_dev *dev = fs_get_dev(&ns->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of namespace %s",
		    ns->base.name);
		return ERR_PTR(-ENODEV);
	}

	if (fs_prio->num_ft == fs_prio->max_ft)
		return ERR_PTR(-ENOSPC);

	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	fs_init_node(&ft->base, 1);
	INIT_LIST_HEAD(&ft->fgs);

	/* Temporarily WA until we expose the level set in the API */
	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
		root->table_type == FS_FT_ESW_INGRESS_ACL)
		ft->level = 0;
	else
		ft->level = alloc_new_level(fs_prio);

	ft->base.type = FS_TYPE_FLOW_TABLE;
	ft->vport = vport;
	ft->type = root->table_type;
	/*Two entries are reserved for star rules*/
	ft_size = roundup_pow_of_two(max_fte + 2);
	/* The user never sees the reserved star-rule entries. */
	ft->max_fte = ft_size - 2;
	log_table_sz = ilog2(ft_size);
	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
				    ft->level, log_table_sz, &ft->id);
	if (err)
		goto free_ft;

	err = create_star_rule(ft, fs_prio);
	if (err)
		goto del_ft;

	/* NIC RX tables may become the new root when the device allows
	 * root replacement.
	 */
	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
			       flow_table_properties_nic_receive.modify_root)) {
		err = update_root_ft_create(root, ft);
		if (err)
			goto destroy_star_rule;
	}

	/* fall back to an auto-generated name when none was supplied */
	if (!name || !strlen(name)) {
		snprintf(gen_name, 20, "flow_table_%u", ft->id);
		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
	} else {
		_fs_add_node(&ft->base, name, &fs_prio->base);
	}
	list_add_tail(&ft->base.list, &fs_prio->objs);
	fs_prio->num_ft++;

	return ft;

destroy_star_rule:
	destroy_star_rule(ft, fs_prio);
del_ft:
	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
free_ft:
	kfree(ft);
	return ERR_PTR(err);
}
842 
843 static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
844 						u16 vport,
845 						unsigned int prio,
846 						int max_fte,
847 						const char *name)
848 {
849 	struct fs_prio *fs_prio = NULL;
850 	fs_prio = find_prio(ns, prio);
851 	if (!fs_prio)
852 		return ERR_PTR(-EINVAL);
853 
854 	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
855 }
856 
857 
858 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
859 						   struct list_head *start);
860 
861 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
862 						     struct list_head *start);
863 
864 static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
865 {
866 	struct mlx5_flow_table *ft;
867 
868 	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
869 	if (ft) {
870 		ft->shared_refcount++;
871 		return ft;
872 	}
873 
874 	return NULL;
875 }
876 
/*
 * Create (or, for a shared priority, reuse) an auto-grouped flow table:
 * flow groups are created on demand as rules are added, up to
 * @max_num_groups.  Returns the table or an ERR_PTR.
 */
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
							   int prio,
							   const char *name,
							   int num_flow_table_entries,
							   int max_num_groups)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_prio *fs_prio;
	bool is_shared_prio;

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		/* Shared prios hand the same table to every user. */
		mutex_lock(&fs_prio->shared_lock);
		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
	}

	if (ft)
		goto return_ft;

	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
			      name);
	if (IS_ERR(ft))
		goto return_ft;

	ft->autogroup.active = true;
	ft->autogroup.max_types = max_num_groups;
	if (is_shared_prio)
		ft->shared_refcount = 1;

return_ft:
	if (is_shared_prio)
		mutex_unlock(&fs_prio->shared_lock);
	return ft;
}
915 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
916 
/* Create a flow table bound to a specific @vport (e-switch usage). */
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     u16 vport,
						     int prio,
						     const char *name,
						     int num_flow_table_entries)
{
	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
}
925 EXPORT_SYMBOL(mlx5_create_vport_flow_table);
926 
/* Create a flow table on the local vport (vport 0). */
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       int prio,
					       const char *name,
					       int num_flow_table_entries)
{
	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
}
934 EXPORT_SYMBOL(mlx5_create_flow_table);
935 
936 static void _fs_del_ft(struct mlx5_flow_table *ft)
937 {
938 	int err;
939 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
940 	struct fs_prio *prio;
941 
942 	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
943 	if (err)
944 		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
945 			       ft->base.name);
946 
947 	fs_get_parent(prio, ft);
948 	prio->num_ft--;
949 }
950 
/*
 * If @ft is the current root flow table, promote the next table in the
 * chain (or NULL when none exists) to root in firmware and in @root.
 * Returns 0 when @ft is not the root or on success.
 */
static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
				    struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_table *put_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	fs_get_parent(prio, ft);
	/*Assuming objs contains only flow tables and
	 * flow tables are sorted by level.
	 */
	if (!list_is_last(&ft->base.list, &prio->objs)) {
		next_ft = list_next_entry(ft, base.list);
	} else {
		/* find_next_ft takes a reference we must drop below */
		next_ft = find_next_ft(prio);
		put_ft = next_ft;
	}

	if (next_ft) {
		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
					      next_ft->id);
		if (err)
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
	}
	if (!err)
		root->root_ft = next_ft;

	if (put_ft)
		fs_put(&put_ft->base);

	return err;
}
988 
/* Objects in the same prio are destroyed in the reverse order they were created */
/*
 * Destroy @ft.  For shared priorities just drop one share reference
 * while other users remain; otherwise demote it from root if needed,
 * remove its star entries and tear the node down under the priority
 * lock.  Returns 0 or a negative errno.
 */
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_root_namespace *root;
	bool is_shared_prio;
	struct mlx5_core_dev *dev;

	fs_get_parent(prio, ft);
	root = find_root(&prio->base);
	dev = fs_get_dev(&prio->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		return -ENODEV;
	}

	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&prio->shared_lock);
		if (ft->shared_refcount > 1) {
			/* Other users remain; only drop our share. */
			--ft->shared_refcount;
			fs_put(&ft->base);
			mutex_unlock(&prio->shared_lock);
			return 0;
		}
	}

	mutex_lock(&prio->base.lock);
	mutex_lock(&ft->base.lock);

	err = update_root_ft_destroy(root, ft);
	if (err)
		goto unlock_ft;

	/* delete two last entries */
	destroy_star_rule(ft, prio);

	mutex_unlock(&ft->base.lock);
	fs_remove_node_parent_locked(&ft->base);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;

unlock_ft:
	mutex_unlock(&ft->base.lock);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;
}
1046 EXPORT_SYMBOL(mlx5_destroy_flow_table);
1047 
/*
 * Create the flow group described by @fg_in inside @ft (in firmware and
 * as a tree node with @refcount initial users) and insert it after
 * @prev in the table's group list.  Returns the group or an ERR_PTR.
 */
static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
					    struct mlx5_flow_table *ft,
					    struct list_head *prev,
					    u32 *fg_in,
					    int refcount)
{
	struct mlx5_flow_group *fg;
	int err;
	unsigned int end_index;
	char name[20];

	fg = fs_alloc_fg(fg_in);
	if (IS_ERR(fg))
		return fg;

	/* NOTE(review): end_index is computed but never used — candidate
	 * for removal.
	 */
	end_index = fg->start_index + fg->max_ftes - 1;
	err =  mlx5_cmd_fs_create_fg(dev, fg_in,
				     ft->vport, ft->type, ft->id,
				     &fg->id);
	if (err)
		goto free_fg;

	mutex_lock(&ft->base.lock);
	if (ft->autogroup.active)
		ft->autogroup.num_types++;

	snprintf(name, sizeof(name), "group_%u", fg->id);
	/*Add node to tree*/
	fs_add_node(&fg->base, &ft->base, name, refcount);
	/*Add node to group list*/
	list_add(&fg->base.list, prev);
	mutex_unlock(&ft->base.lock);

	return fg;

free_fg:
	kfree(fg);
	return ERR_PTR(err);
}
1087 
1088 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1089 					       u32 *in)
1090 {
1091 	struct mlx5_flow_group *fg;
1092 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
1093 
1094 	if (!dev)
1095 		return ERR_PTR(-ENODEV);
1096 
1097 	if (ft->autogroup.active)
1098 		return ERR_PTR(-EPERM);
1099 
1100 	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);
1101 
1102 	return fg;
1103 }
1104 EXPORT_SYMBOL(mlx5_create_flow_group);
1105 
1106 /*Group is destoyed when all the rules in the group were removed*/
1107 static void fs_del_fg(struct mlx5_flow_group *fg)
1108 {
1109 	struct mlx5_flow_table *parent_ft;
1110 	struct mlx5_core_dev *dev;
1111 
1112 	fs_get_parent(parent_ft, fg);
1113 	dev = fs_get_dev(&parent_ft->base);
1114 	WARN_ON(!dev);
1115 
1116 	if (parent_ft->autogroup.active)
1117 		parent_ft->autogroup.num_types--;
1118 
1119 	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
1120 				   parent_ft->type,
1121 				   parent_ft->id, fg->id))
1122 		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
1123 }
1124 
/* Drop the tree reference on @fg; the firmware object is destroyed by
 * fs_del_fg() when the last reference goes away.
 */
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	fs_remove_node(&fg->base);
}
EXPORT_SYMBOL(mlx5_destroy_flow_group);
1130 
/* Compare @size bytes of @val1 and @val2 under @mask: true when every bit
 * selected by the mask is equal in both values.
 *
 * Fix: the original incremented the void pointers directly; arithmetic on
 * void * is a GNU extension and not valid ISO C.  Iterate over explicit
 * byte pointers instead.
 */
static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
{
	const unsigned char *m = mask;
	const unsigned char *v1 = val1;
	const unsigned char *v2 = val2;
	size_t i;

	/* TODO: optimize by comparing 64bits when possible */
	for (i = 0; i < size; i++)
		if ((v1[i] & m[i]) != (v2[i] & m[i]))
			return false;

	return true;
}
1143 
1144 bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
1145 			       void *val1, void *val2)
1146 {
1147 	if (mask->match_criteria_enable &
1148 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
1149 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1150 						val1, outer_headers);
1151 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1152 						val2, outer_headers);
1153 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1154 					      mask->match_criteria, outer_headers);
1155 
1156 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1157 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1158 			return false;
1159 	}
1160 
1161 	if (mask->match_criteria_enable &
1162 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
1163 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1164 						val1, misc_parameters);
1165 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1166 						val2, misc_parameters);
1167 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1168 					  mask->match_criteria, misc_parameters);
1169 
1170 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1171 					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
1172 			return false;
1173 	}
1174 	if (mask->match_criteria_enable &
1175 	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
1176 		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
1177 						val1, inner_headers);
1178 		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
1179 						val2, inner_headers);
1180 		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
1181 					  mask->match_criteria, inner_headers);
1182 
1183 		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
1184 					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
1185 			return false;
1186 	}
1187 	return true;
1188 }
1189 
1190 bool fs_match_exact_mask(u8 match_criteria_enable1,
1191 				u8 match_criteria_enable2,
1192 				void *mask1, void *mask2)
1193 {
1194 	return match_criteria_enable1 == match_criteria_enable2 &&
1195 		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
1196 }
1197 
static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start);

/* Reverse scan of @prio's children (namespaces and flow tables), resuming
 * after @start.  Returns the first flow table found — directly or nested
 * inside a child namespace — with a reference taken via fs_get(), or NULL
 * when the scan is exhausted.  Runs under prio->base.lock (taken by the
 * locked wrapper find_first_ft_in_prio_reverse()).
 */
static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
							      struct list_head *start)
{
	struct fs_base *it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			/* Hand the table back with a reference held. */
			fs_get(&ft->base);
			return ft;
		}

		/* A prio child that isn't a table must be a namespace. */
		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1229 
1230 static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
1231 							     struct list_head *start)
1232 {
1233 	struct mlx5_flow_table *ft;
1234 
1235 	if (!prio)
1236 		return NULL;
1237 
1238 	mutex_lock(&prio->base.lock);
1239 	ft = _find_first_ft_in_prio_reverse(prio, start);
1240 	mutex_unlock(&prio->base.lock);
1241 
1242 	return ft;
1243 }
1244 
/* Reverse scan of @ns's priorities, resuming before the prio that owns
 * @start, for the last flow table they contain.  Takes ns->base.lock for
 * the walk; the returned table (if any) carries a reference taken by the
 * prio-level helpers.
 */
static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue_reverse(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1268 
/* Find the flow table that precedes @curr in steering order, climbing
 * through enclosing namespaces and prios as needed.  Returns a held ft
 * (reference taken by the prio-level helpers) or NULL.  Assumes curr is
 * protected and curr's parent is locked.
 */
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	if (!curr)
		return NULL;

	/* prio has either namespace or flow-tables, but not both */
	if (!list_empty(&prio->objs) &&
	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
	    curr)
		return NULL;

	/* Climb one level per iteration: first search the rest of the
	 * enclosing namespace backwards, then continue from the namespace's
	 * own position inside the grandparent prio.
	 */
	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (prio && !ft)
			ft = find_first_ft_in_prio_reverse(prio,
							   &curr_base->list);
	}
	return ft;
}
1301 
/* Forward counterpart of _find_first_ft_in_prio_reverse(): scan @prio's
 * children after @start and return the first flow table (reference taken
 * via fs_get()), descending into child namespaces, or NULL.  Called under
 * prio->base.lock by find_first_ft_in_prio(); find_next_ft() calls it
 * directly.
 */
static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
						      struct list_head *start)
{
	struct fs_base	*it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			fs_get(&ft->base);
			return ft;
		}

		/* Child is a namespace: search its prios recursively. */
		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		ft = find_first_ft_in_ns(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1330 
1331 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
1332 						     struct list_head *start)
1333 {
1334 	struct mlx5_flow_table *ft;
1335 
1336 	if (!prio)
1337 		return NULL;
1338 
1339 	mutex_lock(&prio->base.lock);
1340 	ft = _find_first_ft_in_prio(prio, start);
1341 	mutex_unlock(&prio->base.lock);
1342 
1343 	return ft;
1344 }
1345 
/* Forward scan of @ns's priorities, resuming after the prio that owns
 * @start, for the first flow table they contain.  Takes ns->base.lock for
 * the walk; the returned table (if any) carries a reference taken by the
 * prio-level helpers.
 */
static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
						   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1369 
/* Find the flow table that follows @prio in steering order, climbing
 * through enclosing namespaces as needed.  Returns a held ft (reference
 * taken by the prio-level helpers) or NULL.  Assumes curr is protected
 * and curr's parent is locked.
 */
static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		/* Search the remainder of the enclosing namespace first ... */
		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		/* ... then continue from the namespace's own position in the
		 * grandparent prio.
		 */
		if (!ft && prio)
			ft = _find_first_ft_in_prio(prio, &curr_base->list);
	}
	return ft;
}
1391 
1392 
1393 /* called under ft mutex lock */
1394 static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
1395 						u8 match_criteria_enable,
1396 						u32 *match_criteria)
1397 {
1398 	unsigned int group_size;
1399 	unsigned int candidate_index = 0;
1400 	unsigned int candidate_group_num = 0;
1401 	struct mlx5_flow_group *g;
1402 	struct mlx5_flow_group *ret;
1403 	struct list_head *prev = &ft->fgs;
1404 	struct mlx5_core_dev *dev;
1405 	u32 *in;
1406 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1407 	void *match_criteria_addr;
1408 
1409 	if (!ft->autogroup.active)
1410 		return ERR_PTR(-ENOENT);
1411 
1412 	dev = fs_get_dev(&ft->base);
1413 	if (!dev)
1414 		return ERR_PTR(-ENODEV);
1415 
1416 	in = mlx5_vzalloc(inlen);
1417 	if (!in) {
1418 		mlx5_core_warn(dev, "failed to allocate inbox\n");
1419 		return ERR_PTR(-ENOMEM);
1420 	}
1421 
1422 
1423 	if (ft->autogroup.num_types < ft->autogroup.max_types)
1424 		group_size = ft->max_fte / (ft->autogroup.max_types + 1);
1425 	else
1426 		group_size = 1;
1427 
1428 	if (group_size == 0) {
1429 		mlx5_core_warn(dev,
1430 			       "flow steering can't create group size of 0\n");
1431 		ret = ERR_PTR(-EINVAL);
1432 		goto out;
1433 	}
1434 
1435 	/* sorted by start_index */
1436 	fs_for_each_fg(g, ft) {
1437 		candidate_group_num++;
1438 		if (candidate_index + group_size > g->start_index)
1439 			candidate_index = g->start_index + g->max_ftes;
1440 		else
1441 			break;
1442 		prev = &g->base.list;
1443 	}
1444 
1445 	if (candidate_index + group_size > ft->max_fte) {
1446 		ret = ERR_PTR(-ENOSPC);
1447 		goto out;
1448 	}
1449 
1450 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
1451 		 match_criteria_enable);
1452 	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
1453 	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
1454 		 group_size - 1);
1455 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
1456 					   in, match_criteria);
1457 	memcpy(match_criteria_addr, match_criteria,
1458 	       MLX5_ST_SZ_BYTES(fte_match_param));
1459 
1460 	ret = fs_create_fg(dev, ft, prev, in, 0);
1461 out:
1462 	kvfree(in);
1463 	return ret;
1464 }
1465 
1466 static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
1467 {
1468 	struct mlx5_flow_namespace *ns = NULL;
1469 
1470 	while (node  && (node->type != FS_TYPE_NAMESPACE ||
1471 			      list_empty(&container_of(node, struct
1472 						       mlx5_flow_namespace,
1473 						       base)->list_notifiers)))
1474 		node = node->parent;
1475 
1476 	if (node)
1477 		fs_get_obj(ns, node);
1478 
1479 	return ns;
1480 }
1481 
1482 
/*Assumption- fte is locked*/
/* Invoke every add_dst_cb notifier registered on the nearest namespace
 * that has notifiers.  @is_new_rule tells callbacks whether @dst is the
 * FTE's first destination.  Stops at the first callback error.
 */
static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool is_new_rule = list_first_entry(&fte->dests,
					    struct mlx5_flow_rule,
					    base.list) == dst;
	int err;

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;

	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		if (iter_handler->add_dst_cb) {
			/* Look up this handler's private data for the rule. */
			data = NULL;
			mutex_lock(&dst->clients_lock);
			list_for_each_entry(
				iter_client, &dst->clients_data, list) {
				if (iter_client->fs_handler == iter_handler) {
					data = iter_client->client_dst_data;
					break;
				}
			}
			mutex_unlock(&dst->clients_lock);
			/* NOTE(review): 'data' is looked up above but NULL is
			 * passed to the callback; the del path passes the
			 * looked-up data — confirm this asymmetry is intended.
			 */
			err  = iter_handler->add_dst_cb(dst,
							is_new_rule,
							NULL,
							iter_handler->client_context);
			if (err)
				break;
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1524 
/* Invoke every del_dst_cb notifier registered on the nearest namespace
 * with notifiers, passing each handler its private data for @dst.
 * @ctx_changed is true when @dst was the FTE's last destination.
 */
static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool ctx_changed = (fte->dests_size == 0);

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;
	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		/* Look up this handler's private data for the rule. */
		data = NULL;
		mutex_lock(&dst->clients_lock);
		list_for_each_entry(iter_client, &dst->clients_data, list) {
			if (iter_client->fs_handler == iter_handler) {
				data = iter_client->client_dst_data;
				break;
			}
		}
		mutex_unlock(&dst->clients_lock);
		if (iter_handler->del_dst_cb) {
			iter_handler->del_dst_cb(dst, ctx_changed, data,
						 iter_handler->client_context);
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1556 
/* fte should not be deleted while calling this function */
/* Attach destination @dest to @fte and push the updated FTE to firmware.
 * The new rule is linked into fte->dests only for the duration of the
 * set_fte command, then unlinked: the caller re-links it once the tree
 * node exists (see add_rule_to_tree()).  Returns the rule or an ERR_PTR.
 */
static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
					      struct mlx5_flow_group *fg,
					      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *dst;
	int err;

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	memcpy(&dst->dest_attr, dest, sizeof(*dest));
	dst->base.type = FS_TYPE_FLOW_DEST;
	INIT_LIST_HEAD(&dst->clients_data);
	mutex_init(&dst->clients_lock);
	fs_get_parent(ft, fg);
	/*Add dest to dests list- added as first element after the head*/
	list_add_tail(&dst->base.list, &fte->dests);
	fte->dests_size++;
	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
				  ft->vport,
				  &fte->status,
				  fte->val, ft->type,
				  ft->id, fte->index, fg->id, fte->flow_tag,
				  fte->action, fte->dests_size, &fte->dests);
	if (err)
		goto free_dst;

	/* Success: unlink; the caller re-links after fs_add_node(). */
	list_del(&dst->base.list);

	return dst;

free_dst:
	list_del(&dst->base.list);
	kfree(dst);
	fte->dests_size--;
	return ERR_PTR(err);
}
1597 
1598 static char *get_dest_name(struct mlx5_flow_destination *dest)
1599 {
1600 	char *name = kzalloc(sizeof(char) * 20, GFP_KERNEL);
1601 
1602 	switch (dest->type) {
1603 	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
1604 		snprintf(name, 20, "dest_%s_%u", "flow_table",
1605 			 dest->ft->id);
1606 		return name;
1607 	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
1608 		snprintf(name, 20, "dest_%s_%u", "vport",
1609 			 dest->vport_num);
1610 		return name;
1611 	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
1612 		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
1613 		return name;
1614 	default:
1615 		kfree(name);
1616 		return NULL;
1617 	}
1618 }
1619 
/* assumed fg is locked */
/* Return the lowest unused FTE index in @fg and, via *@prev, the list
 * position after which a new FTE must be inserted so that fg->ftes stays
 * sorted by index.
 */
static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
					 struct list_head **prev)
{
	struct fs_fte *fte;
	unsigned int start = fg->start_index;

	if (prev)
		*prev = &fg->ftes;

	/* assumed list is sorted by index */
	fs_for_each_fte(fte, fg) {
		/* First hole in the consecutive run is the free slot. */
		if (fte->index != start)
			return start;
		start++;
		if (prev)
			*prev = &fte->base.list;
	}

	return start;
}
1641 
1642 
1643 static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
1644 			     u32 *match_value,
1645 			     u8 action,
1646 			     u32 flow_tag,
1647 			     struct list_head **prev)
1648 {
1649 	struct fs_fte *fte;
1650 	int index = 0;
1651 
1652 	index = fs_get_free_fg_index(fg, prev);
1653 	fte = fs_alloc_fte(action, flow_tag, match_value, index);
1654 	if (IS_ERR(fte))
1655 		return fte;
1656 
1657 	return fte;
1658 }
1659 
/* Link @rule into the fs tree under @fte — named after its destination —
 * re-link it into fte->dests, and fire the add-rule notifiers.
 */
static void add_rule_to_tree(struct mlx5_flow_rule *rule,
			     struct fs_fte *fte)
{
	char *dest_name;

	/* NOTE(review): get_dest_name() can return NULL (unknown dest type),
	 * in which case fs_add_node() receives a NULL name — confirm it
	 * tolerates that.
	 */
	dest_name = get_dest_name(&rule->dest_attr);
	fs_add_node(&rule->base, &fte->base, dest_name, 1);
	/* re-add to list, since fs_add_node reset our list */
	list_add_tail(&rule->base.list, &fte->dests);
	kfree(dest_name);
	call_to_add_rule_notifiers(rule, fte);
}
1672 
/* Detach destination @dst from its FTE under the group lock.  If the FTE
 * still has other destinations, push the shrunk destination list to
 * firmware; removal of an FTE whose last destination went away is handled
 * elsewhere (see fs_del_fte()).  Fires del-rule notifiers on success.
 */
static void fs_del_dst(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;

	WARN_ON(!dev);

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return;
	}

	fs_get_parent(fte, dst);
	fs_get_parent(fg, fte);
	mutex_lock(&fg->base.lock);
	memcpy(match_value, fte->val, sizeof(fte->val));
	/* ft can't be changed as fg is locked */
	fs_get_parent(ft, fg);
	list_del(&dst->base.list);
	fte->dests_size--;
	if (fte->dests_size) {
		/* Rewrite the FTE with the remaining destinations. */
		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
					  &fte->status, match_value, ft->type,
					  ft->id, fte->index, fg->id,
					  fte->flow_tag, fte->action,
					  fte->dests_size, &fte->dests);
		if (err) {
			mlx5_core_warn(dev, "%s can't delete dst %s\n",
				       __func__, dst->base.name);
			goto err;
		}
	}
	call_to_del_rule_notifiers(dst, fte);
err:
	mutex_unlock(&fg->base.lock);
	kvfree(match_value);
}
1716 
1717 static void fs_del_fte(struct fs_fte *fte)
1718 {
1719 	struct mlx5_flow_table *ft;
1720 	struct mlx5_flow_group *fg;
1721 	int err;
1722 	struct mlx5_core_dev *dev;
1723 
1724 	fs_get_parent(fg, fte);
1725 	fs_get_parent(ft, fg);
1726 
1727 	dev = fs_get_dev(&ft->base);
1728 	WARN_ON(!dev);
1729 
1730 	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
1731 				     ft->type, ft->id, fte->index);
1732 	if (err)
1733 		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
1734 			       fte->base.name);
1735 
1736 	fg->num_ftes--;
1737 }
1738 
/* assuming parent fg is locked */
/* Add dst algorithm */
/* Add destination @dest within group @fg: if an existing FTE matches the
 * value, action and flow tag exactly, append the destination to it;
 * otherwise allocate a free FTE slot (-ENOSPC when the group is full) and
 * create a new FTE.  Returns the new rule or an ERR_PTR.
 */
static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
						   u32 *match_value,
						   u8 action,
						   u32 flow_tag,
						   struct mlx5_flow_destination *dest)
{
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	char fte_name[20];

	mutex_lock(&fg->base.lock);
	fs_for_each_fte(fte, fg) {
		/* TODO: Check of size against PRM max size */
		mutex_lock(&fte->base.lock);
		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
		    action == fte->action && flow_tag == fte->flow_tag) {
			/* Existing FTE matches: just append the dest. */
			dst = _fs_add_dst_fte(fte, fg, dest);
			mutex_unlock(&fte->base.lock);
			if (IS_ERR(dst))
				goto unlock_fg;
			goto add_rule;
		}
		mutex_unlock(&fte->base.lock);
	}

	fs_get_parent(ft, fg);
	if (fg->num_ftes == fg->max_ftes) {
		dst = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	fte = fs_create_fte(fg, match_value, action, flow_tag, &prev);
	if (IS_ERR(fte)) {
		dst = (void *)fte;
		goto unlock_fg;
	}
	dst = _fs_add_dst_fte(fte, fg, dest);
	if (IS_ERR(dst)) {
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
	/* Add node to tree */
	fs_add_node(&fte->base, &fg->base, fte_name, 0);
	/* Insert at 'prev' to keep fg->ftes sorted by index. */
	list_add(&fte->base.list, prev);
add_rule:
	add_rule_to_tree(dst, fte);
unlock_fg:
	mutex_unlock(&fg->base.lock);
	return dst;
}
1797 
/* Add a destination to table @ft: try every existing group whose mask is
 * an exact match for the request, retrying further groups only on
 * -ENOSPC; otherwise carve a new autogroup and add the rule there.
 * Returns the rule or an ERR_PTR.
 */
static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
					    u8 match_criteria_enable,
					    u32 *match_criteria,
					    u32 *match_value,
					    u8 action, u32 flow_tag,
					    struct mlx5_flow_destination *dest)
{
	/*? where dst_entry is allocated*/
	struct mlx5_flow_group *g;
	struct mlx5_flow_rule *dst;

	fs_get(&ft->base);
	mutex_lock(&ft->base.lock);
	fs_for_each_fg(g, ft)
		if (fs_match_exact_mask(g->mask.match_criteria_enable,
					match_criteria_enable,
					g->mask.match_criteria,
					match_criteria)) {
			mutex_unlock(&ft->base.lock);

			dst = fs_add_dst_fg(g, match_value,
					    action, flow_tag, dest);
			/* A valid rule pointer also makes PTR_ERR(dst)
			 * non-zero, so success exits here too; only
			 * -ENOSPC keeps scanning further groups.
			 */
			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
				goto unlock;
		}
	mutex_unlock(&ft->base.lock);

	g = create_autogroup(ft, match_criteria_enable, match_criteria);
	if (IS_ERR(g)) {
		dst = (void *)g;
		goto unlock;
	}

	dst = fs_add_dst_fg(g, match_value,
			    action, flow_tag, dest);
	if (IS_ERR(dst)) {
		/* Remove assumes refcount > 0 and autogroup creates a group
		 * with a refcount = 0.
		 */
		fs_get(&g->base);
		fs_remove_node(&g->base);
		goto unlock;
	}

unlock:
	fs_put(&ft->base);
	return dst;
}
1846 
1847 struct mlx5_flow_rule *
1848 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1849 		   u8 match_criteria_enable,
1850 		   u32 *match_criteria,
1851 		   u32 *match_value,
1852 		   u32 action,
1853 		   u32 flow_tag,
1854 		   struct mlx5_flow_destination *dest)
1855 {
1856 	struct mlx5_flow_rule *dst;
1857 	struct mlx5_flow_namespace *ns;
1858 
1859 	ns = get_ns_with_notifiers(&ft->base);
1860 	if (ns)
1861 		down_read(&ns->dests_rw_sem);
1862 	dst =  fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
1863 			     match_value, action, flow_tag, dest);
1864 	if (ns)
1865 		up_read(&ns->dests_rw_sem);
1866 
1867 	return dst;
1868 
1869 
1870 }
1871 EXPORT_SYMBOL(mlx5_add_flow_rule);
1872 
1873 void mlx5_del_flow_rule(struct mlx5_flow_rule *dst)
1874 {
1875 	struct mlx5_flow_namespace *ns;
1876 
1877 	ns = get_ns_with_notifiers(&dst->base);
1878 	if (ns)
1879 		down_read(&ns->dests_rw_sem);
1880 	fs_remove_node(&dst->base);
1881 	if (ns)
1882 		up_read(&ns->dests_rw_sem);
1883 }
1884 EXPORT_SYMBOL(mlx5_del_flow_rule);
1885 
1886 #define MLX5_CORE_FS_ROOT_NS_NAME "root"
1887 #define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
1888 #define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
1889 #define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
1890 #define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
1891 #define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
1892 #define MLX5_CORE_FS_PRIO_MAX_FT 4
1893 #define MLX5_CORE_FS_PRIO_MAX_NS 1
1894 
/* Allocate a priority node under @ns with flow-table quota @max_ft,
 * register it in the fs tree and append it to ns->prios.  Returns the
 * prio or ERR_PTR(-ENOMEM).
 */
static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
				      unsigned prio, int max_ft,
				      const char *name, u8 flags)
{
	struct fs_prio *fs_prio;

	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
	if (!fs_prio)
		return ERR_PTR(-ENOMEM);

	/* base.type must be set before the node enters the tree. */
	fs_prio->base.type = FS_TYPE_PRIO;
	fs_add_node(&fs_prio->base, &ns->base, name, 1);
	fs_prio->max_ft = max_ft;
	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
	fs_prio->prio = prio;
	fs_prio->flags = flags;
	list_add_tail(&fs_prio->base.list, &ns->prios);
	INIT_LIST_HEAD(&fs_prio->objs);
	mutex_init(&fs_prio->shared_lock);

	return fs_prio;
}
1917 
/* Tear down dev->root_ns bottom-up in three stages: (1) the prios nested
 * inside second-level namespaces, (2) those namespaces themselves,
 * (3) the top-level prios — then the root namespace node, and finally
 * clear dev->root_ns.
 */
static void cleanup_root_ns(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
	struct fs_prio *iter_prio;

	if (!root_ns)
		return;

	/* stage 1 */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		struct mlx5_flow_namespace *iter_ns;

		fs_for_each_ns(iter_ns, iter_prio) {
			while (!list_empty(&iter_ns->prios)) {
				struct fs_base *iter_prio2 =
					list_first_entry(&iter_ns->prios,
							 struct fs_base,
							 list);

				fs_remove_node(iter_prio2);
			}
		}
	}

	/* stage 2 */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		while (!list_empty(&iter_prio->objs)) {
			struct fs_base *iter_ns =
				list_first_entry(&iter_prio->objs,
						 struct fs_base,
						 list);

				fs_remove_node(iter_ns);
		}
	}
	/* stage 3 */
	while (!list_empty(&root_ns->ns.prios)) {
		struct fs_base *iter_prio =
			list_first_entry(&root_ns->ns.prios,
					 struct fs_base,
					 list);

		fs_remove_node(iter_prio);
	}

	fs_remove_node(&root_ns->ns.base);
	dev->root_ns = NULL;
}
1966 
1967 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1968 					struct mlx5_flow_root_namespace *root_ns)
1969 {
1970 	struct fs_base *prio;
1971 
1972 	if (!root_ns)
1973 		return;
1974 
1975 	if (!list_empty(&root_ns->ns.prios)) {
1976 		prio = list_first_entry(&root_ns->ns.prios,
1977 					struct fs_base,
1978 				 list);
1979 		fs_remove_node(prio);
1980 	}
1981 	fs_remove_node(&root_ns->ns.base);
1982 	root_ns = NULL;
1983 }
1984 
/* Release every flow-steering namespace created at init time.  The
 * single-prio roots may be NULL; cleanup_single_prio_root_ns() treats
 * that as a no-op.
 */
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
	cleanup_root_ns(dev);
	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
}
1994 
1995 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
1996 						 *ns)
1997 {
1998 	ns->base.type = FS_TYPE_NAMESPACE;
1999 	init_rwsem(&ns->dests_rw_sem);
2000 	init_rwsem(&ns->notifiers_rw_sem);
2001 	INIT_LIST_HEAD(&ns->prios);
2002 	INIT_LIST_HEAD(&ns->list_notifiers);
2003 
2004 	return ns;
2005 }
2006 
2007 static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
2008 							  enum fs_ft_type
2009 							  table_type,
2010 							  char *name)
2011 {
2012 	struct mlx5_flow_root_namespace *root_ns;
2013 	struct mlx5_flow_namespace *ns;
2014 
2015 	/* create the root namespace */
2016 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2017 	if (!root_ns)
2018 		goto err;
2019 
2020 	root_ns->dev = dev;
2021 	root_ns->table_type = table_type;
2022 	mutex_init(&root_ns->fs_chain_lock);
2023 
2024 	ns = &root_ns->ns;
2025 	fs_init_namespace(ns);
2026 	fs_add_node(&ns->base, NULL, name, 1);
2027 
2028 	return root_ns;
2029 err:
2030 	return NULL;
2031 }
2032 
2033 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2034 {
2035 	struct fs_prio *prio;
2036 
2037 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2038 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2039 	if (!dev->fdb_root_ns)
2040 		return -ENOMEM;
2041 
2042 	/* create 1 prio*/
2043 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2044 	if (IS_ERR(prio))
2045 		return PTR_ERR(prio);
2046 	else
2047 		return 0;
2048 }
2049 
2050 #define MAX_VPORTS 128
2051 
2052 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2053 {
2054 	struct fs_prio *prio;
2055 
2056 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2057 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2058 	if (!dev->esw_egress_root_ns)
2059 		return -ENOMEM;
2060 
2061 	/* create 1 prio*/
2062 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2063 			      "esw_egress_prio", 0);
2064 	if (IS_ERR(prio))
2065 		return PTR_ERR(prio);
2066 	else
2067 		return 0;
2068 }
2069 
2070 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2071 {
2072 	struct fs_prio *prio;
2073 
2074 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2075 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2076 	if (!dev->esw_ingress_root_ns)
2077 		return -ENOMEM;
2078 
2079 	/* create 1 prio*/
2080 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2081 			      "esw_ingress_prio", 0);
2082 	if (IS_ERR(prio))
2083 		return PTR_ERR(prio);
2084 	else
2085 		return 0;
2086 }
2087 
2088 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2089 {
2090 	struct fs_prio *prio;
2091 
2092 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2093 				     MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2094 	if (!dev->sniffer_rx_root_ns)
2095 		return  -ENOMEM;
2096 
2097 	/* create 1 prio*/
2098 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2099 			      "sniffer_prio", 0);
2100 	if (IS_ERR(prio))
2101 		return PTR_ERR(prio);
2102 	else
2103 		return 0;
2104 }
2105 
2106 
2107 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2108 {
2109 	struct fs_prio *prio;
2110 
2111 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2112 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2113 	if (!dev->sniffer_tx_root_ns)
2114 		return  -ENOMEM;
2115 
2116 	/* create 1 prio*/
2117 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2118 			      "sniffer_prio", 0);
2119 	if (IS_ERR(prio))
2120 		return PTR_ERR(prio);
2121 	else
2122 		return 0;
2123 }
2124 
/* Allocate a sub-namespace under @prio, register it in the fs tree and
 * append it to the prio's object list.  Returns the namespace or
 * ERR_PTR(-ENOMEM).
 */
static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
						       const char *name)
{
	struct mlx5_flow_namespace	*ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return ERR_PTR(-ENOMEM);

	fs_init_namespace(ns);
	fs_add_node(&ns->base, &prio->base, name, 1);
	list_add_tail(&ns->base.list, &prio->objs);

	return ns;
}
2140 
2141 #define FLOW_TABLE_BIT_SZ 1
2142 #define GET_FLOW_TABLE_CAP(dev, offset) \
2143 	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
2144 			offset / 32)) >>					\
2145 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2146 
2147 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2148 {
2149 	int i;
2150 
2151 	for (i = 0; i < caps->arr_sz; i++) {
2152 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2153 			return false;
2154 	}
2155 	return true;
2156 }
2157 
/* Recursively instantiate one node of the static root-tree description:
 * a PRIO node becomes an fs_prio under the parent namespace (silently
 * skipped when the device lacks the required flow-table level or caps),
 * a NAMESPACE node becomes a namespace under the parent prio; the node's
 * children are then built beneath it.  Returns 0 on success (including
 * the skip case) or a negative errno.
 */
static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
		    struct init_tree_node *node, struct fs_base *base_parent,
		    struct init_tree_node *tree_parent)
{
	struct mlx5_flow_namespace *fs_ns;
	struct fs_prio *fs_prio;
	int priority;
	struct fs_base *base;
	int i;
	int err = 0;

	if (node->type == FS_TYPE_PRIO) {
		if ((node->min_ft_level > max_ft_level) ||
		    !has_required_caps(dev, &node->caps))
			goto out;

		fs_get_obj(fs_ns, base_parent);
		/* Priority is the node's position among its siblings. */
		priority = node - tree_parent->children;
		fs_prio = fs_create_prio(fs_ns, priority,
					 node->max_ft,
					 node->name, node->flags);
		if (IS_ERR(fs_prio)) {
			err = PTR_ERR(fs_prio);
			goto out;
		}
		base = &fs_prio->base;
	} else if (node->type == FS_TYPE_NAMESPACE) {
		fs_get_obj(fs_prio, base_parent);
		fs_ns = fs_create_namespace(fs_prio, node->name);
		if (IS_ERR(fs_ns)) {
			err = PTR_ERR(fs_ns);
			goto out;
		}
		base = &fs_ns->base;
	} else {
		return -EINVAL;
	}
	for (i = 0; i < node->ar_size; i++) {
		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
				      node);
		if (err)
			break;
	}
out:
	return err;
}
2204 
2205 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2206 		   struct init_tree_node *node, struct fs_base *parent)
2207 {
2208 	int i;
2209 	struct mlx5_flow_namespace *fs_ns;
2210 	int err = 0;
2211 
2212 	fs_get_obj(fs_ns, parent);
2213 	for (i = 0; i < node->ar_size; i++) {
2214 		err = _init_root_tree(dev, max_ft_level,
2215 				      &node->children[i], &fs_ns->base, node);
2216 		if (err)
2217 			break;
2218 	}
2219 	return err;
2220 }
2221 
2222 static int sum_max_ft_in_prio(struct fs_prio *prio);
2223 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2224 {
2225 	struct fs_prio *prio;
2226 	int sum = 0;
2227 
2228 	fs_for_each_prio(prio, ns) {
2229 		sum += sum_max_ft_in_prio(prio);
2230 	}
2231 	return  sum;
2232 }
2233 
2234 static int sum_max_ft_in_prio(struct fs_prio *prio)
2235 {
2236 	int sum = 0;
2237 	struct fs_base *it;
2238 	struct mlx5_flow_namespace	*ns;
2239 
2240 	if (prio->max_ft)
2241 		return prio->max_ft;
2242 
2243 	fs_for_each_ns_or_ft(it, prio) {
2244 		if (it->type == FS_TYPE_FLOW_TABLE)
2245 			continue;
2246 
2247 		fs_get_obj(ns, it);
2248 		sum += sum_max_ft_in_ns(ns);
2249 	}
2250 	prio->max_ft = sum;
2251 	return  sum;
2252 }
2253 
2254 static void set_max_ft(struct mlx5_flow_namespace *ns)
2255 {
2256 	struct fs_prio *prio;
2257 
2258 	if (!ns)
2259 		return;
2260 
2261 	fs_for_each_prio(prio, ns)
2262 		sum_max_ft_in_prio(prio);
2263 }
2264 
2265 static int init_root_ns(struct mlx5_core_dev *dev)
2266 {
2267 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2268 					      flow_table_properties_nic_receive.
2269 					      max_ft_level);
2270 
2271 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2272 				      MLX5_CORE_FS_ROOT_NS_NAME);
2273 	if (IS_ERR_OR_NULL(dev->root_ns))
2274 		goto err;
2275 
2276 
2277 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2278 		goto err;
2279 
2280 	set_max_ft(&dev->root_ns->ns);
2281 
2282 	return 0;
2283 err:
2284 	return -ENOMEM;
2285 }
2286 
2287 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2288 {
2289 	struct fs_base *pbase;
2290 	struct mlx5_flow_group *fg;
2291 
2292 	pbase = rule->base.parent;
2293 	WARN_ON(!pbase);
2294 	pbase = pbase->parent;
2295 	WARN_ON(!pbase);
2296 
2297 	fs_get_obj(fg, pbase);
2298 	return fg->mask.match_criteria_enable;
2299 }
2300 
2301 void mlx5_get_match_value(u32 *match_value,
2302 			  struct mlx5_flow_rule *rule)
2303 {
2304 	struct fs_base *pbase;
2305 	struct fs_fte *fte;
2306 
2307 	pbase = rule->base.parent;
2308 	WARN_ON(!pbase);
2309 	fs_get_obj(fte, pbase);
2310 
2311 	memcpy(match_value, fte->val, sizeof(fte->val));
2312 }
2313 
2314 void mlx5_get_match_criteria(u32 *match_criteria,
2315 			     struct mlx5_flow_rule *rule)
2316 {
2317 	struct fs_base *pbase;
2318 	struct mlx5_flow_group *fg;
2319 
2320 	pbase = rule->base.parent;
2321 	WARN_ON(!pbase);
2322 	pbase = pbase->parent;
2323 	WARN_ON(!pbase);
2324 
2325 	fs_get_obj(fg, pbase);
2326 	memcpy(match_criteria, &fg->mask.match_criteria,
2327 	       sizeof(fg->mask.match_criteria));
2328 }
2329 
/*
 * Create all flow-steering root namespaces for 'dev'.  The NIC RX root
 * is only built when the device advertises nic_flow_table support; the
 * FDB, egress/ingress ACL and sniffer roots are attempted
 * unconditionally.  On any failure everything built so far is torn
 * down via mlx5_cleanup_fs() and the error is returned.
 */
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
	int err;

	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
		err = init_root_ns(dev);
		if (err)
			goto err;
	}

	err = init_fdb_root_ns(dev);
	if (err)
		goto err;

	err = init_egress_acl_root_ns(dev);
	if (err)
		goto err;

	err = init_ingress_acl_root_ns(dev);
	if (err)
		goto err;

	err = init_sniffer_tx_root_ns(dev);
	if (err)
		goto err;

	err = init_sniffer_rx_root_ns(dev);
	if (err)
		goto err;

	return 0;
err:
	/* Undo whatever was successfully initialized above. */
	mlx5_cleanup_fs(dev);
	return err;
}
2365 
2366 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2367 						  enum mlx5_flow_namespace_type type)
2368 {
2369 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2370 	int prio;
2371 	static struct fs_prio *fs_prio;
2372 	struct mlx5_flow_namespace *ns;
2373 
2374 	switch (type) {
2375 	case MLX5_FLOW_NAMESPACE_BYPASS:
2376 		prio = 0;
2377 		break;
2378 	case MLX5_FLOW_NAMESPACE_KERNEL:
2379 		prio = 1;
2380 		break;
2381 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2382 		prio = 2;
2383 		break;
2384 	case MLX5_FLOW_NAMESPACE_FDB:
2385 		if (dev->fdb_root_ns)
2386 			return &dev->fdb_root_ns->ns;
2387 		else
2388 			return NULL;
2389 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2390 		if (dev->esw_egress_root_ns)
2391 			return &dev->esw_egress_root_ns->ns;
2392 		else
2393 			return NULL;
2394 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2395 		if (dev->esw_ingress_root_ns)
2396 			return &dev->esw_ingress_root_ns->ns;
2397 		else
2398 			return NULL;
2399 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2400 		if (dev->sniffer_rx_root_ns)
2401 			return &dev->sniffer_rx_root_ns->ns;
2402 		else
2403 			return NULL;
2404 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2405 		if (dev->sniffer_tx_root_ns)
2406 			return &dev->sniffer_tx_root_ns->ns;
2407 		else
2408 			return NULL;
2409 	default:
2410 		return NULL;
2411 	}
2412 
2413 	if (!root_ns)
2414 		return NULL;
2415 
2416 	fs_prio = find_prio(&root_ns->ns, prio);
2417 	if (!fs_prio)
2418 		return NULL;
2419 
2420 	ns = list_first_entry(&fs_prio->objs,
2421 			      typeof(*ns),
2422 			      base.list);
2423 
2424 	return ns;
2425 }
2426 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2427 
2428 
2429 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2430 				  struct mlx5_flow_handler *fs_handler,
2431 				  void  *client_data)
2432 {
2433 	struct fs_client_priv_data *priv_data;
2434 
2435 	mutex_lock(&rule->clients_lock);
2436 	/*Check that hanlder isn't exists in the list already*/
2437 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2438 		if (priv_data->fs_handler == fs_handler) {
2439 			priv_data->client_dst_data = client_data;
2440 			goto unlock;
2441 		}
2442 	}
2443 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2444 	if (!priv_data) {
2445 		mutex_unlock(&rule->clients_lock);
2446 		return -ENOMEM;
2447 	}
2448 
2449 	priv_data->client_dst_data = client_data;
2450 	priv_data->fs_handler = fs_handler;
2451 	list_add(&priv_data->list, &rule->clients_data);
2452 
2453 unlock:
2454 	mutex_unlock(&rule->clients_lock);
2455 
2456 	return 0;
2457 }
2458 
2459 static int remove_from_clients(struct mlx5_flow_rule *rule,
2460 			bool ctx_changed,
2461 			void *client_data,
2462 			void *context)
2463 {
2464 	struct fs_client_priv_data *iter_client;
2465 	struct fs_client_priv_data *temp_client;
2466 	struct mlx5_flow_handler *handler = (struct
2467 						mlx5_flow_handler*)context;
2468 
2469 	mutex_lock(&rule->clients_lock);
2470 	list_for_each_entry_safe(iter_client, temp_client,
2471 				 &rule->clients_data, list) {
2472 		if (iter_client->fs_handler == handler) {
2473 			list_del(&iter_client->list);
2474 			kfree(iter_client);
2475 			break;
2476 		}
2477 	}
2478 	mutex_unlock(&rule->clients_lock);
2479 
2480 	return 0;
2481 }
2482 
2483 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2484 								enum mlx5_flow_namespace_type ns_type,
2485 								rule_event_fn add_cb,
2486 								rule_event_fn del_cb,
2487 								void *context)
2488 {
2489 	struct mlx5_flow_namespace *ns;
2490 	struct mlx5_flow_handler *handler;
2491 
2492 	ns = mlx5_get_flow_namespace(dev, ns_type);
2493 	if (!ns)
2494 		return ERR_PTR(-EINVAL);
2495 
2496 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2497 	if (!handler)
2498 		return ERR_PTR(-ENOMEM);
2499 
2500 	handler->add_dst_cb = add_cb;
2501 	handler->del_dst_cb = del_cb;
2502 	handler->client_context = context;
2503 	handler->ns = ns;
2504 	down_write(&ns->notifiers_rw_sem);
2505 	list_add_tail(&handler->list, &ns->list_notifiers);
2506 	up_write(&ns->notifiers_rw_sem);
2507 
2508 	return handler;
2509 }
2510 
static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
				rule_event_fn add_rule_cb,
				void *context);

/*
 * Detach 'handler' from its namespace and free it.  Every rule in the
 * namespace is walked first to drop the handler's per-rule private
 * data.  Lock acquisition order (dests, then notifiers) matches
 * mlx5_flow_iterate_existing_rules().
 */
void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
{
	struct mlx5_flow_namespace *ns = handler->ns;

	/*Remove from dst's clients*/
	down_write(&ns->dests_rw_sem);
	down_write(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, remove_from_clients, handler);
	list_del(&handler->list);
	up_write(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
	kfree(handler);
}
2528 
/*
 * Invoke 'add_rule_cb' on every rule in flow table 'ft', locking the
 * table, group and FTE hierarchically.  The first rule of each FTE is
 * reported with is_new_rule == true, subsequent rules of the same FTE
 * with false.  Each rule is pinned with fs_get() across the callback
 * and released with fs_put_parent_locked() (the parent FTE's lock is
 * already held here).  A non-zero callback return aborts the walk.
 */
static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
				rule_event_fn add_rule_cb,
				void *context)
{
	struct mlx5_flow_group *iter_fg;
	struct fs_fte *iter_fte;
	struct mlx5_flow_rule *iter_rule;
	int err = 0;
	bool is_new_rule;

	mutex_lock(&ft->base.lock);
	fs_for_each_fg(iter_fg, ft) {
		mutex_lock(&iter_fg->base.lock);
		fs_for_each_fte(iter_fte, iter_fg) {
			mutex_lock(&iter_fte->base.lock);
			is_new_rule = true;
			fs_for_each_dst(iter_rule, iter_fte) {
				fs_get(&iter_rule->base);
				err = add_rule_cb(iter_rule,
						 is_new_rule,
						 NULL,
						 context);
				fs_put_parent_locked(&iter_rule->base);
				if (err)
					break;
				is_new_rule = false;
			}
			mutex_unlock(&iter_fte->base.lock);
			if (err)
				break;
		}
		mutex_unlock(&iter_fg->base.lock);
		if (err)
			break;
	}
	mutex_unlock(&ft->base.lock);
}
2566 
2567 static void iterate_rules_in_prio(struct fs_prio *prio,
2568 				  rule_event_fn add_rule_cb,
2569 				  void *context)
2570 {
2571 	struct fs_base *it;
2572 
2573 	mutex_lock(&prio->base.lock);
2574 	fs_for_each_ns_or_ft(it, prio) {
2575 		if (it->type == FS_TYPE_FLOW_TABLE) {
2576 			struct mlx5_flow_table	      *ft;
2577 
2578 			fs_get_obj(ft, it);
2579 			iterate_rules_in_ft(ft, add_rule_cb, context);
2580 		} else {
2581 			struct mlx5_flow_namespace *ns;
2582 
2583 			fs_get_obj(ns, it);
2584 			iterate_rules_in_ns(ns, add_rule_cb, context);
2585 		}
2586 	}
2587 	mutex_unlock(&prio->base.lock);
2588 }
2589 
2590 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2591 				rule_event_fn add_rule_cb,
2592 				void *context)
2593 {
2594 	struct fs_prio *iter_prio;
2595 
2596 	mutex_lock(&ns->base.lock);
2597 	fs_for_each_prio(iter_prio, ns) {
2598 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2599 	}
2600 	mutex_unlock(&ns->base.lock);
2601 }
2602 
/*
 * Invoke 'add_rule_cb' on every rule currently installed in 'ns'
 * (recursively, through all priorities, tables and FTEs).  Takes the
 * dests semaphore for write and the notifiers semaphore for read; the
 * acquisition order (dests, then notifiers) matches
 * mlx5_unregister_rule_notifier().
 */
void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
					 rule_event_fn add_rule_cb,
					 void *context)
{
	down_write(&ns->dests_rw_sem);
	down_read(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, add_rule_cb, context);
	up_read(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
}
2613 
2614 
2615 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2616 {
2617 	struct mlx5_flow_rule_node *iter_node;
2618 	struct mlx5_flow_rule_node *temp_node;
2619 
2620 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2621 		list_del(&iter_node->list);
2622 		kfree(iter_node);
2623 	}
2624 
2625 	kfree(rules_list);
2626 }
2627 
2628 #define ROCEV1_ETHERTYPE 0x8915
2629 static int set_rocev1_rules(struct list_head *rules_list)
2630 {
2631 	struct mlx5_flow_rule_node *rocev1_rule;
2632 
2633 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2634 	if (!rocev1_rule)
2635 		return -ENOMEM;
2636 
2637 	rocev1_rule->match_criteria_enable =
2638 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2639 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2640 		 0xffff);
2641 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2642 		 ROCEV1_ETHERTYPE);
2643 
2644 	list_add_tail(&rocev1_rule->list, rules_list);
2645 
2646 	return 0;
2647 }
2648 
2649 #define ROCEV2_UDP_PORT 4791
2650 static int set_rocev2_rules(struct list_head *rules_list)
2651 {
2652 	struct mlx5_flow_rule_node *ipv4_rule;
2653 	struct mlx5_flow_rule_node *ipv6_rule;
2654 
2655 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2656 	if (!ipv4_rule)
2657 		return -ENOMEM;
2658 
2659 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2660 	if (!ipv6_rule) {
2661 		kfree(ipv4_rule);
2662 		return -ENOMEM;
2663 	}
2664 
2665 	ipv4_rule->match_criteria_enable =
2666 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2667 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2668 		 0xffff);
2669 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2670 		 0x0800);
2671 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2672 		 0xff);
2673 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2674 		 IPPROTO_UDP);
2675 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2676 		 0xffff);
2677 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2678 		 ROCEV2_UDP_PORT);
2679 
2680 	ipv6_rule->match_criteria_enable =
2681 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2682 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2683 		 0xffff);
2684 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2685 		 0x86dd);
2686 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2687 		 0xff);
2688 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2689 		 IPPROTO_UDP);
2690 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2691 		 0xffff);
2692 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2693 		 ROCEV2_UDP_PORT);
2694 
2695 	list_add_tail(&ipv4_rule->list, rules_list);
2696 	list_add_tail(&ipv6_rule->list, rules_list);
2697 
2698 	return 0;
2699 }
2700 
2701 
2702 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2703 {
2704 	int err = 0;
2705 	struct mlx5_flow_rules_list *rules_list =
2706 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2707 
2708 	if (!rules_list)
2709 		return NULL;
2710 
2711 	INIT_LIST_HEAD(&rules_list->head);
2712 
2713 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2714 		err = set_rocev1_rules(&rules_list->head);
2715 		if (err)
2716 			goto free_list;
2717 	}
2718 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2719 		err = set_rocev2_rules(&rules_list->head);
2720 	if (err)
2721 		goto free_list;
2722 
2723 	return rules_list;
2724 
2725 free_list:
2726 	mlx5_del_flow_rules_list(rules_list);
2727 	return NULL;
2728 }
2729