xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c (revision ad744541)
1 /*-
2  * Copyright (c) 2013-2021, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_rss.h"
27 #include "opt_ratelimit.h"
28 
29 #include <linux/module.h>
30 #include <dev/mlx5/driver.h>
31 #include <dev/mlx5/mlx5_core/mlx5_core.h>
32 #include <dev/mlx5/mlx5_core/fs_core.h>
33 #include <linux/string.h>
34 #include <linux/compiler.h>
35 
/* Number of elements in a brace-enclosed init_tree_node initializer list. */
#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

/* Build an FS_TYPE_PRIO init_tree_node; trailing args are its children. */
#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \
		 ...) {.type = FS_TYPE_PRIO,\
	.name = name_val,\
	.min_ft_level = min_level_val,\
	.flags = flags_val,\
	.max_ft = max_ft_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* Leaf priority: no minimum table level and no capability requirements. */
#define ADD_FT_PRIO(name_val, flags_val, max_ft_val,  ...)\
	ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\
		 __VA_ARGS__)\

/* Build an FS_TYPE_NAMESPACE init_tree_node; trailing args are its children. */
#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\
	.name = name_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

/* Number of elements in a brace-enclosed list of capability offsets. */
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

/* Bit offset of a NIC flow-table capability, used as its identifier. */
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

/* Device capabilities a priority requires (stored in struct node_caps). */
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__}}
67 
/* Flowtable sizes: maximum number of flow tables per priority. */
#define	BYPASS_MAX_FT 5
#define	BYPASS_PRIO_MAX_FT 1
#define	OFFLOADS_MAX_FT 2
#define	KERNEL_MAX_FT 5
#define	LEFTOVER_MAX_FT 1

/* Flowtable levels: minimum hardware level each priority's tables may
 * occupy.  Lower levels are traversed first on receive.
 */
#define	OFFLOADS_MIN_LEVEL 3
#define	KERNEL_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
#define	LEFTOVER_MIN_LEVEL (KERNEL_MIN_LEVEL + 1)
#define	BYPASS_MIN_LEVEL (MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL)
80 
/* List of device-capability bit offsets (see FS_CAP) a priority depends on. */
struct node_caps {
	size_t	arr_sz;	/* number of entries in caps[] */
	long	*caps;	/* capability bit offsets */
};
85 
/*
 * Static description of the default flow-steering tree (bypass,
 * offloads, kernel and leftovers priorities under one root namespace).
 * NOTE(review): presumably consumed when the steering namespaces are
 * built at init time — the consumer is outside this chunk; confirm.
 */
struct init_tree_node {
	enum fs_type	type;		/* FS_TYPE_NAMESPACE or FS_TYPE_PRIO */
	const char	*name;
	struct init_tree_node *children;
	int ar_size;			/* number of entries in children[] */
	struct node_caps caps;		/* required device capabilities */
	u8  flags;
	int min_ft_level;		/* lowest level this prio's tables may use */
	int prio;
	int max_ft;			/* max flow tables in this priority */
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.name = "root",
	.ar_size = 4,
	.children = (struct init_tree_node[]) {
		ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("by_pass_ns",
				ADD_FT_PRIO("prio0", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio1", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio2", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio3", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio4", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio5", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio6", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio7", 0,
					    BYPASS_PRIO_MAX_FT),
				ADD_FT_PRIO("prio-mcast", 0,
					    BYPASS_PRIO_MAX_FT))),
		ADD_PRIO("offloads_prio", 0, OFFLOADS_MIN_LEVEL, 0, {},
			 ADD_NS("offloads_ns",
				ADD_FT_PRIO("prio_offloads-0", 0,
					    OFFLOADS_MAX_FT))),
		ADD_PRIO("kernel_prio", 0, KERNEL_MIN_LEVEL, 0, {},
			 ADD_NS("kernel_ns",
				ADD_FT_PRIO("prio_kernel-0", 0,
					    KERNEL_MAX_FT))),
		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
			 LEFTOVER_MIN_LEVEL, 0,
			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
			 ADD_NS("leftover_ns",
				ADD_FT_PRIO("leftovers_prio-0",
					MLX5_CORE_FS_PRIO_SHARED,
					LEFTOVER_MAX_FT)))
	}
};
141 
142 /* Tree creation functions */
143 
144 static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
145 {
146 	struct fs_base *parent;
147 
148 	/* Make sure we only read it once while we go up the tree */
149 	while ((parent = node->parent))
150 		node = parent;
151 
152 	if (node->type != FS_TYPE_NAMESPACE) {
153 		return NULL;
154 	}
155 
156 	return container_of(container_of(node,
157 					 struct mlx5_flow_namespace,
158 					 base),
159 			    struct mlx5_flow_root_namespace,
160 			    ns);
161 }
162 
163 static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
164 {
165 	struct mlx5_flow_root_namespace *root = find_root(node);
166 
167 	if (root)
168 		return root->dev;
169 	return NULL;
170 }
171 
172 static void fs_init_node(struct fs_base *node,
173 			 unsigned int refcount)
174 {
175 	kref_init(&node->refcount);
176 	atomic_set(&node->users_refcount, refcount);
177 	init_completion(&node->complete);
178 	INIT_LIST_HEAD(&node->list);
179 	mutex_init(&node->lock);
180 }
181 
182 static void _fs_add_node(struct fs_base *node,
183 			 const char *name,
184 			 struct fs_base *parent)
185 {
186 	if (parent)
187 		atomic_inc(&parent->users_refcount);
188 	node->name = kstrdup_const(name, GFP_KERNEL);
189 	node->parent = parent;
190 }
191 
/* Initialize @node and attach it under @parent with the given name. */
static void fs_add_node(struct fs_base *node,
			struct fs_base *parent, const char *name,
			unsigned int refcount)
{
	fs_init_node(node, refcount);
	_fs_add_node(node, name, parent);
}
199 
200 static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
201 		    bool parent_locked);
202 
203 static void fs_del_dst(struct mlx5_flow_rule *dst);
204 static void _fs_del_ft(struct mlx5_flow_table *ft);
205 static void fs_del_fg(struct mlx5_flow_group *fg);
206 static void fs_del_fte(struct fs_fte *fte);
207 
208 static void cmd_remove_node(struct fs_base *base)
209 {
210 	switch (base->type) {
211 	case FS_TYPE_FLOW_DEST:
212 		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
213 		break;
214 	case FS_TYPE_FLOW_TABLE:
215 		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
216 		break;
217 	case FS_TYPE_FLOW_GROUP:
218 		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
219 		break;
220 	case FS_TYPE_FLOW_ENTRY:
221 		fs_del_fte(container_of(base, struct fs_fte, base));
222 		break;
223 	default:
224 		break;
225 	}
226 }
227 
/*
 * kref release callback: tear down @node's hardware state under its
 * (and its parent's) lock, wake any waiter in fs_remove_node(), then
 * drop the user reference this node held on its parent.
 */
static void __fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	/* Parent lock is taken before the child's, the tree-wide order. */
	if (node->parent)
		mutex_lock(&node->parent->lock);
	mutex_lock(&node->lock);
	cmd_remove_node(node);
	mutex_unlock(&node->lock);
	/* Signal fs_remove_node()/fs_remove_node_parent_locked() waiters. */
	complete(&node->complete);
	if (node->parent) {
		mutex_unlock(&node->parent->lock);
		_fs_put(node->parent, _fs_remove_node, false);
	}
}
243 
/*
 * kref release callback that also frees the node's memory; used on the
 * parent-put path where no external waiter will perform the free.
 */
void _fs_remove_node(struct kref *kref)
{
	struct fs_base *node = container_of(kref, struct fs_base, refcount);

	__fs_remove_node(kref);
	kfree_const(node->name);
	kfree(node);
}
252 
/* Take a user reference on @node. */
static void fs_get(struct fs_base *node)
{
	atomic_inc(&node->users_refcount);
}
257 
/*
 * Drop a user reference on @node.  When the count reaches zero the node
 * is unlinked from its parent's list and the kref is released through
 * @kref_cb.  @parent_locked says whether the caller already holds the
 * parent's lock; the lock state on return matches the state on entry.
 */
static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
		    bool parent_locked)
{
	struct fs_base *parent_node = node->parent;

	if (parent_node && !parent_locked)
		mutex_lock(&parent_node->lock);
	if (atomic_dec_and_test(&node->users_refcount)) {
		if (parent_node) {
			/*remove from parent's list*/
			list_del_init(&node->list);
			/* Drop the lock before the (possibly blocking) release. */
			mutex_unlock(&parent_node->lock);
		}
		kref_put(&node->refcount, kref_cb);
		/* Re-take the lock the caller expects to still hold. */
		if (parent_node && parent_locked)
			mutex_lock(&parent_node->lock);
	} else if (parent_node && !parent_locked) {
		mutex_unlock(&parent_node->lock);
	}
}
278 
/* Drop a user reference; the caller does not hold the parent's lock. */
static void fs_put(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, false);
}
283 
/* Drop a user reference; the caller already holds the parent's lock. */
static void fs_put_parent_locked(struct fs_base *node)
{
	_fs_put(node, __fs_remove_node, true);
}
288 
/*
 * Drop the caller's reference, wait until the node's hardware state has
 * been removed (possibly by the last other user), then free the node.
 */
static void fs_remove_node(struct fs_base *node)
{
	fs_put(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
296 
/*
 * Like fs_remove_node(), for callers that already hold the parent's
 * lock (the lock is re-held when the put returns).
 */
static void fs_remove_node_parent_locked(struct fs_base *node)
{
	fs_put_parent_locked(node);
	wait_for_completion(&node->complete);
	kfree_const(node->name);
	kfree(node);
}
304 
305 static struct fs_fte *fs_alloc_fte(u32 sw_action,
306 				   struct mlx5_flow_act *flow_act,
307 				   u32 *match_value,
308 				   unsigned int index)
309 {
310 	struct fs_fte *fte;
311 
312 
313 	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
314 	if (!fte)
315 		return ERR_PTR(-ENOMEM);
316 
317 	memcpy(fte->val, match_value, sizeof(fte->val));
318 	fte->base.type =  FS_TYPE_FLOW_ENTRY;
319 	fte->dests_size = 0;
320 	fte->index = index;
321 	INIT_LIST_HEAD(&fte->dests);
322 	fte->flow_act = *flow_act;
323 	fte->sw_action = sw_action;
324 
325 	return fte;
326 }
327 
/*
 * Allocate the star FTE of @ft inside star group @fg at @index, with a
 * single flow-table destination whose target is filled in later by
 * fs_set_star_rule().  Returns the FTE or an ERR_PTR.
 */
static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
					  struct mlx5_flow_group *fg,
					  u32 *match_value,
					  unsigned int index)
{
	int err;
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_act flow_act = {
		.actions = MLX5_FLOW_ACT_ACTIONS_FLOW_TAG,
		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
	};

	if (fg->num_ftes == fg->max_ftes)
		return ERR_PTR(-ENOSPC);

	fte = fs_alloc_fte(MLX5_FLOW_RULE_FWD_ACTION_DEST,
			   &flow_act, match_value, index);
	if (IS_ERR(fte))
		return fte;

	/*create dst*/
	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_fte;
	}

	/* Parent pointers are set directly; no refcounts are taken here. */
	fte->base.parent = &fg->base;
	fte->dests_size = 1;
	/* dest_attr.ft stays NULL until the star rule is chained. */
	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
	dst->base.parent = &fte->base;
	list_add(&dst->base.list, &fte->dests);
	/* assumed that the callee creates the star rules sorted by index */
	list_add_tail(&fte->base.list, &fg->ftes);
	fg->num_ftes++;

	return fte;

free_fte:
	kfree(fte);
	return ERR_PTR(err);
}
371 
/* assume that fte can't be changed */
/*
 * Free the star FTE and its destination objects and detach it from its
 * flow group.  Software-only teardown; no firmware command is issued.
 */
static void free_star_fte_entry(struct fs_fte *fte)
{
	struct mlx5_flow_group	*fg;
	struct mlx5_flow_rule	*dst, *temp;

	fs_get_parent(fg, fte);

	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
		fte->dests_size--;
		list_del(&dst->base.list);
		kfree(dst);
	}

	list_del(&fte->base.list);
	fg->num_ftes--;
	kfree(fte);
}
390 
391 static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
392 {
393 	struct mlx5_flow_group *fg;
394 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
395 					    create_fg_in, match_criteria);
396 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
397 					    create_fg_in,
398 					    match_criteria_enable);
399 	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
400 	if (!fg)
401 		return ERR_PTR(-ENOMEM);
402 
403 	INIT_LIST_HEAD(&fg->ftes);
404 	fg->mask.match_criteria_enable = match_criteria_enable;
405 	memcpy(&fg->mask.match_criteria, match_criteria,
406 	       sizeof(fg->mask.match_criteria));
407 	fg->base.type =  FS_TYPE_FLOW_GROUP;
408 	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
409 				   start_flow_index);
410 	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
411 				end_flow_index) - fg->start_index + 1;
412 	return fg;
413 }
414 
415 static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
416 static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
417 					    struct fs_prio *prio);
418 
/* assumed src_ft and dst_ft can't be freed */
/*
 * Point the star rule of @src_ft at @dst_ft.  With a non-NULL @dst_ft
 * the star FTE is (re)written to firmware and a user reference is taken
 * on @dst_ft; with NULL the star FTE is deleted from firmware.  Any
 * reference held on the previous destination remains the caller's to
 * drop.  Returns 0 or a negative errno.
 */
static int fs_set_star_rule(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *src_ft,
			    struct mlx5_flow_table *dst_ft)
{
	struct mlx5_flow_rule *src_dst;
	struct fs_fte *src_fte;
	int err = 0;
	u32 *match_value;
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);

	/* The star FTE has exactly one dest (see alloc_star_ft_entry). */
	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
				   struct mlx5_flow_rule, base.list);
	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return -ENOMEM;
	}
	/*Create match context*/

	fs_get_parent(src_fte, src_dst);

	src_dst->dest_attr.ft = dst_ft;
	if (dst_ft) {
		err = mlx5_cmd_fs_set_fte(dev,
					  src_ft->vport,
					  &src_fte->status,
					  match_value, src_ft->type,
					  src_ft->id, src_fte->index,
					  src_ft->star_rule.fg->id,
					  &src_fte->flow_act,
					  src_fte->sw_action,
					  src_fte->dests_size,
					  &src_fte->dests);
		if (err)
			goto free;

		/* Keep the destination table alive while chained to it. */
		fs_get(&dst_ft->base);
	} else {
		mlx5_cmd_fs_delete_fte(dev,
				       src_ft->vport,
				       &src_fte->status,
				       src_ft->type, src_ft->id,
				       src_fte->index);
	}

free:
	kvfree(match_value);
	return err;
}
469 
470 static int connect_prev_fts(struct fs_prio *locked_prio,
471 			    struct fs_prio *prev_prio,
472 			    struct mlx5_flow_table *next_ft)
473 {
474 	struct mlx5_flow_table *iter;
475 	int err = 0;
476 	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
477 
478 	if (!dev)
479 		return -ENODEV;
480 
481 	mutex_lock(&prev_prio->base.lock);
482 	fs_for_each_ft(iter, prev_prio) {
483 		struct mlx5_flow_rule *src_dst =
484 			list_first_entry(&iter->star_rule.fte->dests,
485 					 struct mlx5_flow_rule, base.list);
486 		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
487 
488 		if (prev_ft == next_ft)
489 			continue;
490 
491 		err = fs_set_star_rule(dev, iter, next_ft);
492 		if (err) {
493 			mlx5_core_warn(dev,
494 			    "mlx5: flow steering can't connect prev and next\n");
495 			goto unlock;
496 		} else {
497 			/* Assume ft's prio is locked */
498 			if (prev_ft) {
499 				struct fs_prio *prio;
500 
501 				fs_get_parent(prio, prev_ft);
502 				if (prio == locked_prio)
503 					fs_put_parent_locked(&prev_ft->base);
504 				else
505 					fs_put(&prev_ft->base);
506 			}
507 		}
508 	}
509 
510 unlock:
511 	mutex_unlock(&prev_prio->base.lock);
512 	return 0;
513 }
514 
515 static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
516 {
517 	struct mlx5_flow_group *fg;
518 	int err;
519 	u32 *fg_in;
520 	u32 *match_value;
521 	struct mlx5_flow_table *next_ft;
522 	struct mlx5_flow_table *prev_ft;
523 	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
524 	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
525 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
526 
527 	fg_in = mlx5_vzalloc(fg_inlen);
528 	if (!fg_in) {
529 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
530 		return -ENOMEM;
531 	}
532 
533 	match_value = mlx5_vzalloc(match_len);
534 	if (!match_value) {
535 		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
536 		kvfree(fg_in);
537 		return -ENOMEM;
538 	}
539 
540 	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
541 	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
542 	fg = fs_alloc_fg(fg_in);
543 	if (IS_ERR(fg)) {
544 		err = PTR_ERR(fg);
545 		goto out;
546 	}
547 	ft->star_rule.fg = fg;
548 	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
549 				     fg_in, ft->vport, ft->type,
550 				     ft->id,
551 				     &fg->id);
552 	if (err)
553 		goto free_fg;
554 
555 	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
556 						      match_value,
557 						      ft->max_fte);
558 	if (IS_ERR(ft->star_rule.fte))
559 		goto free_star_rule;
560 
561 	mutex_lock(&root->fs_chain_lock);
562 	next_ft = find_next_ft(prio);
563 	err = fs_set_star_rule(root->dev, ft, next_ft);
564 	if (err) {
565 		mutex_unlock(&root->fs_chain_lock);
566 		goto free_star_rule;
567 	}
568 	if (next_ft) {
569 		struct fs_prio *parent;
570 
571 		fs_get_parent(parent, next_ft);
572 		fs_put(&next_ft->base);
573 	}
574 	prev_ft = find_prev_ft(ft, prio);
575 	if (prev_ft) {
576 		struct fs_prio *prev_parent;
577 
578 		fs_get_parent(prev_parent, prev_ft);
579 
580 		err = connect_prev_fts(NULL, prev_parent, ft);
581 		if (err) {
582 			mutex_unlock(&root->fs_chain_lock);
583 			goto destroy_chained_star_rule;
584 		}
585 		fs_put(&prev_ft->base);
586 	}
587 	mutex_unlock(&root->fs_chain_lock);
588 	kvfree(fg_in);
589 	kvfree(match_value);
590 
591 	return 0;
592 
593 destroy_chained_star_rule:
594 	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
595 	if (next_ft)
596 		fs_put(&next_ft->base);
597 free_star_rule:
598 	free_star_fte_entry(ft->star_rule.fte);
599 	mlx5_cmd_fs_destroy_fg(fs_get_dev(&ft->base), ft->vport,
600 			       ft->type, ft->id,
601 			       fg->id);
602 free_fg:
603 	kfree(fg);
604 out:
605 	kvfree(fg_in);
606 	kvfree(match_value);
607 	return err;
608 }
609 
610 static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
611 {
612 	int err;
613 	struct mlx5_flow_root_namespace *root;
614 	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
615 	struct mlx5_flow_table *prev_ft, *next_ft;
616 	struct fs_prio *prev_prio;
617 
618 	WARN_ON(!dev);
619 
620 	root = find_root(&prio->base);
621 	if (!root)
622 		mlx5_core_err(dev,
623 		    "flow steering failed to find root of priority %s",
624 		    prio->base.name);
625 
626 	/* In order to ensure atomic deletion, first update
627 	 * prev ft to point on the next ft.
628 	 */
629 	mutex_lock(&root->fs_chain_lock);
630 	prev_ft = find_prev_ft(ft, prio);
631 	next_ft = find_next_ft(prio);
632 	if (prev_ft) {
633 		fs_get_parent(prev_prio, prev_ft);
634 		/*Prev is connected to ft, only if ft is the first(last) in the prio*/
635 		err = connect_prev_fts(prio, prev_prio, next_ft);
636 		if (err)
637 			mlx5_core_warn(root->dev,
638 				       "flow steering can't connect prev and next of flow table\n");
639 		fs_put(&prev_ft->base);
640 	}
641 
642 	err = fs_set_star_rule(root->dev, ft, NULL);
643 	/*One put is for fs_get in find next ft*/
644 	if (next_ft) {
645 		fs_put(&next_ft->base);
646 		if (!err)
647 			fs_put(&next_ft->base);
648 	}
649 
650 	mutex_unlock(&root->fs_chain_lock);
651 	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
652 				     ft->star_rule.fg->id);
653 	if (err)
654 		mlx5_core_warn(dev,
655 			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
656 			       ft->base.name);
657 	free_star_fte_entry(ft->star_rule.fte);
658 
659 	kfree(ft->star_rule.fg);
660 	ft->star_rule.fg = NULL;
661 }
662 
663 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
664 				 unsigned int prio)
665 {
666 	struct fs_prio *iter_prio;
667 
668 	fs_for_each_prio(iter_prio, ns) {
669 		if (iter_prio->prio == prio)
670 			return iter_prio;
671 	}
672 
673 	return NULL;
674 }
675 
676 static unsigned int _alloc_new_level(struct fs_prio *prio,
677 				     struct mlx5_flow_namespace *match);
678 
/*
 * Sum the max_ft budget of every priority preceding @prio inside @ns,
 * then continue accumulating up the tree from @ns's parent priority.
 */
static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
				      struct fs_prio *prio)
{
	unsigned int level = 0;
	struct fs_prio *p;

	if (!ns)
		return 0;

	mutex_lock(&ns->base.lock);
	fs_for_each_prio(p, ns) {
		if (p != prio)
			level += p->max_ft;
		else
			break;
	}
	mutex_unlock(&ns->base.lock);

	/* Recurse upward: reuse @prio for the parent priority of @ns. */
	fs_get_parent(prio, ns);
	if (prio)
		WARN_ON(prio->base.type != FS_TYPE_PRIO);

	return level + _alloc_new_level(prio, ns);
}
703 
/* Called under lock of priority, hence locking all upper objects */
/*
 * Compute the level for a new flow table under @prio: walk @prio's
 * children in reverse; the first flow table found fixes the result
 * (one past its level), while sibling namespaces contribute their
 * priorities' max_ft budgets.  @match is the child namespace the walk
 * arrived from and is skipped to avoid double counting.
 */
static unsigned int _alloc_new_level(struct fs_prio *prio,
				     struct mlx5_flow_namespace *match)
{
	struct mlx5_flow_namespace *ns;
	struct fs_base *it;
	unsigned int level = 0;

	if (!prio)
		return 0;

	mutex_lock(&prio->base.lock);
	fs_for_each_ns_or_ft_reverse(it, prio) {
		if (it->type == FS_TYPE_NAMESPACE) {
			struct fs_prio *p;

			fs_get_obj(ns, it);

			if (match != ns) {
				mutex_lock(&ns->base.lock);
				fs_for_each_prio(p, ns)
					level += p->max_ft;
				mutex_unlock(&ns->base.lock);
			} else {
				break;
			}
		} else {
			struct mlx5_flow_table *ft;

			/* Found an existing table: new level is one past it. */
			fs_get_obj(ft, it);
			mutex_unlock(&prio->base.lock);
			return level + ft->level + 1;
		}
	}

	/* No table at this level; continue from the parent namespace. */
	fs_get_parent(ns, prio);
	mutex_unlock(&prio->base.lock);
	return __alloc_new_level(ns, prio) + level;
}
743 
/* Pick the hardware level for a new flow table created in @prio. */
static unsigned int alloc_new_level(struct fs_prio *prio)
{
	return _alloc_new_level(prio, NULL);
}
748 
749 static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
750 				    struct mlx5_flow_table *ft)
751 {
752 	int err = 0;
753 	int min_level = INT_MAX;
754 
755 	if (root->root_ft)
756 		min_level = root->root_ft->level;
757 
758 	if (ft->level < min_level)
759 		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
760 					      ft->id);
761 	else
762 		return err;
763 
764 	if (err)
765 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
766 			       ft->id);
767 	else
768 		root->root_ft = ft;
769 
770 	return err;
771 }
772 
/*
 * Allocate a flow table in @fs_prio, create it in firmware together
 * with its star rule, and link it into the tree.  @max_fte is the
 * user-visible capacity; two extra entries are reserved internally for
 * the star rule.  Returns the table or an ERR_PTR.
 */
static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
						 u16 vport,
						 struct fs_prio *fs_prio,
						 int max_fte,
						 const char *name)
{
	struct mlx5_flow_table *ft;
	int err;
	int log_table_sz;
	int ft_size;
	char gen_name[20];
	struct mlx5_flow_root_namespace *root = find_root(&ns->base);
	struct mlx5_core_dev *dev = fs_get_dev(&ns->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of namespace %s",
		    ns->base.name);
		return ERR_PTR(-ENODEV);
	}

	if (fs_prio->num_ft == fs_prio->max_ft)
		return ERR_PTR(-ENOSPC);

	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	fs_init_node(&ft->base, 1);
	INIT_LIST_HEAD(&ft->fgs);

	/* Temporarily WA until we expose the level set in the API */
	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
		root->table_type == FS_FT_ESW_INGRESS_ACL)
		ft->level = 0;
	else
		ft->level = alloc_new_level(fs_prio);

	ft->base.type = FS_TYPE_FLOW_TABLE;
	ft->vport = vport;
	ft->type = root->table_type;
	/*Two entries are reserved for star rules*/
	ft_size = roundup_pow_of_two(max_fte + 2);
	/*User isn't aware to those rules*/
	ft->max_fte = ft_size - 2;
	log_table_sz = ilog2(ft_size);

	if (name == NULL || name[0] == '\0') {
		/* NOTE(review): ft->id is still 0 here (kzalloc'ed; firmware
		 * assigns it in the create command below), so the generated
		 * name is always "flow_table_0" — confirm if intended.
		 */
		snprintf(gen_name, sizeof(gen_name), "flow_table_%u", ft->id);
		name = gen_name;
	}

	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
				    ft->level, log_table_sz, name, &ft->id);
	if (err)
		goto free_ft;

	err = create_star_rule(ft, fs_prio);
	if (err)
		goto del_ft;

	/* The shallowest NIC RX table becomes the firmware root table. */
	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
			       flow_table_properties_nic_receive.modify_root)) {
		err = update_root_ft_create(root, ft);
		if (err)
			goto destroy_star_rule;
	}

	_fs_add_node(&ft->base, name, &fs_prio->base);

	list_add_tail(&ft->base.list, &fs_prio->objs);
	fs_prio->num_ft++;

	return ft;

destroy_star_rule:
	destroy_star_rule(ft, fs_prio);
del_ft:
	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
free_ft:
	kfree(ft);
	return ERR_PTR(err);
}
856 
857 static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
858 						u16 vport,
859 						unsigned int prio,
860 						int max_fte,
861 						const char *name)
862 {
863 	struct fs_prio *fs_prio = NULL;
864 	fs_prio = find_prio(ns, prio);
865 	if (!fs_prio)
866 		return ERR_PTR(-EINVAL);
867 
868 	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
869 }
870 
871 
872 static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
873 						   struct list_head *start);
874 
875 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
876 						     struct list_head *start);
877 
878 static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
879 {
880 	struct mlx5_flow_table *ft;
881 
882 	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
883 	if (ft) {
884 		ft->shared_refcount++;
885 		return ft;
886 	}
887 
888 	return NULL;
889 }
890 
/*
 * Create (or, in a shared priority, reuse) an auto-grouped flow table:
 * flow groups are carved out automatically as rules are added.  The
 * last @num_reserved_entries of the table are excluded from
 * autogrouping.  Returns the table or an ERR_PTR.
 */
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
							   int prio,
							   const char *name,
							   int num_flow_table_entries,
							   int max_num_groups,
							   int num_reserved_entries)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_prio *fs_prio;
	bool is_shared_prio;

	if (max_num_groups > (num_flow_table_entries - num_reserved_entries))
		return ERR_PTR(-EINVAL);
	if (num_reserved_entries > num_flow_table_entries)
		return ERR_PTR(-EINVAL);

	fs_prio = find_prio(ns, prio);
	if (!fs_prio)
		return ERR_PTR(-EINVAL);

	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		/* shared_lock serializes creation/reuse within the prio. */
		mutex_lock(&fs_prio->shared_lock);
		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
	}

	if (ft)
		goto return_ft;

	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
			      name);
	if (IS_ERR(ft))
		goto return_ft;

	ft->autogroup.active = true;
	ft->autogroup.max_types = max_num_groups;
	ft->autogroup.max_fte = num_flow_table_entries - num_reserved_entries;
	/* We save place for flow groups in addition to max types */
	ft->autogroup.group_size = ft->autogroup.max_fte / (max_num_groups + 1);

	if (is_shared_prio)
		ft->shared_refcount = 1;

return_ft:
	if (is_shared_prio)
		mutex_unlock(&fs_prio->shared_lock);
	return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
940 
/* Create a flow table bound to a specific @vport in priority @prio. */
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     u16 vport,
						     int prio,
						     const char *name,
						     int num_flow_table_entries)
{
	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_vport_flow_table);
950 
/* Create a flow table (vport 0) in priority @prio of namespace @ns. */
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       int prio,
					       const char *name,
					       int num_flow_table_entries)
{
	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
}
EXPORT_SYMBOL(mlx5_create_flow_table);
959 
960 static void _fs_del_ft(struct mlx5_flow_table *ft)
961 {
962 	int err;
963 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
964 	struct fs_prio *prio;
965 
966 	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
967 	if (err)
968 		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
969 			       ft->base.name);
970 
971 	fs_get_parent(prio, ft);
972 	prio->num_ft--;
973 }
974 
/*
 * When @ft is the current firmware root table, move the root to the
 * next table in the chain (or clear it when none remains) before @ft
 * is destroyed.  Returns 0 or the firmware error.
 */
static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
				    struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_table *put_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	fs_get_parent(prio, ft);
	/*Assuming objs containis only flow tables and
	 * flow tables are sorted by level.
	 */
	if (!list_is_last(&ft->base.list, &prio->objs)) {
		next_ft = list_next_entry(ft, base.list);
	} else {
		/* find_next_ft() takes a reference we must drop below. */
		next_ft = find_next_ft(prio);
		put_ft = next_ft;
	}

	if (next_ft) {
		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
					      next_ft->id);
		if (err)
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
	}
	if (!err)
		root->root_ft = next_ft;

	if (put_ft)
		fs_put(&put_ft->base);

	return err;
}
1012 
/*Objects in the same prio are destroyed in the reverse order they were created*/
/*
 * Destroy @ft: in a shared priority just drop one share reference while
 * others remain; otherwise move the firmware root away from @ft, tear
 * down its star rule, and remove the node from the tree.  Returns 0 or
 * a negative errno.
 */
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	int err = 0;
	struct fs_prio *prio;
	struct mlx5_flow_root_namespace *root;
	bool is_shared_prio;
	struct mlx5_core_dev *dev;

	fs_get_parent(prio, ft);
	root = find_root(&prio->base);
	dev = fs_get_dev(&prio->base);

	if (!root) {
		mlx5_core_err(dev,
		    "flow steering failed to find root of priority %s",
		    prio->base.name);
		return -ENODEV;
	}

	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
	if (is_shared_prio) {
		mutex_lock(&prio->shared_lock);
		if (ft->shared_refcount > 1) {
			/* Other sharers remain: drop one share and one user ref. */
			--ft->shared_refcount;
			fs_put(&ft->base);
			mutex_unlock(&prio->shared_lock);
			return 0;
		}
	}

	mutex_lock(&prio->base.lock);
	mutex_lock(&ft->base.lock);

	err = update_root_ft_destroy(root, ft);
	if (err)
		goto unlock_ft;

	/* delete two last entries */
	destroy_star_rule(ft, prio);

	mutex_unlock(&ft->base.lock);
	fs_remove_node_parent_locked(&ft->base);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;

unlock_ft:
	mutex_unlock(&ft->base.lock);
	mutex_unlock(&prio->base.lock);
	if (is_shared_prio)
		mutex_unlock(&prio->shared_lock);

	return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);
1071 
/*
 * Create a flow group in firmware and insert it into @ft's group list
 * after @prev.  @refcount is the node's initial user reference count.
 * Returns the group or an ERR_PTR.
 */
static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev,
					    struct mlx5_flow_table *ft,
					    struct list_head *prev,
					    u32 *fg_in,
					    int refcount)
{
	struct mlx5_flow_group *fg;
	unsigned int group_size;
	int err;
	char name[20];

	fg = fs_alloc_fg(fg_in);
	if (IS_ERR(fg))
		return fg;

	group_size = MLX5_GET(create_flow_group_in, fg_in, end_flow_index) -
		MLX5_GET(create_flow_group_in, fg_in, start_flow_index) + 1;
	err =  mlx5_cmd_fs_create_fg(dev, fg_in,
				     ft->vport, ft->type, ft->id,
				     &fg->id);
	if (err)
		goto free_fg;

	mutex_lock(&ft->base.lock);

	/* An exact group_size match counts against the autogroup budget. */
	if (ft->autogroup.active && group_size == ft->autogroup.group_size)
		ft->autogroup.num_types++;

	snprintf(name, sizeof(name), "group_%u", fg->id);
	/*Add node to tree*/
	fs_add_node(&fg->base, &ft->base, name, refcount);
	/*Add node to group list*/
	list_add(&fg->base.list, prev);
	mutex_unlock(&ft->base.lock);

	return fg;

free_fg:
	kfree(fg);
	return ERR_PTR(err);
}
1113 
1114 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1115 					       u32 *in)
1116 {
1117 	struct mlx5_flow_group *fg;
1118 	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
1119 	unsigned int start_index;
1120 
1121 	start_index = MLX5_GET(create_flow_group_in, in, start_flow_index);
1122 	if (!dev)
1123 		return ERR_PTR(-ENODEV);
1124 
1125 	if (ft->autogroup.active && start_index < ft->autogroup.max_fte)
1126 		return ERR_PTR(-EPERM);
1127 
1128 	fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1);
1129 
1130 	return fg;
1131 }
1132 EXPORT_SYMBOL(mlx5_create_flow_group);
1133 
/* Group is destroyed when all the rules in the group were removed.
 * Tree-removal callback: undo autogroup accounting and tell firmware to
 * destroy the group object.
 */
static void fs_del_fg(struct mlx5_flow_group *fg)
{
	struct mlx5_flow_table *parent_ft;
	struct mlx5_core_dev *dev;

	fs_get_parent(parent_ft, fg);
	dev = fs_get_dev(&parent_ft->base);
	WARN_ON(!dev);

	/* Only groups inside the autogroup region contribute to num_types. */
	if (parent_ft->autogroup.active &&
	    fg->max_ftes == parent_ft->autogroup.group_size &&
	    fg->start_index < parent_ft->autogroup.max_fte)
		parent_ft->autogroup.num_types--;

	/* Best-effort firmware destroy; failure is only logged. */
	if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport,
				   parent_ft->type,
				   parent_ft->id, fg->id))
		mlx5_core_warn(dev, "flow steering can't destroy fg\n");
}
1154 
/* Remove the group node from the steering tree; the actual teardown
 * (fs_del_fg) happens via the tree's removal machinery. */
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	fs_remove_node(&fg->base);
}
EXPORT_SYMBOL(mlx5_destroy_flow_group);
1160 
/*
 * Masked byte-wise comparison: returns true when @val1 and @val2 are
 * identical on every bit selected by @mask over @size bytes.
 */
static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size)
{
	const unsigned char *m = mask;
	const unsigned char *a = val1;
	const unsigned char *b = val2;
	size_t i;

	/* TODO: optimize by comparing 64bits when possible */
	for (i = 0; i < size; i++) {
		if ((a[i] & m[i]) != (b[i] & m[i]))
			return false;
	}

	return true;
}
1173 
/*
 * Compare two match values under the group mask.  For each enabled match
 * criterion (outer headers, misc parameters, inner headers) the
 * corresponding section of @val1 and @val2 must agree on every bit set in
 * the mask; criteria that are not enabled are ignored.
 */
bool fs_match_exact_val(struct mlx5_core_fs_mask *mask,
			       void *val1, void *val2)
{
	/* Outer (L2-L4) headers. */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, outer_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, outer_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, outer_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}

	/* Miscellaneous parameters section. */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, misc_parameters);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, misc_parameters);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					  mask->match_criteria, misc_parameters);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_misc)))
			return false;
	}
	/* Inner (tunneled) headers. */
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						val1, inner_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						val2, inner_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					  mask->match_criteria, inner_headers);

		if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2,
					 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}
	return true;
}
1219 
1220 bool fs_match_exact_mask(u8 match_criteria_enable1,
1221 				u8 match_criteria_enable2,
1222 				void *mask1, void *mask2)
1223 {
1224 	return match_criteria_enable1 == match_criteria_enable2 &&
1225 		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
1226 }
1227 
1228 static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
1229 							   struct list_head *start);
1230 
/*
 * Walk @prio's object list backwards, continuing from @start, and return
 * the first flow table found (with a reference taken via fs_get), either
 * directly in the priority or recursively inside a nested namespace.
 * Returns NULL when no table exists before @start.
 * Caller holds prio->base.lock (see find_first_ft_in_prio_reverse()).
 */
static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio,
							      struct list_head *start)
{
	struct fs_base *it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue_reverse(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		/* A priority holds either flow tables or namespaces. */
		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		/* Recurse into the nested namespace, newest priority first. */
		ft = find_first_ft_in_ns_reverse(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1259 
1260 static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio,
1261 							     struct list_head *start)
1262 {
1263 	struct mlx5_flow_table *ft;
1264 
1265 	if (!prio)
1266 		return NULL;
1267 
1268 	mutex_lock(&prio->base.lock);
1269 	ft = _find_first_ft_in_prio_reverse(prio, start);
1270 	mutex_unlock(&prio->base.lock);
1271 
1272 	return ft;
1273 }
1274 
/*
 * Scan @ns's priorities in reverse starting from @start and return the
 * first (referenced) flow table found in any of them, or NULL.
 * Takes ns->base.lock for the duration of the priority walk.
 */
static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns,
							   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	/* @start is a priority's list node; recover the fs_prio from it. */
	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue_reverse(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio_reverse(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1298 
/* Returned a held ft, assumed curr is protected, assumed curr's parent is
 * locked.
 * Find the flow table that precedes @curr in steering order by walking
 * up the priority/namespace hierarchy.  Only meaningful when @curr is the
 * first table of @prio (otherwise the previous table is inside @prio and
 * NULL is returned).
 */
static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
					    struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	if (!curr)
		return NULL;

	/* prio has either namespace or flow-tables, but not both */
	if (!list_empty(&prio->objs) &&
	    list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) !=
	    curr)
		return NULL;

	/* Climb: search earlier priorities of the parent namespace, then the
	 * grandparent priority before this namespace, and so on upward.
	 * NOTE(review): assumes fs_get_parent() yields a non-NULL ns while
	 * prio is non-NULL (i.e. every prio hangs off a namespace) - the
	 * loop dereferences ns unconditionally; confirm against the tree
	 * invariants in fs_core.
	 */
	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns_reverse(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		if (prio && !ft)
			ft = find_first_ft_in_prio_reverse(prio,
							   &curr_base->list);
	}
	return ft;
}
1331 
/*
 * Forward counterpart of _find_first_ft_in_prio_reverse(): walk @prio's
 * object list from @start and return the first flow table found (with a
 * reference taken), recursing into nested namespaces.  Returns NULL when
 * no table follows @start.
 */
static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio,
						      struct list_head *start)
{
	struct fs_base	*it = container_of(start, struct fs_base, list);

	if (!prio)
		return NULL;

	fs_for_each_ns_or_ft_continue(it, prio) {
		struct mlx5_flow_namespace	*ns;
		struct mlx5_flow_table		*ft;

		/* A priority holds either flow tables or namespaces. */
		if (it->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, it);
			fs_get(&ft->base);
			return ft;
		}

		fs_get_obj(ns, it);
		WARN_ON(ns->base.type != FS_TYPE_NAMESPACE);

		/* Recurse into the nested namespace's priorities. */
		ft = find_first_ft_in_ns(ns, &ns->prios);
		if (ft)
			return ft;
	}

	return NULL;
}
1360 
1361 static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
1362 						     struct list_head *start)
1363 {
1364 	struct mlx5_flow_table *ft;
1365 
1366 	if (!prio)
1367 		return NULL;
1368 
1369 	mutex_lock(&prio->base.lock);
1370 	ft = _find_first_ft_in_prio(prio, start);
1371 	mutex_unlock(&prio->base.lock);
1372 
1373 	return ft;
1374 }
1375 
/*
 * Scan @ns's priorities forward starting from @start and return the first
 * (referenced) flow table found in any of them, or NULL.
 * Takes ns->base.lock for the duration of the priority walk.
 */
static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
						   struct list_head *start)
{
	struct fs_prio *prio;

	if (!ns)
		return NULL;

	/* @start is a priority's list node; recover the fs_prio from it. */
	fs_get_obj(prio, container_of(start, struct fs_base, list));
	mutex_lock(&ns->base.lock);
	fs_for_each_prio_continue(prio, ns) {
		struct mlx5_flow_table *ft;

		ft = find_first_ft_in_prio(prio, &prio->objs);
		if (ft) {
			mutex_unlock(&ns->base.lock);
			return ft;
		}
	}
	mutex_unlock(&ns->base.lock);

	return NULL;
}
1399 
/* returned a held ft, assumed curr is protected, assumed curr's parent is
 * locked.
 * Find the flow table that follows @prio in steering order by climbing
 * the priority/namespace hierarchy, mirroring find_prev_ft().
 */
static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_base *curr_base;

	while (!ft && prio) {
		struct mlx5_flow_namespace *ns;

		fs_get_parent(ns, prio);
		ft = find_first_ft_in_ns(ns, &prio->base.list);
		curr_base = &ns->base;
		fs_get_parent(prio, ns);

		/* NOTE(review): unlike find_prev_ft() this calls the
		 * UNLOCKED _find_first_ft_in_prio() variant here - verify
		 * whether the grandparent prio lock is already held on this
		 * path or the asymmetry is unintentional.
		 */
		if (!ft && prio)
			ft = _find_first_ft_in_prio(prio, &curr_base->list);
	}
	return ft;
}
1421 
1422 
/* called under ft mutex lock */
/*
 * Allocate an automatic flow group for the given match criteria inside
 * the autogroup region of @ft.  Finds the first index gap large enough in
 * the (index-sorted) group list and creates a group there with refcount 0.
 * Returns the new group or an ERR_PTR (-ENOENT when autogrouping is off,
 * -ENOSPC when the region is full).
 */
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
						u8 match_criteria_enable,
						u32 *match_criteria)
{
	unsigned int group_size;
	unsigned int candidate_index = 0;
	struct mlx5_flow_group *g;
	struct mlx5_flow_group *ret;
	struct list_head *prev = &ft->fgs;
	struct mlx5_core_dev *dev;
	u32 *in;
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	void *match_criteria_addr;
	u32 max_fte = ft->autogroup.max_fte;

	if (!ft->autogroup.active)
		return ERR_PTR(-ENOENT);

	dev = fs_get_dev(&ft->base);
	if (!dev)
		return ERR_PTR(-ENODEV);

	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return ERR_PTR(-ENOMEM);
	}


	/* Full-size groups until the type budget is spent, then size 1. */
	if (ft->autogroup.num_types < ft->autogroup.max_types)
		group_size = ft->autogroup.group_size;
	else
		group_size = 1;

	if (group_size == 0) {
		mlx5_core_warn(dev,
			       "flow steering can't create group size of 0\n");
		ret = ERR_PTR(-EINVAL);
		goto out;
	}

	/* sorted by start_index */
	/* Slide the candidate range past every group it would overlap;
	 * stop at the first gap that fits.  @prev tracks the insertion
	 * point in the group list.
	 */
	fs_for_each_fg(g, ft) {
		if (candidate_index + group_size > g->start_index)
			candidate_index = g->start_index + g->max_ftes;
		else
			break;
		prev = &g->base.list;
	}

	if (candidate_index + group_size > max_fte) {
		ret = ERR_PTR(-ENOSPC);
		goto out;
	}

	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
		 match_criteria_enable);
	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
		 group_size - 1);
	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
					   in, match_criteria);
	memcpy(match_criteria_addr, match_criteria,
	       MLX5_ST_SZ_BYTES(fte_match_param));

	/* refcount 0: the group lives only as long as it has rules. */
	ret = fs_create_fg(dev, ft, prev, in, 0);
out:
	kvfree(in);
	return ret;
}
1494 
1495 static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node)
1496 {
1497 	struct mlx5_flow_namespace *ns = NULL;
1498 
1499 	while (node  && (node->type != FS_TYPE_NAMESPACE ||
1500 			      list_empty(&container_of(node, struct
1501 						       mlx5_flow_namespace,
1502 						       base)->list_notifiers)))
1503 		node = node->parent;
1504 
1505 	if (node)
1506 		fs_get_obj(ns, node);
1507 
1508 	return ns;
1509 }
1510 
1511 
/* Assumption - fte is locked.
 * Invoke every registered add_dst_cb notifier on the nearest ancestor
 * namespace for the newly added rule @dst.  Each handler receives its own
 * per-rule private data (looked up in dst->clients_data).  The loop stops
 * at the first handler that returns an error.
 */
static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	/* "New rule" means @dst is the first destination on the FTE. */
	bool is_new_rule = list_first_entry(&fte->dests,
					    struct mlx5_flow_rule,
					    base.list) == dst;
	int err;

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;

	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		if (iter_handler->add_dst_cb) {
			data = NULL;
			/* Find this handler's private data for the rule. */
			mutex_lock(&dst->clients_lock);
			list_for_each_entry(
				iter_client, &dst->clients_data, list) {
				if (iter_client->fs_handler == iter_handler) {
					data = iter_client->client_dst_data;
					break;
				}
			}
			mutex_unlock(&dst->clients_lock);
			err  = iter_handler->add_dst_cb(dst,
							is_new_rule,
							data,
							iter_handler->client_context);
			if (err)
				break;
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1553 
/*
 * Invoke every registered del_dst_cb notifier on the nearest ancestor
 * namespace for the rule @dst being removed.  ctx_changed tells handlers
 * whether the FTE lost its last destination.  Unlike the add path, all
 * handlers are always called (no early break on error).
 */
static void call_to_del_rule_notifiers(struct mlx5_flow_rule *dst,
				      struct fs_fte *fte)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handler *iter_handler;
	struct fs_client_priv_data *iter_client;
	void *data;
	bool ctx_changed = (fte->dests_size == 0);

	ns = get_ns_with_notifiers(&fte->base);
	if (!ns)
		return;
	down_read(&ns->notifiers_rw_sem);
	list_for_each_entry(iter_handler, &ns->list_notifiers,
			    list) {
		data = NULL;
		/* Find this handler's private data for the rule. */
		mutex_lock(&dst->clients_lock);
		list_for_each_entry(iter_client, &dst->clients_data, list) {
			if (iter_client->fs_handler == iter_handler) {
				data = iter_client->client_dst_data;
				break;
			}
		}
		mutex_unlock(&dst->clients_lock);
		if (iter_handler->del_dst_cb) {
			iter_handler->del_dst_cb(dst, ctx_changed, data,
						 iter_handler->client_context);
		}
	}
	up_read(&ns->notifiers_rw_sem);
}
1585 
/* fte should not be deleted while calling this function */
/*
 * Attach destination @dest to @fte: allocate a rule object, temporarily
 * link it into fte->dests so the firmware set_fte command sees the full
 * destination list, then unlink it again - the caller re-links it via
 * add_rule_to_tree() after fs_add_node() resets the list node.
 * Returns the new rule or an ERR_PTR; on failure dests_size is restored.
 */
static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte,
					      struct mlx5_flow_group *fg,
					      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *dst;
	int err;

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	memcpy(&dst->dest_attr, dest, sizeof(*dest));
	dst->base.type = FS_TYPE_FLOW_DEST;
	INIT_LIST_HEAD(&dst->clients_data);
	mutex_init(&dst->clients_lock);
	fs_get_parent(ft, fg);
	/*Add dest to dests list- added as first element after the head*/
	list_add_tail(&dst->base.list, &fte->dests);
	fte->dests_size++;
	/* Push the updated FTE (including the new destination) to firmware. */
	err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base),
				  ft->vport,
				  &fte->status,
				  fte->val, ft->type,
				  ft->id, fte->index, fg->id, &fte->flow_act,
				  fte->sw_action, fte->dests_size, &fte->dests);
	if (err)
		goto free_dst;

	/* Unlink; the caller re-adds the rule after fs_add_node(). */
	list_del(&dst->base.list);

	return dst;

free_dst:
	list_del(&dst->base.list);
	kfree(dst);
	fte->dests_size--;
	return ERR_PTR(err);
}
1626 
1627 static char *get_dest_name(struct mlx5_flow_destination *dest)
1628 {
1629 	char *name = kzalloc(sizeof(char) * 20, GFP_KERNEL);
1630 
1631 	switch (dest->type) {
1632 	case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE:
1633 		snprintf(name, 20, "dest_%s_%u", "flow_table",
1634 			 dest->ft->id);
1635 		return name;
1636 	case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT:
1637 		snprintf(name, 20, "dest_%s_%u", "vport",
1638 			 dest->vport_num);
1639 		return name;
1640 	case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR:
1641 		snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num);
1642 		return name;
1643 	default:
1644 		kfree(name);
1645 		return NULL;
1646 	}
1647 }
1648 
1649 /* assumed fg is locked */
1650 static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg,
1651 					 struct list_head **prev)
1652 {
1653 	struct fs_fte *fte;
1654 	unsigned int start = fg->start_index;
1655 
1656 	if (prev)
1657 		*prev = &fg->ftes;
1658 
1659 	/* assumed list is sorted by index */
1660 	fs_for_each_fte(fte, fg) {
1661 		if (fte->index != start)
1662 			return start;
1663 		start++;
1664 		if (prev)
1665 			*prev = &fte->base.list;
1666 	}
1667 
1668 	return start;
1669 }
1670 
1671 
1672 static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg,
1673 			     u32 *match_value,
1674 			     u32 sw_action,
1675 			     struct mlx5_flow_act *flow_act,
1676 			     struct list_head **prev)
1677 {
1678 	struct fs_fte *fte;
1679 	int index = 0;
1680 
1681 	index = fs_get_free_fg_index(fg, prev);
1682 	fte = fs_alloc_fte(sw_action, flow_act, match_value, index);
1683 	if (IS_ERR(fte))
1684 		return fte;
1685 
1686 	return fte;
1687 }
1688 
/*
 * Publish a freshly created rule under its FTE: name it after its
 * destination, add it to the tree, re-link it into fte->dests, and fire
 * the add notifiers.  Called with the FTE's group lock held (see
 * fs_add_dst_fg()).
 */
static void add_rule_to_tree(struct mlx5_flow_rule *rule,
			     struct fs_fte *fte)
{
	char *dest_name;

	/* NOTE(review): dest_name may be NULL (unknown dest type or alloc
	 * failure) - presumably fs_add_node tolerates a NULL name; confirm. */
	dest_name = get_dest_name(&rule->dest_attr);
	fs_add_node(&rule->base, &fte->base, dest_name, 1);
	/* re-add to list, since fs_add_node reset our list */
	list_add_tail(&rule->base.list, &fte->dests);
	kfree(dest_name);
	call_to_add_rule_notifiers(rule, fte);
}
1701 
/*
 * Tree-removal callback for a rule: unlink the destination from its FTE
 * and, if other destinations remain, push the shrunken destination list
 * to firmware with set_fte.  When the FTE becomes empty no firmware
 * update is issued here (the FTE itself is deleted via fs_del_fte()).
 * Finally fire the delete notifiers.
 */
static void fs_del_dst(struct mlx5_flow_rule *dst)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	struct mlx5_core_dev *dev = fs_get_dev(&dst->base);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;

	WARN_ON(!dev);

	match_value = mlx5_vzalloc(match_len);
	if (!match_value) {
		mlx5_core_warn(dev, "failed to allocate inbox\n");
		return;
	}

	fs_get_parent(fte, dst);
	fs_get_parent(fg, fte);
	mutex_lock(&fg->base.lock);
	/* Snapshot the match value for the firmware command. */
	memcpy(match_value, fte->val, sizeof(fte->val));
	/* ft can't be changed as fg is locked */
	fs_get_parent(ft, fg);
	list_del(&dst->base.list);
	fte->dests_size--;
	if (fte->dests_size) {
		/* Re-program the FTE with the remaining destinations. */
		err = mlx5_cmd_fs_set_fte(dev, ft->vport,
					  &fte->status, match_value, ft->type,
					  ft->id, fte->index, fg->id,
					  &fte->flow_act, fte->sw_action,
					  fte->dests_size, &fte->dests);
		if (err) {
			mlx5_core_warn(dev, "%s can't delete dst %s\n",
				       __func__, dst->base.name);
			goto err;
		}
	}
	call_to_del_rule_notifiers(dst, fte);
err:
	mutex_unlock(&fg->base.lock);
	kvfree(match_value);
}
1745 
1746 static void fs_del_fte(struct fs_fte *fte)
1747 {
1748 	struct mlx5_flow_table *ft;
1749 	struct mlx5_flow_group *fg;
1750 	int err;
1751 	struct mlx5_core_dev *dev;
1752 
1753 	fs_get_parent(fg, fte);
1754 	fs_get_parent(ft, fg);
1755 
1756 	dev = fs_get_dev(&ft->base);
1757 	WARN_ON(!dev);
1758 
1759 	err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status,
1760 				     ft->type, ft->id, fte->index);
1761 	if (err)
1762 		mlx5_core_warn(dev, "flow steering can't delete fte %s\n",
1763 			       fte->base.name);
1764 
1765 	fg->num_ftes--;
1766 }
1767 
1768 static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
1769 				      const struct mlx5_flow_act *act2)
1770 {
1771         u32 action1 = act1->actions;
1772         u32 action2 = act2->actions;
1773 	u32 xored_actions;
1774 
1775 	xored_actions = action1 ^ action2;
1776 
1777 	if (xored_actions & (MLX5_FLOW_ACT_ACTIONS_FLOW_TAG))
1778 		return true;
1779 
1780 	if (action1 & MLX5_FLOW_ACT_ACTIONS_FLOW_TAG &&
1781 	    act1->flow_tag != act2->flow_tag)
1782 		return true;
1783 
1784 	/* Can even have complex actions in merged rules */
1785 	if (action1 & MLX5_FLOW_ACT_ACTIONS_MODIFY_HDR)
1786 		return true;
1787 
1788 	if (action1 & MLX5_FLOW_ACT_ACTIONS_PACKET_REFORMAT)
1789 		return true;
1790 
1791 	if (action1 & MLX5_FLOW_ACT_ACTIONS_COUNT)
1792 		return true;
1793 
1794 	return false;
1795 }
1796 
/* assuming parent fg is locked */
/* Add dst algorithm */
/*
 * Add destination @dest for the given match value inside group @fg.
 * Unless NO_APPEND is requested, first try to merge into an existing FTE
 * with the same masked value, sw_action and non-conflicting actions;
 * otherwise create a new FTE at the first free index.  Returns the new
 * rule or an ERR_PTR (-ENOSPC when the group is full).
 */
static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg,
						   u32 *match_value,
						   u32 sw_action,
						   struct mlx5_flow_act *flow_act,
						   struct mlx5_flow_destination *dest)
{
	struct fs_fte *fte;
	struct mlx5_flow_rule *dst;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	char fte_name[20];

	mutex_lock(&fg->base.lock);
	/* NO_APPEND: never merge; always create a fresh FTE. */
	if (flow_act->flags & MLX5_FLOW_ACT_NO_APPEND)
		goto insert_fte;

	fs_for_each_fte(fte, fg) {
		/* TODO: Check of size against PRM max size */
		mutex_lock(&fte->base.lock);
		if (fs_match_exact_val(&fg->mask, match_value, &fte->val) &&
		    sw_action == fte->sw_action &&
		    !check_conflicting_actions(flow_act, &fte->flow_act)) {
			dst = _fs_add_dst_fte(fte, fg, dest);
			mutex_unlock(&fte->base.lock);
			if (IS_ERR(dst))
				goto unlock_fg;
			goto add_rule;
		}
		mutex_unlock(&fte->base.lock);
	}

insert_fte:
	/* NOTE(review): ft is fetched here but never used afterwards. */
	fs_get_parent(ft, fg);
	if (fg->num_ftes == fg->max_ftes) {
		dst = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	fte = fs_create_fte(fg, match_value, sw_action, flow_act, &prev);
	if (IS_ERR(fte)) {
		dst = (void *)fte;
		goto unlock_fg;
	}
	dst = _fs_add_dst_fte(fte, fg, dest);
	if (IS_ERR(dst)) {
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index);
	/* Add node to tree */
	fs_add_node(&fte->base, &fg->base, fte_name, 0);
	/* Keep the FTE list sorted by index (prev from fs_create_fte). */
	list_add(&fte->base.list, prev);
add_rule:
	add_rule_to_tree(dst, fte);
unlock_fg:
	mutex_unlock(&fg->base.lock);
	return dst;
}
1860 
1861 static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft,
1862 					    u8 match_criteria_enable,
1863 					    u32 *match_criteria,
1864 					    u32 *match_value,
1865 					    u32 sw_action,
1866 					    struct mlx5_flow_act *flow_act,
1867 					    struct mlx5_flow_destination *dest)
1868 {
1869 	/*? where dst_entry is allocated*/
1870 	struct mlx5_flow_group *g;
1871 	struct mlx5_flow_rule *dst;
1872 
1873 	fs_get(&ft->base);
1874 	mutex_lock(&ft->base.lock);
1875 	fs_for_each_fg(g, ft)
1876 		if (fs_match_exact_mask(g->mask.match_criteria_enable,
1877 					match_criteria_enable,
1878 					g->mask.match_criteria,
1879 					match_criteria)) {
1880 			mutex_unlock(&ft->base.lock);
1881 
1882 			dst = fs_add_dst_fg(g, match_value, sw_action, flow_act, dest);
1883 			if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC)
1884 				goto unlock;
1885 		}
1886 	mutex_unlock(&ft->base.lock);
1887 
1888 	g = create_autogroup(ft, match_criteria_enable, match_criteria);
1889 	if (IS_ERR(g)) {
1890 		dst = (void *)g;
1891 		goto unlock;
1892 	}
1893 
1894 	dst = fs_add_dst_fg(g, match_value,
1895 			    sw_action, flow_act, dest);
1896 	if (IS_ERR(dst)) {
1897 		/* Remove assumes refcount > 0 and autogroup creates a group
1898 		 * with a refcount = 0.
1899 		 */
1900 		fs_get(&g->base);
1901 		fs_remove_node(&g->base);
1902 		goto unlock;
1903 	}
1904 
1905 unlock:
1906 	fs_put(&ft->base);
1907 	return dst;
1908 }
1909 
1910 struct mlx5_flow_rule *
1911 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1912 		   u8 match_criteria_enable,
1913 		   u32 *match_criteria,
1914 		   u32 *match_value,
1915 		   u32 sw_action,
1916 		   struct mlx5_flow_act *flow_act,
1917 		   struct mlx5_flow_destination *dest)
1918 {
1919 	struct mlx5_flow_rule *dst;
1920 	struct mlx5_flow_namespace *ns;
1921 
1922 	ns = get_ns_with_notifiers(&ft->base);
1923 	if (ns)
1924 		down_read(&ns->dests_rw_sem);
1925 	dst =  fs_add_dst_ft(ft, match_criteria_enable, match_criteria,
1926 			     match_value, sw_action, flow_act, dest);
1927 	if (ns)
1928 		up_read(&ns->dests_rw_sem);
1929 
1930 	return dst;
1931 
1932 
1933 }
1934 EXPORT_SYMBOL(mlx5_add_flow_rule);
1935 
1936 void mlx5_del_flow_rule(struct mlx5_flow_rule **pp)
1937 {
1938 	struct mlx5_flow_namespace *ns;
1939 	struct mlx5_flow_rule *dst;
1940 
1941 	dst = *pp;
1942 	*pp = NULL;
1943 
1944 	if (IS_ERR_OR_NULL(dst))
1945 		return;
1946 	ns = get_ns_with_notifiers(&dst->base);
1947 	if (ns)
1948 		down_read(&ns->dests_rw_sem);
1949 	fs_remove_node(&dst->base);
1950 	if (ns)
1951 		up_read(&ns->dests_rw_sem);
1952 }
1953 EXPORT_SYMBOL(mlx5_del_flow_rule);
1954 
1955 #define MLX5_CORE_FS_ROOT_NS_NAME "root"
1956 #define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root"
1957 #define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root"
1958 #define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root"
1959 #define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root"
1960 #define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root"
1961 #define MLX5_CORE_FS_PRIO_MAX_FT 4
1962 #define MLX5_CORE_FS_PRIO_MAX_NS 1
1963 
1964 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
1965 				      unsigned prio, int max_ft,
1966 				      const char *name, u8 flags)
1967 {
1968 	struct fs_prio *fs_prio;
1969 
1970 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
1971 	if (!fs_prio)
1972 		return ERR_PTR(-ENOMEM);
1973 
1974 	fs_prio->base.type = FS_TYPE_PRIO;
1975 	fs_add_node(&fs_prio->base, &ns->base, name, 1);
1976 	fs_prio->max_ft = max_ft;
1977 	fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS;
1978 	fs_prio->prio = prio;
1979 	fs_prio->flags = flags;
1980 	list_add_tail(&fs_prio->base.list, &ns->prios);
1981 	INIT_LIST_HEAD(&fs_prio->objs);
1982 	mutex_init(&fs_prio->shared_lock);
1983 
1984 	return fs_prio;
1985 }
1986 
/*
 * Tear down the main root namespace bottom-up in three stages:
 *   1. remove second-level priorities nested inside child namespaces,
 *   2. remove the objects (namespaces/tables) of each top-level priority,
 *   3. remove the top-level priorities themselves,
 * and finally the root namespace node.  dev->root_ns is cleared.
 */
static void cleanup_root_ns(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
	struct fs_prio *iter_prio;

	if (!root_ns)
		return;

	/* stage 1: drop priorities nested under child namespaces */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		struct mlx5_flow_namespace *iter_ns;

		fs_for_each_ns(iter_ns, iter_prio) {
			while (!list_empty(&iter_ns->prios)) {
				struct fs_base *iter_prio2 =
					list_first_entry(&iter_ns->prios,
							 struct fs_base,
							 list);

				fs_remove_node(iter_prio2);
			}
		}
	}

	/* stage 2: drop each top-level priority's direct objects */
	fs_for_each_prio(iter_prio, &root_ns->ns) {
		while (!list_empty(&iter_prio->objs)) {
			struct fs_base *iter_ns =
				list_first_entry(&iter_prio->objs,
						 struct fs_base,
						 list);

				fs_remove_node(iter_ns);
		}
	}
	/* stage 3: drop the top-level priorities themselves */
	while (!list_empty(&root_ns->ns.prios)) {
		struct fs_base *iter_prio =
			list_first_entry(&root_ns->ns.prios,
					 struct fs_base,
					 list);

		fs_remove_node(iter_prio);
	}

	fs_remove_node(&root_ns->ns.base);
	dev->root_ns = NULL;
}
2035 
2036 static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
2037 					struct mlx5_flow_root_namespace *root_ns)
2038 {
2039 	struct fs_base *prio;
2040 
2041 	if (!root_ns)
2042 		return;
2043 
2044 	if (!list_empty(&root_ns->ns.prios)) {
2045 		prio = list_first_entry(&root_ns->ns.prios,
2046 					struct fs_base,
2047 				 list);
2048 		fs_remove_node(prio);
2049 	}
2050 	fs_remove_node(&root_ns->ns.base);
2051 	root_ns = NULL;
2052 }
2053 
/*
 * Global flow-steering teardown for @dev: flow counters first, then the
 * main root namespace, then each auxiliary single-priority root.
 */
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
	mlx5_cleanup_fc_stats(dev);
	cleanup_root_ns(dev);
	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
}
2064 
2065 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
2066 						 *ns)
2067 {
2068 	ns->base.type = FS_TYPE_NAMESPACE;
2069 	init_rwsem(&ns->dests_rw_sem);
2070 	init_rwsem(&ns->notifiers_rw_sem);
2071 	INIT_LIST_HEAD(&ns->prios);
2072 	INIT_LIST_HEAD(&ns->list_notifiers);
2073 
2074 	return ns;
2075 }
2076 
2077 static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
2078 							  enum fs_ft_type
2079 							  table_type,
2080 							  char *name)
2081 {
2082 	struct mlx5_flow_root_namespace *root_ns;
2083 	struct mlx5_flow_namespace *ns;
2084 
2085 	/* create the root namespace */
2086 	root_ns = mlx5_vzalloc(sizeof(*root_ns));
2087 	if (!root_ns)
2088 		goto err;
2089 
2090 	root_ns->dev = dev;
2091 	root_ns->table_type = table_type;
2092 	mutex_init(&root_ns->fs_chain_lock);
2093 
2094 	ns = &root_ns->ns;
2095 	fs_init_namespace(ns);
2096 	fs_add_node(&ns->base, NULL, name, 1);
2097 
2098 	return root_ns;
2099 err:
2100 	return NULL;
2101 }
2102 
2103 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
2104 {
2105 	struct fs_prio *prio;
2106 
2107 	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
2108 					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
2109 	if (!dev->fdb_root_ns)
2110 		return -ENOMEM;
2111 
2112 	/* create 1 prio*/
2113 	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
2114 	if (IS_ERR(prio))
2115 		return PTR_ERR(prio);
2116 	else
2117 		return 0;
2118 }
2119 
2120 #define MAX_VPORTS 128
2121 
2122 static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
2123 {
2124 	struct fs_prio *prio;
2125 
2126 	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
2127 						 MLX5_CORE_FS_ESW_EGRESS_ACL);
2128 	if (!dev->esw_egress_root_ns)
2129 		return -ENOMEM;
2130 
2131 	/* create 1 prio*/
2132 	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
2133 			      "esw_egress_prio", 0);
2134 	if (IS_ERR(prio))
2135 		return PTR_ERR(prio);
2136 	else
2137 		return 0;
2138 }
2139 
2140 static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
2141 {
2142 	struct fs_prio *prio;
2143 
2144 	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
2145 						  MLX5_CORE_FS_ESW_INGRESS_ACL);
2146 	if (!dev->esw_ingress_root_ns)
2147 		return -ENOMEM;
2148 
2149 	/* create 1 prio*/
2150 	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
2151 			      "esw_ingress_prio", 0);
2152 	if (IS_ERR(prio))
2153 		return PTR_ERR(prio);
2154 	else
2155 		return 0;
2156 }
2157 
2158 static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
2159 {
2160 	struct fs_prio *prio;
2161 
2162 	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
2163 				     MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
2164 	if (!dev->sniffer_rx_root_ns)
2165 		return  -ENOMEM;
2166 
2167 	/* create 1 prio*/
2168 	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
2169 			      "sniffer_prio", 0);
2170 	if (IS_ERR(prio))
2171 		return PTR_ERR(prio);
2172 	else
2173 		return 0;
2174 }
2175 
2176 
2177 static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
2178 {
2179 	struct fs_prio *prio;
2180 
2181 	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
2182 						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
2183 	if (!dev->sniffer_tx_root_ns)
2184 		return  -ENOMEM;
2185 
2186 	/* create 1 prio*/
2187 	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
2188 			      "sniffer_prio", 0);
2189 	if (IS_ERR(prio))
2190 		return PTR_ERR(prio);
2191 	else
2192 		return 0;
2193 }
2194 
2195 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2196 						       const char *name)
2197 {
2198 	struct mlx5_flow_namespace	*ns;
2199 
2200 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2201 	if (!ns)
2202 		return ERR_PTR(-ENOMEM);
2203 
2204 	fs_init_namespace(ns);
2205 	fs_add_node(&ns->base, &prio->base, name, 1);
2206 	list_add_tail(&ns->base.list, &prio->objs);
2207 
2208 	return ns;
2209 }
2210 
#define FLOW_TABLE_BIT_SZ 1
/*
 * Extract the single capability bit at bit offset 'offset' from the cached
 * MLX5_CAP_FLOW_TABLE capability words (stored big-endian).
 */
#define GET_FLOW_TABLE_CAP(dev, offset) \
	((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
			offset / 32)) >>					\
	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2216 
2217 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2218 {
2219 	int i;
2220 
2221 	for (i = 0; i < caps->arr_sz; i++) {
2222 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2223 			return false;
2224 	}
2225 	return true;
2226 }
2227 
/*
 * Recursively instantiate one node of the static init tree as a runtime
 * fs_prio or mlx5_flow_namespace under base_parent, then recurse into the
 * node's children.
 *
 * A prio node whose min_ft_level exceeds the device limit, or whose
 * required capabilities are missing, is silently skipped (returns 0).
 * Returns a negative errno on creation failure or an unknown node type.
 */
static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
		    struct init_tree_node *node, struct fs_base *base_parent,
		    struct init_tree_node *tree_parent)
{
	struct mlx5_flow_namespace *fs_ns;
	struct fs_prio *fs_prio;
	int priority;
	struct fs_base *base;
	int i;
	int err = 0;

	if (node->type == FS_TYPE_PRIO) {
		if ((node->min_ft_level > max_ft_level) ||
		    !has_required_caps(dev, &node->caps))
			goto out;

		fs_get_obj(fs_ns, base_parent);
		/* Priority index is the node's position in its parent's array. */
		priority = node - tree_parent->children;
		fs_prio = fs_create_prio(fs_ns, priority,
					 node->max_ft,
					 node->name, node->flags);
		if (IS_ERR(fs_prio)) {
			err = PTR_ERR(fs_prio);
			goto out;
		}
		base = &fs_prio->base;
	} else if (node->type == FS_TYPE_NAMESPACE) {
		fs_get_obj(fs_prio, base_parent);
		fs_ns = fs_create_namespace(fs_prio, node->name);
		if (IS_ERR(fs_ns)) {
			err = PTR_ERR(fs_ns);
			goto out;
		}
		base = &fs_ns->base;
	} else {
		return -EINVAL;
	}
	/* Recurse into the children, stopping at the first failure. */
	for (i = 0; i < node->ar_size; i++) {
		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
				      node);
		if (err)
			break;
	}
out:
	return err;
}
2274 
2275 static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
2276 		   struct init_tree_node *node, struct fs_base *parent)
2277 {
2278 	int i;
2279 	struct mlx5_flow_namespace *fs_ns;
2280 	int err = 0;
2281 
2282 	fs_get_obj(fs_ns, parent);
2283 	for (i = 0; i < node->ar_size; i++) {
2284 		err = _init_root_tree(dev, max_ft_level,
2285 				      &node->children[i], &fs_ns->base, node);
2286 		if (err)
2287 			break;
2288 	}
2289 	return err;
2290 }
2291 
2292 static int sum_max_ft_in_prio(struct fs_prio *prio);
2293 static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
2294 {
2295 	struct fs_prio *prio;
2296 	int sum = 0;
2297 
2298 	fs_for_each_prio(prio, ns) {
2299 		sum += sum_max_ft_in_prio(prio);
2300 	}
2301 	return  sum;
2302 }
2303 
2304 static int sum_max_ft_in_prio(struct fs_prio *prio)
2305 {
2306 	int sum = 0;
2307 	struct fs_base *it;
2308 	struct mlx5_flow_namespace	*ns;
2309 
2310 	if (prio->max_ft)
2311 		return prio->max_ft;
2312 
2313 	fs_for_each_ns_or_ft(it, prio) {
2314 		if (it->type == FS_TYPE_FLOW_TABLE)
2315 			continue;
2316 
2317 		fs_get_obj(ns, it);
2318 		sum += sum_max_ft_in_ns(ns);
2319 	}
2320 	prio->max_ft = sum;
2321 	return  sum;
2322 }
2323 
2324 static void set_max_ft(struct mlx5_flow_namespace *ns)
2325 {
2326 	struct fs_prio *prio;
2327 
2328 	if (!ns)
2329 		return;
2330 
2331 	fs_for_each_prio(prio, ns)
2332 		sum_max_ft_in_prio(prio);
2333 }
2334 
2335 static int init_root_ns(struct mlx5_core_dev *dev)
2336 {
2337 	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
2338 					      flow_table_properties_nic_receive.
2339 					      max_ft_level);
2340 
2341 	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
2342 				      MLX5_CORE_FS_ROOT_NS_NAME);
2343 	if (IS_ERR_OR_NULL(dev->root_ns))
2344 		goto err;
2345 
2346 
2347 	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
2348 		goto err;
2349 
2350 	set_max_ft(&dev->root_ns->ns);
2351 
2352 	return 0;
2353 err:
2354 	return -ENOMEM;
2355 }
2356 
2357 u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
2358 {
2359 	struct fs_base *pbase;
2360 	struct mlx5_flow_group *fg;
2361 
2362 	pbase = rule->base.parent;
2363 	WARN_ON(!pbase);
2364 	pbase = pbase->parent;
2365 	WARN_ON(!pbase);
2366 
2367 	fs_get_obj(fg, pbase);
2368 	return fg->mask.match_criteria_enable;
2369 }
2370 
2371 void mlx5_get_match_value(u32 *match_value,
2372 			  struct mlx5_flow_rule *rule)
2373 {
2374 	struct fs_base *pbase;
2375 	struct fs_fte *fte;
2376 
2377 	pbase = rule->base.parent;
2378 	WARN_ON(!pbase);
2379 	fs_get_obj(fte, pbase);
2380 
2381 	memcpy(match_value, fte->val, sizeof(fte->val));
2382 }
2383 
2384 void mlx5_get_match_criteria(u32 *match_criteria,
2385 			     struct mlx5_flow_rule *rule)
2386 {
2387 	struct fs_base *pbase;
2388 	struct mlx5_flow_group *fg;
2389 
2390 	pbase = rule->base.parent;
2391 	WARN_ON(!pbase);
2392 	pbase = pbase->parent;
2393 	WARN_ON(!pbase);
2394 
2395 	fs_get_obj(fg, pbase);
2396 	memcpy(match_criteria, &fg->mask.match_criteria,
2397 	       sizeof(fg->mask.match_criteria));
2398 }
2399 
/*
 * Top-level flow steering initialization: create every root namespace the
 * device supports plus the flow-counter statistics machinery.  On any
 * failure, everything set up so far is torn down via mlx5_cleanup_fs()
 * and the error is returned.
 */
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
	int err;

	/* The NIC RX tree exists only when the capability is advertised. */
	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
		err = init_root_ns(dev);
		if (err)
			goto err;
	}

	err = init_fdb_root_ns(dev);
	if (err)
		goto err;

	err = init_egress_acl_root_ns(dev);
	if (err)
		goto err;

	err = init_ingress_acl_root_ns(dev);
	if (err)
		goto err;

	err = init_sniffer_tx_root_ns(dev);
	if (err)
		goto err;

	err = init_sniffer_rx_root_ns(dev);
	if (err)
		goto err;

	err = mlx5_init_fc_stats(dev);
	if (err)
		goto err;

	return 0;
err:
	/* Unwind whatever was initialized before the failure. */
	mlx5_cleanup_fs(dev);
	return err;
}
2439 
2440 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2441 						  enum mlx5_flow_namespace_type type)
2442 {
2443 	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
2444 	int prio;
2445 	static struct fs_prio *fs_prio;
2446 	struct mlx5_flow_namespace *ns;
2447 
2448 	switch (type) {
2449 	case MLX5_FLOW_NAMESPACE_BYPASS:
2450 		prio = 0;
2451 		break;
2452 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
2453 		prio = 1;
2454 		break;
2455 	case MLX5_FLOW_NAMESPACE_KERNEL:
2456 		prio = 2;
2457 		break;
2458 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2459 		prio = 3;
2460 		break;
2461 	case MLX5_FLOW_NAMESPACE_FDB:
2462 		if (dev->fdb_root_ns)
2463 			return &dev->fdb_root_ns->ns;
2464 		else
2465 			return NULL;
2466 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2467 		if (dev->esw_egress_root_ns)
2468 			return &dev->esw_egress_root_ns->ns;
2469 		else
2470 			return NULL;
2471 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2472 		if (dev->esw_ingress_root_ns)
2473 			return &dev->esw_ingress_root_ns->ns;
2474 		else
2475 			return NULL;
2476 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2477 		if (dev->sniffer_rx_root_ns)
2478 			return &dev->sniffer_rx_root_ns->ns;
2479 		else
2480 			return NULL;
2481 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2482 		if (dev->sniffer_tx_root_ns)
2483 			return &dev->sniffer_tx_root_ns->ns;
2484 		else
2485 			return NULL;
2486 	default:
2487 		return NULL;
2488 	}
2489 
2490 	if (!root_ns)
2491 		return NULL;
2492 
2493 	fs_prio = find_prio(&root_ns->ns, prio);
2494 	if (!fs_prio)
2495 		return NULL;
2496 
2497 	ns = list_first_entry(&fs_prio->objs,
2498 			      typeof(*ns),
2499 			      base.list);
2500 
2501 	return ns;
2502 }
2503 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2504 
2505 
2506 int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
2507 				  struct mlx5_flow_handler *fs_handler,
2508 				  void  *client_data)
2509 {
2510 	struct fs_client_priv_data *priv_data;
2511 
2512 	mutex_lock(&rule->clients_lock);
2513 	/*Check that hanlder isn't exists in the list already*/
2514 	list_for_each_entry(priv_data, &rule->clients_data, list) {
2515 		if (priv_data->fs_handler == fs_handler) {
2516 			priv_data->client_dst_data = client_data;
2517 			goto unlock;
2518 		}
2519 	}
2520 	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
2521 	if (!priv_data) {
2522 		mutex_unlock(&rule->clients_lock);
2523 		return -ENOMEM;
2524 	}
2525 
2526 	priv_data->client_dst_data = client_data;
2527 	priv_data->fs_handler = fs_handler;
2528 	list_add(&priv_data->list, &rule->clients_data);
2529 
2530 unlock:
2531 	mutex_unlock(&rule->clients_lock);
2532 
2533 	return 0;
2534 }
2535 
2536 static int remove_from_clients(struct mlx5_flow_rule *rule,
2537 			bool ctx_changed,
2538 			void *client_data,
2539 			void *context)
2540 {
2541 	struct fs_client_priv_data *iter_client;
2542 	struct fs_client_priv_data *temp_client;
2543 	struct mlx5_flow_handler *handler = (struct
2544 						mlx5_flow_handler*)context;
2545 
2546 	mutex_lock(&rule->clients_lock);
2547 	list_for_each_entry_safe(iter_client, temp_client,
2548 				 &rule->clients_data, list) {
2549 		if (iter_client->fs_handler == handler) {
2550 			list_del(&iter_client->list);
2551 			kfree(iter_client);
2552 			break;
2553 		}
2554 	}
2555 	mutex_unlock(&rule->clients_lock);
2556 
2557 	return 0;
2558 }
2559 
2560 struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
2561 								enum mlx5_flow_namespace_type ns_type,
2562 								rule_event_fn add_cb,
2563 								rule_event_fn del_cb,
2564 								void *context)
2565 {
2566 	struct mlx5_flow_namespace *ns;
2567 	struct mlx5_flow_handler *handler;
2568 
2569 	ns = mlx5_get_flow_namespace(dev, ns_type);
2570 	if (!ns)
2571 		return ERR_PTR(-EINVAL);
2572 
2573 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
2574 	if (!handler)
2575 		return ERR_PTR(-ENOMEM);
2576 
2577 	handler->add_dst_cb = add_cb;
2578 	handler->del_dst_cb = del_cb;
2579 	handler->client_context = context;
2580 	handler->ns = ns;
2581 	down_write(&ns->notifiers_rw_sem);
2582 	list_add_tail(&handler->list, &ns->list_notifiers);
2583 	up_write(&ns->notifiers_rw_sem);
2584 
2585 	return handler;
2586 }
2587 
static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
				rule_event_fn add_rule_cb,
				void *context);

/*
 * Unregister a rule notifier: strip its private data from every rule in
 * the namespace, unlink it from the notifier list, and free it.  Both
 * semaphores are taken for writing, dests before notifiers — the same
 * order used by mlx5_flow_iterate_existing_rules().
 */
void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
{
	struct mlx5_flow_namespace *ns = handler->ns;

	/* Remove from dst's clients */
	down_write(&ns->dests_rw_sem);
	down_write(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, remove_from_clients, handler);
	list_del(&handler->list);
	up_write(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
	kfree(handler);
}
2605 
/*
 * Invoke add_rule_cb on every rule in a flow table, walking flow group ->
 * fte -> rule with each level's lock held.  The callback sees
 * is_new_rule == true only for the first rule of each fte.  Iteration
 * stops early at the first callback error.
 */
static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
				rule_event_fn add_rule_cb,
				void *context)
{
	struct mlx5_flow_group *iter_fg;
	struct fs_fte *iter_fte;
	struct mlx5_flow_rule *iter_rule;
	int err = 0;
	bool is_new_rule;

	mutex_lock(&ft->base.lock);
	fs_for_each_fg(iter_fg, ft) {
		mutex_lock(&iter_fg->base.lock);
		fs_for_each_fte(iter_fte, iter_fg) {
			mutex_lock(&iter_fte->base.lock);
			is_new_rule = true;
			fs_for_each_dst(iter_rule, iter_fte) {
				/* Hold a reference on the rule across the callback. */
				fs_get(&iter_rule->base);
				err = add_rule_cb(iter_rule,
						 is_new_rule,
						 NULL,
						 context);
				/* Parent (fte) lock is already held here. */
				fs_put_parent_locked(&iter_rule->base);
				if (err)
					break;
				is_new_rule = false;
			}
			mutex_unlock(&iter_fte->base.lock);
			if (err)
				break;
		}
		mutex_unlock(&iter_fg->base.lock);
		if (err)
			break;
	}
	mutex_unlock(&ft->base.lock);
}
2643 
2644 static void iterate_rules_in_prio(struct fs_prio *prio,
2645 				  rule_event_fn add_rule_cb,
2646 				  void *context)
2647 {
2648 	struct fs_base *it;
2649 
2650 	mutex_lock(&prio->base.lock);
2651 	fs_for_each_ns_or_ft(it, prio) {
2652 		if (it->type == FS_TYPE_FLOW_TABLE) {
2653 			struct mlx5_flow_table	      *ft;
2654 
2655 			fs_get_obj(ft, it);
2656 			iterate_rules_in_ft(ft, add_rule_cb, context);
2657 		} else {
2658 			struct mlx5_flow_namespace *ns;
2659 
2660 			fs_get_obj(ns, it);
2661 			iterate_rules_in_ns(ns, add_rule_cb, context);
2662 		}
2663 	}
2664 	mutex_unlock(&prio->base.lock);
2665 }
2666 
2667 static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
2668 				rule_event_fn add_rule_cb,
2669 				void *context)
2670 {
2671 	struct fs_prio *iter_prio;
2672 
2673 	mutex_lock(&ns->base.lock);
2674 	fs_for_each_prio(iter_prio, ns) {
2675 		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
2676 	}
2677 	mutex_unlock(&ns->base.lock);
2678 }
2679 
/*
 * Replay all existing rules in the namespace to add_rule_cb.  Takes
 * dests for writing and notifiers for reading, in the same order as
 * mlx5_unregister_rule_notifier(), so replay and (un)registration
 * serialize against each other.
 */
void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
					 rule_event_fn add_rule_cb,
					 void *context)
{
	down_write(&ns->dests_rw_sem);
	down_read(&ns->notifiers_rw_sem);
	iterate_rules_in_ns(ns, add_rule_cb, context);
	up_read(&ns->notifiers_rw_sem);
	up_write(&ns->dests_rw_sem);
}
2690 
2691 
2692 void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
2693 {
2694 	struct mlx5_flow_rule_node *iter_node;
2695 	struct mlx5_flow_rule_node *temp_node;
2696 
2697 	list_for_each_entry_safe(iter_node, temp_node, &rules_list->head, list) {
2698 		list_del(&iter_node->list);
2699 		kfree(iter_node);
2700 	}
2701 
2702 	kfree(rules_list);
2703 }
2704 
2705 #define ROCEV1_ETHERTYPE 0x8915
2706 static int set_rocev1_rules(struct list_head *rules_list)
2707 {
2708 	struct mlx5_flow_rule_node *rocev1_rule;
2709 
2710 	rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL);
2711 	if (!rocev1_rule)
2712 		return -ENOMEM;
2713 
2714 	rocev1_rule->match_criteria_enable =
2715 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2716 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype,
2717 		 0xffff);
2718 	MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype,
2719 		 ROCEV1_ETHERTYPE);
2720 
2721 	list_add_tail(&rocev1_rule->list, rules_list);
2722 
2723 	return 0;
2724 }
2725 
2726 #define ROCEV2_UDP_PORT 4791
2727 static int set_rocev2_rules(struct list_head *rules_list)
2728 {
2729 	struct mlx5_flow_rule_node *ipv4_rule;
2730 	struct mlx5_flow_rule_node *ipv6_rule;
2731 
2732 	ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL);
2733 	if (!ipv4_rule)
2734 		return -ENOMEM;
2735 
2736 	ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL);
2737 	if (!ipv6_rule) {
2738 		kfree(ipv4_rule);
2739 		return -ENOMEM;
2740 	}
2741 
2742 	ipv4_rule->match_criteria_enable =
2743 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2744 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype,
2745 		 0xffff);
2746 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype,
2747 		 0x0800);
2748 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol,
2749 		 0xff);
2750 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol,
2751 		 IPPROTO_UDP);
2752 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport,
2753 		 0xffff);
2754 	MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport,
2755 		 ROCEV2_UDP_PORT);
2756 
2757 	ipv6_rule->match_criteria_enable =
2758 		1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS;
2759 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype,
2760 		 0xffff);
2761 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype,
2762 		 0x86dd);
2763 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol,
2764 		 0xff);
2765 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol,
2766 		 IPPROTO_UDP);
2767 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport,
2768 		 0xffff);
2769 	MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport,
2770 		 ROCEV2_UDP_PORT);
2771 
2772 	list_add_tail(&ipv4_rule->list, rules_list);
2773 	list_add_tail(&ipv6_rule->list, rules_list);
2774 
2775 	return 0;
2776 }
2777 
2778 
2779 struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode)
2780 {
2781 	int err = 0;
2782 	struct mlx5_flow_rules_list *rules_list =
2783 		kzalloc(sizeof(*rules_list), GFP_KERNEL);
2784 
2785 	if (!rules_list)
2786 		return NULL;
2787 
2788 	INIT_LIST_HEAD(&rules_list->head);
2789 
2790 	if (roce_mode & MLX5_ROCE_VERSION_1_CAP) {
2791 		err = set_rocev1_rules(&rules_list->head);
2792 		if (err)
2793 			goto free_list;
2794 	}
2795 	if (roce_mode & MLX5_ROCE_VERSION_2_CAP)
2796 		err = set_rocev2_rules(&rules_list->head);
2797 	if (err)
2798 		goto free_list;
2799 
2800 	return rules_list;
2801 
2802 free_list:
2803 	mlx5_del_flow_rules_list(rules_list);
2804 	return NULL;
2805 }
2806 
2807 struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
2808 						 enum mlx5_flow_namespace_type ns_type,
2809 						 u8 num_actions,
2810 						 void *modify_actions)
2811 {
2812 	struct mlx5_modify_hdr *modify_hdr;
2813 	int err;
2814 
2815 	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
2816 	if (!modify_hdr)
2817 		return ERR_PTR(-ENOMEM);
2818 
2819 	modify_hdr->ns_type = ns_type;
2820 	err = mlx5_cmd_modify_header_alloc(dev, ns_type, num_actions,
2821 					   modify_actions, modify_hdr);
2822 	if (err) {
2823 		kfree(modify_hdr);
2824 		return ERR_PTR(err);
2825 	}
2826 
2827 	return modify_hdr;
2828 }
2829 EXPORT_SYMBOL(mlx5_modify_header_alloc);
2830 
/*
 * Release a modify-header context: tear down the firmware object, then
 * free the host-side descriptor.  (Re-indented with tabs to match the
 * rest of the file; was space-indented.)
 */
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
				struct mlx5_modify_hdr *modify_hdr)
{
	mlx5_cmd_modify_header_dealloc(dev, modify_hdr);
	kfree(modify_hdr);
}
EXPORT_SYMBOL(mlx5_modify_header_dealloc);
2838 
2839 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
2840                                                      struct mlx5_pkt_reformat_params *params,
2841                                                      enum mlx5_flow_namespace_type ns_type)
2842 {
2843         struct mlx5_pkt_reformat *pkt_reformat;
2844         int err;
2845 
2846         pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
2847         if (!pkt_reformat)
2848                 return ERR_PTR(-ENOMEM);
2849 
2850         pkt_reformat->ns_type = ns_type;
2851         pkt_reformat->reformat_type = params->type;
2852 	err = mlx5_cmd_packet_reformat_alloc(dev, params, ns_type,
2853 					     pkt_reformat);
2854         if (err) {
2855                 kfree(pkt_reformat);
2856                 return ERR_PTR(err);
2857         }
2858 
2859         return pkt_reformat;
2860 }
2861 EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
2862 
/*
 * Release a packet-reformat context: tear down the firmware object, then
 * free the host-side descriptor.  (Re-indented with tabs to match the
 * rest of the file; was space-indented.)
 */
void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
				  struct mlx5_pkt_reformat *pkt_reformat)
{
	mlx5_cmd_packet_reformat_dealloc(dev, pkt_reformat);
	kfree(pkt_reformat);
}
EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
2870 
2871