xref: /linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision 44f57d78)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * into the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 
21 	if (!pi)
22 		return ICE_ERR_PARAM;
23 
24 	hw = pi->hw;
25 
26 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
27 	if (!root)
28 		return ICE_ERR_NO_MEMORY;
29 
30 	/* coverity[suspicious_sizeof] */
31 	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
32 				      sizeof(*root), GFP_KERNEL);
33 	if (!root->children) {
34 		devm_kfree(ice_hw_to_dev(hw), root);
35 		return ICE_ERR_NO_MEMORY;
36 	}
37 
38 	memcpy(&root->info, info, sizeof(*info));
39 	pi->root = root;
40 	return 0;
41 }
42 
43 /**
44  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
45  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
46  * @teid: node TEID to search
47  *
48  * This function searches for a node matching the TEID in the scheduling tree
49  * from the SW DB. The search is recursive and is restricted by the number of
50  * layers it has searched through, stopping at the max supported layer.
51  *
52  * This function needs to be called when holding the port_info->sched_lock
53  */
54 struct ice_sched_node *
55 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
56 {
57 	u16 i;
58 
59 	/* The TEID is the same as that of the start_node */
60 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
61 		return start_node;
62 
63 	/* The node has no children or is at the max layer */
64 	if (!start_node->num_children ||
65 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
66 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
67 		return NULL;
68 
69 	/* Check if TEID matches to any of the children nodes */
70 	for (i = 0; i < start_node->num_children; i++)
71 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
72 			return start_node->children[i];
73 
74 	/* Search within each child's sub-tree */
75 	for (i = 0; i < start_node->num_children; i++) {
76 		struct ice_sched_node *tmp;
77 
78 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
79 						  teid);
80 		if (tmp)
81 			return tmp;
82 	}
83 
84 	return NULL;
85 }
86 
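/* Editor's example (not part of the original file): a minimal sketch of how a
 * caller might resolve a TEID to a SW DB node with the helper above. The
 * wrapper name is hypothetical; the port_info->sched_lock requirement comes
 * from the kernel-doc of ice_sched_find_node_by_teid().
 */
static inline struct ice_sched_node *
ice_sched_example_lookup_teid(struct ice_port_info *pi, u32 teid)
{
	struct ice_sched_node *node;

	/* the search walks the whole tree, so hold the scheduler lock */
	mutex_lock(&pi->sched_lock);
	node = pi->root ? ice_sched_find_node_by_teid(pi->root, teid) : NULL;
	mutex_unlock(&pi->sched_lock);
	return node;
}
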
87 /**
88  * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89  * @hw: pointer to the HW struct
90  * @cmd_opc: cmd opcode
91  * @elems_req: number of elements to request
92  * @buf: pointer to buffer
93  * @buf_size: buffer size in bytes
94  * @elems_resp: returns total number of elements response
95  * @cd: pointer to command details structure or NULL
96  *
97  * This function sends a scheduling elements cmd (cmd_opc)
98  */
99 static enum ice_status
100 ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
101 			    u16 elems_req, void *buf, u16 buf_size,
102 			    u16 *elems_resp, struct ice_sq_cd *cd)
103 {
104 	struct ice_aqc_sched_elem_cmd *cmd;
105 	struct ice_aq_desc desc;
106 	enum ice_status status;
107 
108 	cmd = &desc.params.sched_elem_cmd;
109 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
110 	cmd->num_elem_req = cpu_to_le16(elems_req);
111 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
112 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
113 	if (!status && elems_resp)
114 		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
115 
116 	return status;
117 }
118 
119 /**
120  * ice_aq_query_sched_elems - query scheduler elements
121  * @hw: pointer to the HW struct
122  * @elems_req: number of elements to query
123  * @buf: pointer to buffer
124  * @buf_size: buffer size in bytes
125  * @elems_ret: returns total number of elements returned
126  * @cd: pointer to command details structure or NULL
127  *
128  * Query scheduling elements (0x0404)
129  */
130 enum ice_status
131 ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
132 			 struct ice_aqc_get_elem *buf, u16 buf_size,
133 			 u16 *elems_ret, struct ice_sq_cd *cd)
134 {
135 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
136 					   elems_req, (void *)buf, buf_size,
137 					   elems_ret, cd);
138 }
139 
140 /**
141  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
142  * @pi: port information structure
143  * @layer: Scheduler layer of the node
144  * @info: Scheduler element information from firmware
145  *
146  * This function inserts a scheduler node to the SW DB.
147  */
148 enum ice_status
149 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
150 		   struct ice_aqc_txsched_elem_data *info)
151 {
152 	struct ice_sched_node *parent;
153 	struct ice_aqc_get_elem elem;
154 	struct ice_sched_node *node;
155 	enum ice_status status;
156 	struct ice_hw *hw;
157 
158 	if (!pi)
159 		return ICE_ERR_PARAM;
160 
161 	hw = pi->hw;
162 
163 	/* A valid parent node should be there */
164 	parent = ice_sched_find_node_by_teid(pi->root,
165 					     le32_to_cpu(info->parent_teid));
166 	if (!parent) {
167 		ice_debug(hw, ICE_DBG_SCHED,
168 			  "Parent Node not found for parent_teid=0x%x\n",
169 			  le32_to_cpu(info->parent_teid));
170 		return ICE_ERR_PARAM;
171 	}
172 
173 	/* query the current node information from FW before adding it
174 	 * to the SW DB
175 	 */
176 	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
177 	if (status)
178 		return status;
179 
180 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
181 	if (!node)
182 		return ICE_ERR_NO_MEMORY;
183 	if (hw->max_children[layer]) {
184 		/* coverity[suspicious_sizeof] */
185 		node->children = devm_kcalloc(ice_hw_to_dev(hw),
186 					      hw->max_children[layer],
187 					      sizeof(*node), GFP_KERNEL);
188 		if (!node->children) {
189 			devm_kfree(ice_hw_to_dev(hw), node);
190 			return ICE_ERR_NO_MEMORY;
191 		}
192 	}
193 
194 	node->in_use = true;
195 	node->parent = parent;
196 	node->tx_sched_layer = layer;
197 	parent->children[parent->num_children++] = node;
198 	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
199 	return 0;
200 }
201 
202 /**
203  * ice_aq_delete_sched_elems - delete scheduler elements
204  * @hw: pointer to the HW struct
205  * @grps_req: number of groups to delete
206  * @buf: pointer to buffer
207  * @buf_size: buffer size in bytes
208  * @grps_del: returns total number of elements deleted
209  * @cd: pointer to command details structure or NULL
210  *
211  * Delete scheduling elements (0x040F)
212  */
213 static enum ice_status
214 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
215 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
216 			  u16 *grps_del, struct ice_sq_cd *cd)
217 {
218 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
219 					   grps_req, (void *)buf, buf_size,
220 					   grps_del, cd);
221 }
222 
223 /**
224  * ice_sched_remove_elems - remove nodes from HW
225  * @hw: pointer to the HW struct
226  * @parent: pointer to the parent node
227  * @num_nodes: number of nodes
228  * @node_teids: array of node TEIDs to be deleted
229  *
230  * This function removes nodes from HW
231  */
232 static enum ice_status
233 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
234 		       u16 num_nodes, u32 *node_teids)
235 {
236 	struct ice_aqc_delete_elem *buf;
237 	u16 i, num_groups_removed = 0;
238 	enum ice_status status;
239 	u16 buf_size;
240 
241 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
242 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
243 	if (!buf)
244 		return ICE_ERR_NO_MEMORY;
245 
246 	buf->hdr.parent_teid = parent->info.node_teid;
247 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
248 	for (i = 0; i < num_nodes; i++)
249 		buf->teid[i] = cpu_to_le32(node_teids[i]);
250 
251 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
252 					   &num_groups_removed, NULL);
253 	if (status || num_groups_removed != 1)
254 		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
255 			  hw->adminq.sq_last_status);
256 
257 	devm_kfree(ice_hw_to_dev(hw), buf);
258 	return status;
259 }
260 
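/* Editor's note (not in the original source): the delete buffer above is a
 * variable-length structure whose declaration already carries one TEID slot,
 * which is why the size math subtracts one. A hypothetical worked example for
 * a four-node delete:
 *
 *	buf_size = sizeof(*buf) + sizeof(u32) * (4 - 1)
 *		 = sizeof(struct ice_aqc_delete_elem) + 12 bytes
 *
 * i.e. room for the header plus four __le32 entries in buf->teid[].
 */
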
261 /**
262  * ice_sched_get_first_node - get the first node of the given layer
263  * @hw: pointer to the HW struct
264  * @parent: pointer to the base node of the subtree
265  * @layer: layer number
266  *
267  * This function retrieves the first node of the given layer from the subtree
268  */
269 static struct ice_sched_node *
270 ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
271 			 u8 layer)
272 {
273 	u8 i;
274 
275 	if (layer < hw->sw_entry_point_layer)
276 		return NULL;
277 	for (i = 0; i < parent->num_children; i++) {
278 		struct ice_sched_node *node = parent->children[i];
279 
280 		if (node) {
281 			if (node->tx_sched_layer == layer)
282 				return node;
283 			/* this recursion is intentional, and wouldn't
284 			 * go more than 9 calls
285 			 */
286 			return ice_sched_get_first_node(hw, node, layer);
287 		}
288 	}
289 	return NULL;
290 }
291 
292 /**
293  * ice_sched_get_tc_node - get pointer to TC node
294  * @pi: port information structure
295  * @tc: TC number
296  *
297  * This function returns the TC node pointer
298  */
299 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
300 {
301 	u8 i;
302 
303 	if (!pi)
304 		return NULL;
305 	for (i = 0; i < pi->root->num_children; i++)
306 		if (pi->root->children[i]->tc_num == tc)
307 			return pi->root->children[i];
308 	return NULL;
309 }
310 
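/* Editor's example (not part of the original file): nodes within one layer of
 * a TC branch are chained through their ->sibling pointers, so a layer is
 * typically walked by combining ice_sched_get_tc_node() with
 * ice_sched_get_first_node(). A minimal, hypothetical counting sketch:
 */
static inline u16
ice_sched_example_count_layer_nodes(struct ice_port_info *pi, u8 tc, u8 layer)
{
	struct ice_sched_node *tc_node, *node;
	u16 count = 0;

	tc_node = ice_sched_get_tc_node(pi, tc);
	if (!tc_node)
		return 0;

	/* start from the first node of the layer and follow the siblings */
	node = ice_sched_get_first_node(pi->hw, tc_node, layer);
	while (node) {
		count++;
		node = node->sibling;
	}
	return count;
}
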
311 /**
312  * ice_free_sched_node - Free a Tx scheduler node from SW DB
313  * @pi: port information structure
314  * @node: pointer to the ice_sched_node struct
315  *
316  * This function frees up a node from SW DB as well as from HW
317  *
318  * This function needs to be called with the port_info->sched_lock held
319  */
320 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
321 {
322 	struct ice_sched_node *parent;
323 	struct ice_hw *hw = pi->hw;
324 	u8 i, j;
325 
326 	/* Free the children before freeing up the parent node
327 	 * The parent array is updated below and that shifts the nodes
328 	 * in the array. So always pick the first child if num children > 0
329 	 */
330 	while (node->num_children)
331 		ice_free_sched_node(pi, node->children[0]);
332 
333 	/* Leaf, TC and root nodes can't be deleted by SW */
334 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
335 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
336 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
337 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
338 		u32 teid = le32_to_cpu(node->info.node_teid);
339 
340 		ice_sched_remove_elems(hw, node->parent, 1, &teid);
341 	}
342 	parent = node->parent;
343 	/* root has no parent */
344 	if (parent) {
345 		struct ice_sched_node *p, *tc_node;
346 
347 		/* update the parent */
348 		for (i = 0; i < parent->num_children; i++)
349 			if (parent->children[i] == node) {
350 				for (j = i + 1; j < parent->num_children; j++)
351 					parent->children[j - 1] =
352 						parent->children[j];
353 				parent->num_children--;
354 				break;
355 			}
356 
357 		/* search for previous sibling that points to this node and
358 		 * remove the reference
359 		 */
360 		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
361 		if (!tc_node) {
362 			ice_debug(hw, ICE_DBG_SCHED,
363 				  "Invalid TC number %d\n", node->tc_num);
364 			goto err_exit;
365 		}
366 		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
367 		while (p) {
368 			if (p->sibling == node) {
369 				p->sibling = node->sibling;
370 				break;
371 			}
372 			p = p->sibling;
373 		}
374 	}
375 err_exit:
376 	/* leaf nodes have no children */
377 	if (node->children)
378 		devm_kfree(ice_hw_to_dev(hw), node->children);
379 	devm_kfree(ice_hw_to_dev(hw), node);
380 }
381 
382 /**
383  * ice_aq_get_dflt_topo - gets default scheduler topology
384  * @hw: pointer to the HW struct
385  * @lport: logical port number
386  * @buf: pointer to buffer
387  * @buf_size: buffer size in bytes
388  * @num_branches: returns total number of queue-to-port branches
389  * @cd: pointer to command details structure or NULL
390  *
391  * Get default scheduler topology (0x0400)
392  */
393 static enum ice_status
394 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
395 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
396 		     u8 *num_branches, struct ice_sq_cd *cd)
397 {
398 	struct ice_aqc_get_topo *cmd;
399 	struct ice_aq_desc desc;
400 	enum ice_status status;
401 
402 	cmd = &desc.params.get_topo;
403 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
404 	cmd->port_num = lport;
405 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
406 	if (!status && num_branches)
407 		*num_branches = cmd->num_branches;
408 
409 	return status;
410 }
411 
412 /**
413  * ice_aq_add_sched_elems - adds scheduling elements
414  * @hw: pointer to the HW struct
415  * @grps_req: the number of groups that are requested to be added
416  * @buf: pointer to buffer
417  * @buf_size: buffer size in bytes
418  * @grps_added: returns total number of groups added
419  * @cd: pointer to command details structure or NULL
420  *
421  * Add scheduling elements (0x0401)
422  */
423 static enum ice_status
424 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
425 		       struct ice_aqc_add_elem *buf, u16 buf_size,
426 		       u16 *grps_added, struct ice_sq_cd *cd)
427 {
428 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
429 					   grps_req, (void *)buf, buf_size,
430 					   grps_added, cd);
431 }
432 
433 /**
434  * ice_aq_suspend_sched_elems - suspend scheduler elements
435  * @hw: pointer to the HW struct
436  * @elems_req: number of elements to suspend
437  * @buf: pointer to buffer
438  * @buf_size: buffer size in bytes
439  * @elems_ret: returns total number of elements suspended
440  * @cd: pointer to command details structure or NULL
441  *
442  * Suspend scheduling elements (0x0409)
443  */
444 static enum ice_status
445 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
446 			   struct ice_aqc_suspend_resume_elem *buf,
447 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
448 {
449 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
450 					   elems_req, (void *)buf, buf_size,
451 					   elems_ret, cd);
452 }
453 
454 /**
455  * ice_aq_resume_sched_elems - resume scheduler elements
456  * @hw: pointer to the HW struct
457  * @elems_req: number of elements to resume
458  * @buf: pointer to buffer
459  * @buf_size: buffer size in bytes
460  * @elems_ret: returns total number of elements resumed
461  * @cd: pointer to command details structure or NULL
462  *
463  * Resume scheduling elements (0x040A)
464  */
465 static enum ice_status
466 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
467 			  struct ice_aqc_suspend_resume_elem *buf,
468 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
469 {
470 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
471 					   elems_req, (void *)buf, buf_size,
472 					   elems_ret, cd);
473 }
474 
475 /**
476  * ice_aq_query_sched_res - query scheduler resource
477  * @hw: pointer to the HW struct
478  * @buf_size: buffer size in bytes
479  * @buf: pointer to buffer
480  * @cd: pointer to command details structure or NULL
481  *
482  * Query scheduler resource allocation (0x0412)
483  */
484 static enum ice_status
485 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
486 		       struct ice_aqc_query_txsched_res_resp *buf,
487 		       struct ice_sq_cd *cd)
488 {
489 	struct ice_aq_desc desc;
490 
491 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
492 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
493 }
494 
495 /**
496  * ice_sched_suspend_resume_elems - suspend or resume HW nodes
497  * @hw: pointer to the HW struct
498  * @num_nodes: number of nodes
499  * @node_teids: array of node TEIDs to be suspended or resumed
500  * @suspend: true means suspend / false means resume
501  *
502  * This function suspends or resumes HW nodes
503  */
504 static enum ice_status
505 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
506 			       bool suspend)
507 {
508 	struct ice_aqc_suspend_resume_elem *buf;
509 	u16 i, buf_size, num_elem_ret = 0;
510 	enum ice_status status;
511 
512 	buf_size = sizeof(*buf) * num_nodes;
513 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
514 	if (!buf)
515 		return ICE_ERR_NO_MEMORY;
516 
517 	for (i = 0; i < num_nodes; i++)
518 		buf->teid[i] = cpu_to_le32(node_teids[i]);
519 
520 	if (suspend)
521 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
522 						    buf_size, &num_elem_ret,
523 						    NULL);
524 	else
525 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
526 						   buf_size, &num_elem_ret,
527 						   NULL);
528 	if (status || num_elem_ret != num_nodes)
529 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
530 
531 	devm_kfree(ice_hw_to_dev(hw), buf);
532 	return status;
533 }
534 
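/* Editor's example (not part of the original file): a minimal sketch of
 * suspending a single node by TEID, mirroring how ice_sched_cfg_vsi() uses the
 * helper above later in this file. The wrapper name is hypothetical.
 */
static inline enum ice_status
ice_sched_example_suspend_node(struct ice_hw *hw, struct ice_sched_node *node)
{
	u32 teid = le32_to_cpu(node->info.node_teid);

	/* suspend exactly one element; passing false would resume it */
	return ice_sched_suspend_resume_elems(hw, 1, &teid, true);
}
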
535 /**
536  * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
537  * @hw: pointer to the HW struct
538  * @vsi_handle: VSI handle
539  * @tc: TC number
540  * @new_numqs: number of queues
541  */
542 static enum ice_status
543 ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
544 {
545 	struct ice_vsi_ctx *vsi_ctx;
546 	struct ice_q_ctx *q_ctx;
547 
548 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
549 	if (!vsi_ctx)
550 		return ICE_ERR_PARAM;
551 	/* allocate LAN queue contexts */
552 	if (!vsi_ctx->lan_q_ctx[tc]) {
553 		vsi_ctx->lan_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
554 						      new_numqs,
555 						      sizeof(*q_ctx),
556 						      GFP_KERNEL);
557 		if (!vsi_ctx->lan_q_ctx[tc])
558 			return ICE_ERR_NO_MEMORY;
559 		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
560 		return 0;
561 	}
562 	/* the number of queues has increased; update the queue contexts */
563 	if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
564 		u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
565 
566 		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
567 				     sizeof(*q_ctx), GFP_KERNEL);
568 		if (!q_ctx)
569 			return ICE_ERR_NO_MEMORY;
570 		memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
571 		       prev_num * sizeof(*q_ctx));
572 		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
573 		vsi_ctx->lan_q_ctx[tc] = q_ctx;
574 		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
575 	}
576 	return 0;
577 }
578 
579 /**
580  * ice_sched_clear_agg - clears the aggregator related information
581  * @hw: pointer to the hardware structure
582  *
583  * This function removes the aggregator list and frees up the
584  * aggregator-related memory previously allocated.
585  */
586 void ice_sched_clear_agg(struct ice_hw *hw)
587 {
588 	struct ice_sched_agg_info *agg_info;
589 	struct ice_sched_agg_info *atmp;
590 
591 	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
592 		struct ice_sched_agg_vsi_info *agg_vsi_info;
593 		struct ice_sched_agg_vsi_info *vtmp;
594 
595 		list_for_each_entry_safe(agg_vsi_info, vtmp,
596 					 &agg_info->agg_vsi_list, list_entry) {
597 			list_del(&agg_vsi_info->list_entry);
598 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
599 		}
600 		list_del(&agg_info->list_entry);
601 		devm_kfree(ice_hw_to_dev(hw), agg_info);
602 	}
603 }
604 
605 /**
606  * ice_sched_clear_tx_topo - clears the scheduler tree nodes
607  * @pi: port information structure
608  *
609  * This function removes all the nodes from HW as well as from SW DB.
610  */
611 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
612 {
613 	if (!pi)
614 		return;
615 	if (pi->root) {
616 		ice_free_sched_node(pi, pi->root);
617 		pi->root = NULL;
618 	}
619 }
620 
621 /**
622  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
623  * @pi: port information structure
624  *
625  * Cleanup scheduling elements from SW DB
626  */
627 void ice_sched_clear_port(struct ice_port_info *pi)
628 {
629 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
630 		return;
631 
632 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
633 	mutex_lock(&pi->sched_lock);
634 	ice_sched_clear_tx_topo(pi);
635 	mutex_unlock(&pi->sched_lock);
636 	mutex_destroy(&pi->sched_lock);
637 }
638 
639 /**
640  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
641  * @hw: pointer to the HW struct
642  *
643  * Cleanup scheduling elements from SW DB for all the ports
644  */
645 void ice_sched_cleanup_all(struct ice_hw *hw)
646 {
647 	if (!hw)
648 		return;
649 
650 	if (hw->layer_info) {
651 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
652 		hw->layer_info = NULL;
653 	}
654 
655 	if (hw->port_info)
656 		ice_sched_clear_port(hw->port_info);
657 
658 	hw->num_tx_sched_layers = 0;
659 	hw->num_tx_sched_phys_layers = 0;
660 	hw->flattened_layers = 0;
661 	hw->max_cgds = 0;
662 }
663 
664 /**
665  * ice_sched_add_elems - add nodes to HW and SW DB
666  * @pi: port information structure
667  * @tc_node: pointer to the branch node
668  * @parent: pointer to the parent node
669  * @layer: layer number to add nodes
670  * @num_nodes: number of nodes
671  * @num_nodes_added: pointer to num nodes added
672  * @first_node_teid: if new nodes are added then return the TEID of first node
673  *
674  * This function adds nodes to HW as well as to the SW DB for a given layer
675  */
676 static enum ice_status
677 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
678 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
679 		    u16 *num_nodes_added, u32 *first_node_teid)
680 {
681 	struct ice_sched_node *prev, *new_node;
682 	struct ice_aqc_add_elem *buf;
683 	u16 i, num_groups_added = 0;
684 	enum ice_status status = 0;
685 	struct ice_hw *hw = pi->hw;
686 	u16 buf_size;
687 	u32 teid;
688 
689 	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
690 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
691 	if (!buf)
692 		return ICE_ERR_NO_MEMORY;
693 
694 	buf->hdr.parent_teid = parent->info.node_teid;
695 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
696 	for (i = 0; i < num_nodes; i++) {
697 		buf->generic[i].parent_teid = parent->info.node_teid;
698 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
699 		buf->generic[i].data.valid_sections =
700 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
701 			ICE_AQC_ELEM_VALID_EIR;
702 		buf->generic[i].data.generic = 0;
703 		buf->generic[i].data.cir_bw.bw_profile_idx =
704 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
705 		buf->generic[i].data.cir_bw.bw_alloc =
706 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
707 		buf->generic[i].data.eir_bw.bw_profile_idx =
708 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
709 		buf->generic[i].data.eir_bw.bw_alloc =
710 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
711 	}
712 
713 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
714 					&num_groups_added, NULL);
715 	if (status || num_groups_added != 1) {
716 		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
717 			  hw->adminq.sq_last_status);
718 		devm_kfree(ice_hw_to_dev(hw), buf);
719 		return ICE_ERR_CFG;
720 	}
721 
722 	*num_nodes_added = num_nodes;
723 	/* add nodes to the SW DB */
724 	for (i = 0; i < num_nodes; i++) {
725 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
726 		if (status) {
727 			ice_debug(hw, ICE_DBG_SCHED,
728 				  "add nodes in SW DB failed status =%d\n",
729 				  status);
730 			break;
731 		}
732 
733 		teid = le32_to_cpu(buf->generic[i].node_teid);
734 		new_node = ice_sched_find_node_by_teid(parent, teid);
735 		if (!new_node) {
736 			ice_debug(hw, ICE_DBG_SCHED,
737 				  "Node is missing for teid =%d\n", teid);
738 			break;
739 		}
740 
741 		new_node->sibling = NULL;
742 		new_node->tc_num = tc_node->tc_num;
743 
744 		/* add it to previous node sibling pointer */
745 		/* Note: siblings are not linked across branches */
746 		prev = ice_sched_get_first_node(hw, tc_node, layer);
747 		if (prev && prev != new_node) {
748 			while (prev->sibling)
749 				prev = prev->sibling;
750 			prev->sibling = new_node;
751 		}
752 
753 		if (i == 0)
754 			*first_node_teid = teid;
755 	}
756 
757 	devm_kfree(ice_hw_to_dev(hw), buf);
758 	return status;
759 }
760 
761 /**
762  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
763  * @pi: port information structure
764  * @tc_node: pointer to TC node
765  * @parent: pointer to parent node
766  * @layer: layer number to add nodes
767  * @num_nodes: number of nodes to be added
768  * @first_node_teid: pointer to the first node TEID
769  * @num_nodes_added: pointer to number of nodes added
770  *
771  * This function adds nodes to a given layer.
772  */
773 static enum ice_status
774 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
775 			     struct ice_sched_node *tc_node,
776 			     struct ice_sched_node *parent, u8 layer,
777 			     u16 num_nodes, u32 *first_node_teid,
778 			     u16 *num_nodes_added)
779 {
780 	u32 *first_teid_ptr = first_node_teid;
781 	u16 new_num_nodes, max_child_nodes;
782 	enum ice_status status = 0;
783 	struct ice_hw *hw = pi->hw;
784 	u16 num_added = 0;
785 	u32 temp;
786 
787 	*num_nodes_added = 0;
788 
789 	if (!num_nodes)
790 		return status;
791 
792 	if (!parent || layer < hw->sw_entry_point_layer)
793 		return ICE_ERR_PARAM;
794 
795 	/* max children per node per layer */
796 	max_child_nodes = hw->max_children[parent->tx_sched_layer];
797 
798 	/* current number of children + required nodes exceeds max children? */
799 	if ((parent->num_children + num_nodes) > max_child_nodes) {
800 		/* Fail if the parent is a TC node */
801 		if (parent == tc_node)
802 			return ICE_ERR_CFG;
803 
804 		/* utilize all the available space if the parent is not full */
805 		if (parent->num_children < max_child_nodes) {
806 			new_num_nodes = max_child_nodes - parent->num_children;
807 			/* this recursion is intentional, and wouldn't
808 			 * go more than 2 calls
809 			 */
810 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
811 							      parent, layer,
812 							      new_num_nodes,
813 							      first_node_teid,
814 							      &num_added);
815 			if (status)
816 				return status;
817 
818 			*num_nodes_added += num_added;
819 		}
820 		/* Don't modify the first node TEID memory if the first node was
821 		 * added already in the above call. Instead send some temp
822 		 * memory for all other recursive calls.
823 		 */
824 		if (num_added)
825 			first_teid_ptr = &temp;
826 
827 		new_num_nodes = num_nodes - num_added;
828 
829 		/* This parent is full, try the next sibling */
830 		parent = parent->sibling;
831 
832 		/* this recursion is intentional: for 1024 queues
833 		 * per VSI, it recurses at most 16 times.
834 		 * 1024 / 8 = 128 layer 8 nodes
835 		 * 128 / 8 = 16 (add 8 nodes per iteration)
836 		 */
837 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
838 						      layer, new_num_nodes,
839 						      first_teid_ptr,
840 						      &num_added);
841 		*num_nodes_added += num_added;
842 		return status;
843 	}
844 
845 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
846 				     num_nodes_added, first_node_teid);
847 	return status;
848 }
849 
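/* Editor's note (not in the original source): a hypothetical worked example of
 * the parent-split path above. Assume max_child_nodes is 8, the current parent
 * already has 6 children and 5 more nodes are requested:
 *
 *	first call:  adds 8 - 6 = 2 nodes under the current parent
 *	second call: moves to parent->sibling and adds the remaining 3 nodes,
 *		     passing &temp so the first TEID is not overwritten
 *
 * *num_nodes_added ends up as 5 and *first_node_teid still refers to the first
 * node created under the original parent.
 */
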
850 /**
851  * ice_sched_get_qgrp_layer - get the current queue group layer number
852  * @hw: pointer to the HW struct
853  *
854  * This function returns the current queue group layer number
855  */
856 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
857 {
858 	/* It's always total layers - 1, the array is 0 relative so -2 */
859 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
860 }
861 
862 /**
863  * ice_sched_get_vsi_layer - get the current VSI layer number
864  * @hw: pointer to the HW struct
865  *
866  * This function returns the current VSI layer number
867  */
868 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
869 {
870 	/* Num Layers       VSI layer
871 	 *     9               6
872 	 *     7               4
873 	 *     5 or less       sw_entry_point_layer
874 	 */
875 	/* calculate the VSI layer based on number of layers. */
876 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
877 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
878 
879 		if (layer > hw->sw_entry_point_layer)
880 			return layer;
881 	}
882 	return hw->sw_entry_point_layer;
883 }
884 
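/* Editor's note (not in the original source): worked examples of the layer
 * math above, assuming ICE_QGRP_LAYER_OFFSET == 2 and ICE_VSI_LAYER_OFFSET == 3
 * (the values implied by the comments in these two helpers):
 *
 *	9 total layers: queue group layer = 9 - 2 = 7, VSI layer = 9 - 3 = 6
 *	7 total layers: queue group layer = 7 - 2 = 5, VSI layer = 7 - 3 = 4
 *	5 or fewer:     the VSI layer falls back to hw->sw_entry_point_layer
 */
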
885 /**
886  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
887  * @pi: port information structure
888  *
889  * This function removes the leaf node that was created by the FW
890  * during initialization
891  */
892 static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
893 {
894 	struct ice_sched_node *node;
895 
896 	node = pi->root;
897 	while (node) {
898 		if (!node->num_children)
899 			break;
900 		node = node->children[0];
901 	}
902 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
903 		u32 teid = le32_to_cpu(node->info.node_teid);
904 		enum ice_status status;
905 
906 		/* remove the default leaf node */
907 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
908 		if (!status)
909 			ice_free_sched_node(pi, node);
910 	}
911 }
912 
913 /**
914  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
915  * @pi: port information structure
916  *
917  * This function frees all the nodes except root and TC that were created by
918  * the FW during initialization
919  */
920 static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
921 {
922 	struct ice_sched_node *node;
923 
924 	ice_rm_dflt_leaf_node(pi);
925 
926 	/* remove the default nodes except TC and root nodes */
927 	node = pi->root;
928 	while (node) {
929 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
930 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
931 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
932 			ice_free_sched_node(pi, node);
933 			break;
934 		}
935 
936 		if (!node->num_children)
937 			break;
938 		node = node->children[0];
939 	}
940 }
941 
942 /**
943  * ice_sched_init_port - Initialize scheduler by querying information from FW
944  * @pi: port information structure
945  *
946  * This function is the initial call to find the total number of Tx scheduler
947  * resources and the default topology created by firmware, and to store that
948  * information in the SW DB.
949  */
950 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
951 {
952 	struct ice_aqc_get_topo_elem *buf;
953 	enum ice_status status;
954 	struct ice_hw *hw;
955 	u8 num_branches;
956 	u16 num_elems;
957 	u8 i, j;
958 
959 	if (!pi)
960 		return ICE_ERR_PARAM;
961 	hw = pi->hw;
962 
963 	/* Query the Default Topology from FW */
964 	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
965 	if (!buf)
966 		return ICE_ERR_NO_MEMORY;
967 
968 	/* Query default scheduling tree topology */
969 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
970 				      &num_branches, NULL);
971 	if (status)
972 		goto err_init_port;
973 
974 	/* num_branches should be between 1-8 */
975 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
976 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
977 			  num_branches);
978 		status = ICE_ERR_PARAM;
979 		goto err_init_port;
980 	}
981 
982 	/* get the number of elements on the default/first branch */
983 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
984 
985 	/* num_elems should always be between 1-9 */
986 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
987 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
988 			  num_elems);
989 		status = ICE_ERR_PARAM;
990 		goto err_init_port;
991 	}
992 
993 	/* If the last node is a leaf node then the index of the queue group
994 	 * layer is two less than the number of elements.
995 	 */
996 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
997 	    ICE_AQC_ELEM_TYPE_LEAF)
998 		pi->last_node_teid =
999 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
1000 	else
1001 		pi->last_node_teid =
1002 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
1003 
1004 	/* Insert the Tx Sched root node */
1005 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
1006 	if (status)
1007 		goto err_init_port;
1008 
1009 	/* Parse the default tree and cache the information */
1010 	for (i = 0; i < num_branches; i++) {
1011 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
1012 
1013 		/* Skip root element as already inserted */
1014 		for (j = 1; j < num_elems; j++) {
1015 			/* update the sw entry point */
1016 			if (buf[0].generic[j].data.elem_type ==
1017 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
1018 				hw->sw_entry_point_layer = j;
1019 
1020 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1021 			if (status)
1022 				goto err_init_port;
1023 		}
1024 	}
1025 
1026 	/* Remove the default nodes. */
1027 	if (pi->root)
1028 		ice_sched_rm_dflt_nodes(pi);
1029 
1030 	/* initialize the port for handling the scheduler tree */
1031 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1032 	mutex_init(&pi->sched_lock);
1033 
1034 err_init_port:
1035 	if (status && pi->root) {
1036 		ice_free_sched_node(pi, pi->root);
1037 		pi->root = NULL;
1038 	}
1039 
1040 	devm_kfree(ice_hw_to_dev(hw), buf);
1041 	return status;
1042 }
1043 
1044 /**
1045  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1046  * @hw: pointer to the HW struct
1047  *
1048  * query FW for allocated scheduler resources and store in HW struct
1049  */
1050 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1051 {
1052 	struct ice_aqc_query_txsched_res_resp *buf;
1053 	enum ice_status status = 0;
1054 	__le16 max_sibl;
1055 	u8 i;
1056 
1057 	if (hw->layer_info)
1058 		return status;
1059 
1060 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1061 	if (!buf)
1062 		return ICE_ERR_NO_MEMORY;
1063 
1064 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1065 	if (status)
1066 		goto sched_query_out;
1067 
1068 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1069 	hw->num_tx_sched_phys_layers =
1070 		le16_to_cpu(buf->sched_props.phys_levels);
1071 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1072 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1073 
1074 	/* max sibling group size of current layer refers to the max children
1075 	 * of the below layer node.
1076 	 * layer 1 node max children will be layer 2 max sibling group size
1077 	 * layer 2 node max children will be layer 3 max sibling group size
1078 	 * and so on. This array will be populated from root (index 0) to
1079 	 * qgroup layer 7. Leaf node has no children.
1080 	 */
1081 	for (i = 0; i < hw->num_tx_sched_layers; i++) {
1082 		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
1083 		hw->max_children[i] = le16_to_cpu(max_sibl);
1084 	}
1085 
1086 	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1087 				      (hw->num_tx_sched_layers *
1088 				       sizeof(*hw->layer_info)),
1089 				      GFP_KERNEL);
1090 	if (!hw->layer_info) {
1091 		status = ICE_ERR_NO_MEMORY;
1092 		goto sched_query_out;
1093 	}
1094 
1095 sched_query_out:
1096 	devm_kfree(ice_hw_to_dev(hw), buf);
1097 	return status;
1098 }
1099 
1100 /**
1101  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1102  * @hw: pointer to the HW struct
1103  * @base: pointer to the base node
1104  * @node: pointer to the node to search
1105  *
1106  * This function checks whether a given node is part of the base node
1107  * subtree or not
1108  */
1109 static bool
1110 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1111 			       struct ice_sched_node *node)
1112 {
1113 	u8 i;
1114 
1115 	for (i = 0; i < base->num_children; i++) {
1116 		struct ice_sched_node *child = base->children[i];
1117 
1118 		if (node == child)
1119 			return true;
1120 
1121 		if (child->tx_sched_layer > node->tx_sched_layer)
1122 			return false;
1123 
1124 		/* this recursion is intentional, and wouldn't
1125 		 * go more than 8 calls
1126 		 */
1127 		if (ice_sched_find_node_in_subtree(hw, child, node))
1128 			return true;
1129 	}
1130 	return false;
1131 }
1132 
1133 /**
1134  * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
1135  * @pi: port information structure
1136  * @vsi_handle: software VSI handle
1137  * @tc: branch number
1138  * @owner: LAN or RDMA
1139  *
1140  * This function retrieves a free LAN or RDMA queue group node
1141  */
1142 struct ice_sched_node *
1143 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
1144 			   u8 owner)
1145 {
1146 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1147 	struct ice_vsi_ctx *vsi_ctx;
1148 	u16 max_children;
1149 	u8 qgrp_layer;
1150 
1151 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1152 	max_children = pi->hw->max_children[qgrp_layer];
1153 
1154 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1155 	if (!vsi_ctx)
1156 		return NULL;
1157 	vsi_node = vsi_ctx->sched.vsi_node[tc];
1158 	/* validate invalid VSI ID */
1159 	if (!vsi_node)
1160 		goto lan_q_exit;
1161 
1162 	/* get the first queue group node from VSI sub-tree */
1163 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
1164 	while (qgrp_node) {
1165 		/* make sure the qgroup node is part of the VSI subtree */
1166 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1167 			if (qgrp_node->num_children < max_children &&
1168 			    qgrp_node->owner == owner)
1169 				break;
1170 		qgrp_node = qgrp_node->sibling;
1171 	}
1172 
1173 lan_q_exit:
1174 	return qgrp_node;
1175 }
1176 
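/* Editor's example (not part of the original file): a minimal sketch of how a
 * LAN queue path might pick a parent before adding a queue node, based on the
 * helper above. The wrapper name and error handling are hypothetical; callers
 * are expected to hold pi->sched_lock as for the other tree walks.
 */
static inline enum ice_status
ice_sched_example_pick_lan_qparent(struct ice_port_info *pi, u16 vsi_handle,
				   u8 tc, struct ice_sched_node **qparent)
{
	/* find a queue group node on this TC with room for another queue */
	*qparent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
					      ICE_SCHED_NODE_OWNER_LAN);
	return *qparent ? 0 : ICE_ERR_CFG;
}
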
1177 /**
1178  * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
1179  * @hw: pointer to the HW struct
1180  * @tc_node: pointer to the TC node
1181  * @vsi_handle: software VSI handle
1182  *
1183  * This function retrieves a VSI node for a given VSI ID from a given
1184  * TC branch
1185  */
1186 static struct ice_sched_node *
1187 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1188 		       u16 vsi_handle)
1189 {
1190 	struct ice_sched_node *node;
1191 	u8 vsi_layer;
1192 
1193 	vsi_layer = ice_sched_get_vsi_layer(hw);
1194 	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
1195 
1196 	/* Check whether it already exists */
1197 	while (node) {
1198 		if (node->vsi_handle == vsi_handle)
1199 			return node;
1200 		node = node->sibling;
1201 	}
1202 
1203 	return node;
1204 }
1205 
1206 /**
1207  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1208  * @hw: pointer to the HW struct
1209  * @num_qs: number of queues
1210  * @num_nodes: num nodes array
1211  *
1212  * This function calculates the number of VSI child nodes based on the
1213  * number of queues.
1214  */
1215 static void
1216 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1217 {
1218 	u16 num = num_qs;
1219 	u8 i, qgl, vsil;
1220 
1221 	qgl = ice_sched_get_qgrp_layer(hw);
1222 	vsil = ice_sched_get_vsi_layer(hw);
1223 
1224 	/* calculate num nodes from queue group to VSI layer */
1225 	for (i = qgl; i > vsil; i--) {
1226 		/* round to the next integer if there is a remainder */
1227 		num = DIV_ROUND_UP(num, hw->max_children[i]);
1228 
1229 		/* need at least one node */
1230 		num_nodes[i] = num ? num : 1;
1231 	}
1232 }
1233 
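/* Editor's note (not in the original source): a hypothetical worked example of
 * the child-node math above. Assume a 9-layer tree (VSI layer 6, queue group
 * layer 7), 100 queues and hw->max_children[7] == 8:
 *
 *	i = 7: num = DIV_ROUND_UP(100, 8) = 13, so num_nodes[7] = 13
 *
 * i.e. thirteen queue group nodes are needed under the VSI node; layers at or
 * above the VSI layer are not touched by this helper.
 */
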
1234 /**
1235  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1236  * @pi: port information structure
1237  * @vsi_handle: software VSI handle
1238  * @tc_node: pointer to the TC node
1239  * @num_nodes: pointer to the num nodes that needs to be added per layer
1240  * @owner: node owner (LAN or RDMA)
1241  *
1242  * This function adds the VSI child nodes to tree. It gets called for
1243  * LAN and RDMA separately.
1244  */
1245 static enum ice_status
1246 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1247 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1248 			      u8 owner)
1249 {
1250 	struct ice_sched_node *parent, *node;
1251 	struct ice_hw *hw = pi->hw;
1252 	enum ice_status status;
1253 	u32 first_node_teid;
1254 	u16 num_added = 0;
1255 	u8 i, qgl, vsil;
1256 
1257 	qgl = ice_sched_get_qgrp_layer(hw);
1258 	vsil = ice_sched_get_vsi_layer(hw);
1259 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1260 	for (i = vsil + 1; i <= qgl; i++) {
1261 		if (!parent)
1262 			return ICE_ERR_CFG;
1263 
1264 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1265 						      num_nodes[i],
1266 						      &first_node_teid,
1267 						      &num_added);
1268 		if (status || num_nodes[i] != num_added)
1269 			return ICE_ERR_CFG;
1270 
1271 		/* The newly added node can be a new parent for the next
1272 		 * layer nodes
1273 		 */
1274 		if (num_added) {
1275 			parent = ice_sched_find_node_by_teid(tc_node,
1276 							     first_node_teid);
1277 			node = parent;
1278 			while (node) {
1279 				node->owner = owner;
1280 				node = node->sibling;
1281 			}
1282 		} else {
1283 			parent = parent->children[0];
1284 		}
1285 	}
1286 
1287 	return 0;
1288 }
1289 
1290 /**
1291  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1292  * @hw: pointer to the HW struct
1293  * @tc_node: pointer to TC node
1294  * @num_nodes: pointer to num nodes array
1295  *
1296  * This function calculates the number of support nodes needed to add this
1297  * VSI into the Tx tree, including the VSI, its parent and the intermediate
1298  * nodes in the layers below
1299  */
1300 static void
1301 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1302 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1303 {
1304 	struct ice_sched_node *node;
1305 	u8 vsil;
1306 	int i;
1307 
1308 	vsil = ice_sched_get_vsi_layer(hw);
1309 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1310 		/* Add intermediate nodes if the TC has no children and
1311 		 * at least one node is needed for the VSI
1312 		 */
1313 		if (!tc_node->num_children || i == vsil) {
1314 			num_nodes[i]++;
1315 		} else {
1316 			/* If the intermediate nodes have reached max children,
1317 			 * then add a new one.
1318 			 */
1319 			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
1320 			/* scan all the siblings */
1321 			while (node) {
1322 				if (node->num_children < hw->max_children[i])
1323 					break;
1324 				node = node->sibling;
1325 			}
1326 
1327 			/* tree has one intermediate node to add this new VSI.
1328 			 * So no need to calculate supported nodes for below
1329 			 * layers.
1330 			 */
1331 			if (node)
1332 				break;
1333 			/* all the nodes are full, allocate a new one */
1334 			num_nodes[i]++;
1335 		}
1336 }
1337 
1338 /**
1339  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
1340  * @pi: port information structure
1341  * @vsi_handle: software VSI handle
1342  * @tc_node: pointer to TC node
1343  * @num_nodes: pointer to num nodes array
1344  *
1345  * This function adds the VSI support nodes into the Tx tree, including the
1346  * VSI, its parent and the intermediate nodes in the layers below
1347  */
1348 static enum ice_status
1349 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
1350 				struct ice_sched_node *tc_node, u16 *num_nodes)
1351 {
1352 	struct ice_sched_node *parent = tc_node;
1353 	enum ice_status status;
1354 	u32 first_node_teid;
1355 	u16 num_added = 0;
1356 	u8 i, vsil;
1357 
1358 	if (!pi)
1359 		return ICE_ERR_PARAM;
1360 
1361 	vsil = ice_sched_get_vsi_layer(pi->hw);
1362 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1363 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1364 						      i, num_nodes[i],
1365 						      &first_node_teid,
1366 						      &num_added);
1367 		if (status || num_nodes[i] != num_added)
1368 			return ICE_ERR_CFG;
1369 
1370 		/* The newly added node can be a new parent for the next
1371 		 * layer nodes
1372 		 */
1373 		if (num_added)
1374 			parent = ice_sched_find_node_by_teid(tc_node,
1375 							     first_node_teid);
1376 		else
1377 			parent = parent->children[0];
1378 
1379 		if (!parent)
1380 			return ICE_ERR_CFG;
1381 
1382 		if (i == vsil)
1383 			parent->vsi_handle = vsi_handle;
1384 	}
1385 
1386 	return 0;
1387 }
1388 
1389 /**
1390  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1391  * @pi: port information structure
1392  * @vsi_handle: software VSI handle
1393  * @tc: TC number
1394  *
1395  * This function adds a new VSI into scheduler tree
1396  */
1397 static enum ice_status
1398 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
1399 {
1400 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1401 	struct ice_sched_node *tc_node;
1402 	struct ice_hw *hw = pi->hw;
1403 
1404 	tc_node = ice_sched_get_tc_node(pi, tc);
1405 	if (!tc_node)
1406 		return ICE_ERR_PARAM;
1407 
1408 	/* calculate number of supported nodes needed for this VSI */
1409 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1410 
1411 	/* add VSI supported nodes to TC subtree */
1412 	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1413 					       num_nodes);
1414 }
1415 
1416 /**
1417  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1418  * @pi: port information structure
1419  * @vsi_handle: software VSI handle
1420  * @tc: TC number
1421  * @new_numqs: new number of max queues
1422  * @owner: owner of this subtree
1423  *
1424  * This function updates the VSI child nodes based on the number of queues
1425  */
1426 static enum ice_status
1427 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1428 				 u8 tc, u16 new_numqs, u8 owner)
1429 {
1430 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1431 	struct ice_sched_node *vsi_node;
1432 	struct ice_sched_node *tc_node;
1433 	struct ice_vsi_ctx *vsi_ctx;
1434 	enum ice_status status = 0;
1435 	struct ice_hw *hw = pi->hw;
1436 	u16 prev_numqs;
1437 
1438 	tc_node = ice_sched_get_tc_node(pi, tc);
1439 	if (!tc_node)
1440 		return ICE_ERR_CFG;
1441 
1442 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1443 	if (!vsi_node)
1444 		return ICE_ERR_CFG;
1445 
1446 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1447 	if (!vsi_ctx)
1448 		return ICE_ERR_PARAM;
1449 
1450 	prev_numqs = vsi_ctx->sched.max_lanq[tc];
1451 	/* number of queues is unchanged or less than the previous number */
1452 	if (new_numqs <= prev_numqs)
1453 		return status;
1454 	status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
1455 	if (status)
1456 		return status;
1457 
1458 	if (new_numqs)
1459 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1460 	/* Always keep the maximum queue configuration. Update the
1461 	 * tree only if number of queues > previous number of queues. This may
1462 	 * leave some extra nodes in the tree if number of queues < previous
1463 	 * number but that wouldn't harm anything. Removing those extra nodes
1464 	 * may complicate the code if those nodes are part of SRL or
1465 	 * individually rate limited.
1466 	 */
1467 	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
1468 					       new_num_nodes, owner);
1469 	if (status)
1470 		return status;
1471 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
1472 
1473 	return 0;
1474 }
1475 
1476 /**
1477  * ice_sched_cfg_vsi - configure the new/existing VSI
1478  * @pi: port information structure
1479  * @vsi_handle: software VSI handle
1480  * @tc: TC number
1481  * @maxqs: max number of queues
1482  * @owner: LAN or RDMA
1483  * @enable: TC enabled or disabled
1484  *
1485  * This function adds/updates VSI nodes based on the number of queues. If TC is
1486  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1487  * disabled then suspend the VSI if it is not already.
1488  */
1489 enum ice_status
1490 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
1491 		  u8 owner, bool enable)
1492 {
1493 	struct ice_sched_node *vsi_node, *tc_node;
1494 	struct ice_vsi_ctx *vsi_ctx;
1495 	enum ice_status status = 0;
1496 	struct ice_hw *hw = pi->hw;
1497 
1498 	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
1499 	tc_node = ice_sched_get_tc_node(pi, tc);
1500 	if (!tc_node)
1501 		return ICE_ERR_PARAM;
1502 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1503 	if (!vsi_ctx)
1504 		return ICE_ERR_PARAM;
1505 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1506 
1507 	/* suspend the VSI if TC is not enabled */
1508 	if (!enable) {
1509 		if (vsi_node && vsi_node->in_use) {
1510 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1511 
1512 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1513 								true);
1514 			if (!status)
1515 				vsi_node->in_use = false;
1516 		}
1517 		return status;
1518 	}
1519 
1520 	/* TC is enabled, if it is a new VSI then add it to the tree */
1521 	if (!vsi_node) {
1522 		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
1523 		if (status)
1524 			return status;
1525 
1526 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1527 		if (!vsi_node)
1528 			return ICE_ERR_CFG;
1529 
1530 		vsi_ctx->sched.vsi_node[tc] = vsi_node;
1531 		vsi_node->in_use = true;
1532 		/* invalidate the max queues whenever the VSI is added for the
1533 		 * first time to the scheduler tree (boot or after reset). We
1534 		 * always need to recreate the child nodes in these cases.
1535 		 */
1536 		vsi_ctx->sched.max_lanq[tc] = 0;
1537 	}
1538 
1539 	/* update the VSI child nodes */
1540 	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
1541 						  owner);
1542 	if (status)
1543 		return status;
1544 
1545 	/* TC is enabled, resume the VSI if it is in the suspend state */
1546 	if (!vsi_node->in_use) {
1547 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1548 
1549 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1550 		if (!status)
1551 			vsi_node->in_use = true;
1552 	}
1553 
1554 	return status;
1555 }
1556 
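/* Editor's example (not part of the original file): a minimal sketch of
 * driving ice_sched_cfg_vsi() for the LAN owner, per the kernel-doc above.
 * The wrapper name, TC 0 and the 16-queue cap are hypothetical; locking is
 * left to the callers of the real helper.
 */
static inline enum ice_status
ice_sched_example_cfg_lan_vsi(struct ice_port_info *pi, u16 vsi_handle,
			      bool enable)
{
	/* enable: add/update the VSI on TC 0 with room for 16 LAN queues;
	 * disable: suspend the VSI node on that TC instead
	 */
	return ice_sched_cfg_vsi(pi, vsi_handle, 0, 16,
				 ICE_SCHED_NODE_OWNER_LAN, enable);
}
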
1557 /**
1558  * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
1559  * @pi: port information structure
1560  * @vsi_handle: software VSI handle
1561  *
1562  * This function removes a single aggregator VSI info entry from the
1563  * aggregator list.
1564  */
1565 static void
1566 ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1567 {
1568 	struct ice_sched_agg_info *agg_info;
1569 	struct ice_sched_agg_info *atmp;
1570 
1571 	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
1572 				 list_entry) {
1573 		struct ice_sched_agg_vsi_info *agg_vsi_info;
1574 		struct ice_sched_agg_vsi_info *vtmp;
1575 
1576 		list_for_each_entry_safe(agg_vsi_info, vtmp,
1577 					 &agg_info->agg_vsi_list, list_entry)
1578 			if (agg_vsi_info->vsi_handle == vsi_handle) {
1579 				list_del(&agg_vsi_info->list_entry);
1580 				devm_kfree(ice_hw_to_dev(pi->hw),
1581 					   agg_vsi_info);
1582 				return;
1583 			}
1584 	}
1585 }
1586 
1587 /**
1588  * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
1589  * @node: pointer to the sub-tree node
1590  *
1591  * This function checks for a leaf node presence in a given sub-tree node.
1592  */
1593 static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
1594 {
1595 	u8 i;
1596 
1597 	for (i = 0; i < node->num_children; i++)
1598 		if (ice_sched_is_leaf_node_present(node->children[i]))
1599 			return true;
1600 	/* check for a leaf node */
1601 	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
1602 }
1603 
1604 /**
1605  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
1606  * @pi: port information structure
1607  * @vsi_handle: software VSI handle
1608  * @owner: LAN or RDMA
1609  *
1610  * This function removes the VSI and its LAN or RDMA children nodes from the
1611  * scheduler tree.
1612  */
1613 static enum ice_status
1614 ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
1615 {
1616 	enum ice_status status = ICE_ERR_PARAM;
1617 	struct ice_vsi_ctx *vsi_ctx;
1618 	u8 i;
1619 
1620 	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
1621 	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
1622 		return status;
1623 	mutex_lock(&pi->sched_lock);
1624 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1625 	if (!vsi_ctx)
1626 		goto exit_sched_rm_vsi_cfg;
1627 
1628 	ice_for_each_traffic_class(i) {
1629 		struct ice_sched_node *vsi_node, *tc_node;
1630 		u8 j = 0;
1631 
1632 		tc_node = ice_sched_get_tc_node(pi, i);
1633 		if (!tc_node)
1634 			continue;
1635 
1636 		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
1637 		if (!vsi_node)
1638 			continue;
1639 
1640 		if (ice_sched_is_leaf_node_present(vsi_node)) {
1641 			ice_debug(pi->hw, ICE_DBG_SCHED,
1642 				  "VSI has leaf nodes in TC %d\n", i);
1643 			status = ICE_ERR_IN_USE;
1644 			goto exit_sched_rm_vsi_cfg;
1645 		}
1646 		while (j < vsi_node->num_children) {
1647 			if (vsi_node->children[j]->owner == owner) {
1648 				ice_free_sched_node(pi, vsi_node->children[j]);
1649 
1650 				/* reset the counter again since the num
1651 				 * children will be updated after node removal
1652 				 */
1653 				j = 0;
1654 			} else {
1655 				j++;
1656 			}
1657 		}
1658 		/* remove the VSI if it has no children */
1659 		if (!vsi_node->num_children) {
1660 			ice_free_sched_node(pi, vsi_node);
1661 			vsi_ctx->sched.vsi_node[i] = NULL;
1662 
1663 			/* clean up aggregator related VSI info if any */
1664 			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
1665 		}
1666 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
1667 			vsi_ctx->sched.max_lanq[i] = 0;
1668 	}
1669 	status = 0;
1670 
1671 exit_sched_rm_vsi_cfg:
1672 	mutex_unlock(&pi->sched_lock);
1673 	return status;
1674 }
1675 
1676 /**
1677  * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
1678  * @pi: port information structure
1679  * @vsi_handle: software VSI handle
1680  *
1681  * This function clears the VSI and its LAN children nodes from scheduler tree
1682  * for all TCs.
1683  */
1684 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
1685 {
1686 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
1687 }
1688