1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <linux/interrupt.h>
5 #include <linux/notifier.h>
6 #include <linux/mlx5/driver.h>
7 #include "mlx5_core.h"
8 #include "mlx5_irq.h"
9 #include "pci_irq.h"
10 #include "lib/sf.h"
11 #ifdef CONFIG_RFS_ACCEL
12 #include <linux/cpu_rmap.h>
13 #endif
14 
/* Divisor used by irq_pools_init(): one SF ctrl IRQ is budgeted per this many
 * SFs (DIV_ROUND_UP(max SFs, MLX5_SFS_PER_CTRL_IRQ)).
 */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Upper bound on the number of vectors placed in the SF ctrl pool; also
 * added to the vector count requested in mlx5_irq_table_create().
 */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

/* Min/max thresholds passed to irq_pool_alloc() for the completion (COMP)
 * and control (CTRL) pools; irq_pool_alloc() multiplies them by
 * MLX5_EQ_REFS_PER_IRQ before storing them in the pool.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
24 
/* Per-vector state for one IRQ owned by an mlx5_irq_pool. */
struct mlx5_irq {
	/* notifier chain invoked by irq_int_handler(); also the dev_id cookie
	 * given to request_irq()/free_irq()
	 */
	struct atomic_notifier_head nh;
	/* copy of the affinity mask requested in mlx5_irq_alloc() */
	cpumask_var_t mask;
	/* "<base name>@pci:<pci device name>", as passed to request_irq() */
	char name[MLX5_MAX_IRQ_NAME];
	/* pool this IRQ is stored in; pool->lock guards refcount */
	struct mlx5_irq_pool *pool;
	/* number of references; the IRQ is released when it drops to zero */
	int refcount;
	/* vector index, used as the key in pool->irqs */
	u32 index;
	/* Linux IRQ number returned by pci_irq_vector() */
	int irqn;
};
34 
/* Per-device container of IRQ pools. The SF pools are only populated by
 * irq_pools_init() when SFs are supported and enough vectors were granted.
 */
struct mlx5_irq_table {
	/* pool starting at vector 0, used for PF/VF IRQs */
	struct mlx5_irq_pool *pf_pool;
	/* pool of control IRQs for SFs; placed right after the PF vectors */
	struct mlx5_irq_pool *sf_ctrl_pool;
	/* pool of completion IRQs for SFs; placed after the SF ctrl vectors */
	struct mlx5_irq_pool *sf_comp_pool;
};
40 
41 /**
42  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
43  *                                   to be ssigned to each VF.
44  * @dev: PF to work on
45  * @num_vfs: Number of enabled VFs
46  */
47 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
48 {
49 	int num_vf_msix, min_msix, max_msix;
50 
51 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
52 	if (!num_vf_msix)
53 		return 0;
54 
55 	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
56 	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
57 
58 	/* Limit maximum number of MSI-X vectors so the default configuration
59 	 * has some available in the pool. This will allow the user to increase
60 	 * the number of vectors in a VF without having to first size-down other
61 	 * VFs.
62 	 */
63 	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
64 }
65 
66 /**
67  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
68  * @dev: PF to work on
69  * @function_id: Internal PCI VF function IDd
70  * @msix_vec_count: Number of MSI-X vectors to set
71  */
72 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
73 			    int msix_vec_count)
74 {
75 	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
76 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
77 	void *hca_cap = NULL, *query_cap = NULL, *cap;
78 	int num_vf_msix, min_msix, max_msix;
79 	int ret;
80 
81 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
82 	if (!num_vf_msix)
83 		return 0;
84 
85 	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
86 		return -EOPNOTSUPP;
87 
88 	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
89 	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
90 
91 	if (msix_vec_count < min_msix)
92 		return -EINVAL;
93 
94 	if (msix_vec_count > max_msix)
95 		return -EOVERFLOW;
96 
97 	query_cap = kvzalloc(query_sz, GFP_KERNEL);
98 	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
99 	if (!hca_cap || !query_cap) {
100 		ret = -ENOMEM;
101 		goto out;
102 	}
103 
104 	ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
105 	if (ret)
106 		goto out;
107 
108 	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
109 	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
110 	       MLX5_UN_SZ_BYTES(hca_cap_union));
111 	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
112 
113 	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
114 	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
115 	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
116 
117 	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
118 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
119 	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
120 out:
121 	kvfree(hca_cap);
122 	kvfree(query_cap);
123 	return ret;
124 }
125 
126 static void irq_release(struct mlx5_irq *irq)
127 {
128 	struct mlx5_irq_pool *pool = irq->pool;
129 
130 	xa_erase(&pool->irqs, irq->index);
131 	/* free_irq requires that affinity_hint and rmap will be cleared
132 	 * before calling it. This is why there is asymmetry with set_rmap
133 	 * which should be called after alloc_irq but before request_irq.
134 	 */
135 	irq_update_affinity_hint(irq->irqn, NULL);
136 	free_cpumask_var(irq->mask);
137 	free_irq(irq->irqn, &irq->nh);
138 	kfree(irq);
139 }
140 
141 int mlx5_irq_put(struct mlx5_irq *irq)
142 {
143 	struct mlx5_irq_pool *pool = irq->pool;
144 	int ret = 0;
145 
146 	mutex_lock(&pool->lock);
147 	irq->refcount--;
148 	if (!irq->refcount) {
149 		irq_release(irq);
150 		ret = 1;
151 	}
152 	mutex_unlock(&pool->lock);
153 	return ret;
154 }
155 
/* Return the current reference count of @irq.
 * The caller must hold irq->pool->lock (checked via lockdep).
 */
int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}
161 
162 int mlx5_irq_get_locked(struct mlx5_irq *irq)
163 {
164 	lockdep_assert_held(&irq->pool->lock);
165 	if (WARN_ON_ONCE(!irq->refcount))
166 		return 0;
167 	irq->refcount++;
168 	return 1;
169 }
170 
171 static int irq_get(struct mlx5_irq *irq)
172 {
173 	int err;
174 
175 	mutex_lock(&irq->pool->lock);
176 	err = mlx5_irq_get_locked(irq);
177 	mutex_unlock(&irq->pool->lock);
178 	return err;
179 }
180 
181 static irqreturn_t irq_int_handler(int irq, void *nh)
182 {
183 	atomic_notifier_call_chain(nh, 0, NULL);
184 	return IRQ_HANDLED;
185 }
186 
/* Build the base name of an SF pool IRQ: the pool's name followed by the
 * vector index. @name must have room for MLX5_MAX_IRQ_NAME bytes.
 */
static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}
191 
192 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
193 {
194 	if (!pool->xa_num_irqs.max) {
195 		/* in case we only have a single irq for the device */
196 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
197 		return;
198 	}
199 
200 	if (vecidx == pool->xa_num_irqs.max) {
201 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
202 		return;
203 	}
204 
205 	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
206 }
207 
208 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
209 				const struct cpumask *affinity)
210 {
211 	struct mlx5_core_dev *dev = pool->dev;
212 	char name[MLX5_MAX_IRQ_NAME];
213 	struct mlx5_irq *irq;
214 	int err;
215 
216 	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
217 	if (!irq)
218 		return ERR_PTR(-ENOMEM);
219 	irq->irqn = pci_irq_vector(dev->pdev, i);
220 	if (!mlx5_irq_pool_is_sf_pool(pool))
221 		irq_set_name(pool, name, i);
222 	else
223 		irq_sf_set_name(pool, name, i);
224 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
225 	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
226 		 "%s@pci:%s", name, pci_name(dev->pdev));
227 	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
228 			  &irq->nh);
229 	if (err) {
230 		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
231 		goto err_req_irq;
232 	}
233 	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
234 		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
235 		err = -ENOMEM;
236 		goto err_cpumask;
237 	}
238 	if (affinity) {
239 		cpumask_copy(irq->mask, affinity);
240 		irq_set_affinity_and_hint(irq->irqn, irq->mask);
241 	}
242 	irq->pool = pool;
243 	irq->refcount = 1;
244 	irq->index = i;
245 	err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
246 	if (err) {
247 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
248 			      irq->index, err);
249 		goto err_xa;
250 	}
251 	return irq;
252 err_xa:
253 	irq_update_affinity_hint(irq->irqn, NULL);
254 	free_cpumask_var(irq->mask);
255 err_cpumask:
256 	free_irq(irq->irqn, &irq->nh);
257 err_req_irq:
258 	kfree(irq);
259 	return ERR_PTR(err);
260 }
261 
262 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
263 {
264 	int ret;
265 
266 	ret = irq_get(irq);
267 	if (!ret)
268 		/* Something very bad happens here, we are enabling EQ
269 		 * on non-existing IRQ.
270 		 */
271 		return -ENOENT;
272 	ret = atomic_notifier_chain_register(&irq->nh, nb);
273 	if (ret)
274 		mlx5_irq_put(irq);
275 	return ret;
276 }
277 
278 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
279 {
280 	int err = 0;
281 
282 	err = atomic_notifier_chain_unregister(&irq->nh, nb);
283 	mlx5_irq_put(irq);
284 	return err;
285 }
286 
/* Return the cpumask stored in @irq (irq->mask). The mask remains owned by
 * @irq.
 */
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}
291 
/* Return the vector index of @irq, i.e. its key in the pool's xarray. */
int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->index;
}
296 
297 /* irq_pool API */
298 
299 /* requesting an irq from a given pool according to given index */
300 static struct mlx5_irq *
301 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
302 			struct cpumask *affinity)
303 {
304 	struct mlx5_irq *irq;
305 
306 	mutex_lock(&pool->lock);
307 	irq = xa_load(&pool->irqs, vecidx);
308 	if (irq) {
309 		mlx5_irq_get_locked(irq);
310 		goto unlock;
311 	}
312 	irq = mlx5_irq_alloc(pool, vecidx, affinity);
313 unlock:
314 	mutex_unlock(&pool->lock);
315 	return irq;
316 }
317 
/* Return the SF control IRQ pool of @irq_table as stored in the table. */
static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}
322 
/* Return the SF completion IRQ pool of @irq_table as stored in the table. */
static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}
327 
328 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
329 {
330 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
331 	struct mlx5_irq_pool *pool = NULL;
332 
333 	if (mlx5_core_is_sf(dev))
334 		pool = sf_irq_pool_get(irq_table);
335 
336 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
337 	 * the PF IRQs pool in case the SF pool doesn't exist.
338 	 */
339 	return pool ? pool : irq_table->pf_pool;
340 }
341 
342 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
343 {
344 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
345 	struct mlx5_irq_pool *pool = NULL;
346 
347 	if (mlx5_core_is_sf(dev))
348 		pool = sf_ctrl_irq_pool_get(irq_table);
349 
350 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
351 	 * the PF IRQs pool in case the SF pool doesn't exist.
352 	 */
353 	return pool ? pool : irq_table->pf_pool;
354 }
355 
356 /**
357  * mlx5_irqs_release - release one or more IRQs back to the system.
358  * @irqs: IRQs to be released.
359  * @nirqs: number of IRQs to be released.
360  */
361 static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
362 {
363 	int i;
364 
365 	for (i = 0; i < nirqs; i++) {
366 		synchronize_irq(irqs[i]->irqn);
367 		mlx5_irq_put(irqs[i]);
368 	}
369 }
370 
371 /**
372  * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
373  * @ctrl_irq: ctrl IRQ to be released.
374  */
375 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
376 {
377 	mlx5_irqs_release(&ctrl_irq, 1);
378 }
379 
380 /**
381  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
382  * @dev: mlx5 device that requesting the IRQ.
383  *
384  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
385  */
386 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
387 {
388 	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
389 	cpumask_var_t req_mask;
390 	struct mlx5_irq *irq;
391 
392 	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
393 		return ERR_PTR(-ENOMEM);
394 	cpumask_copy(req_mask, cpu_online_mask);
395 	if (!mlx5_irq_pool_is_sf_pool(pool)) {
396 		/* In case we are allocating a control IRQ for PF/VF */
397 		if (!pool->xa_num_irqs.max) {
398 			cpumask_clear(req_mask);
399 			/* In case we only have a single IRQ for PF/VF */
400 			cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
401 		}
402 		/* Allocate the IRQ in the last index of the pool */
403 		irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
404 	} else {
405 		irq = mlx5_irq_affinity_request(pool, req_mask);
406 	}
407 
408 	free_cpumask_var(req_mask);
409 	return irq;
410 }
411 
412 /**
413  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
414  * @dev: mlx5 device that requesting the IRQ.
415  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
416  * provided.
417  * @affinity: cpumask requested for this IRQ.
418  *
419  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
420  */
421 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
422 				  struct cpumask *affinity)
423 {
424 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
425 	struct mlx5_irq_pool *pool;
426 	struct mlx5_irq *irq;
427 
428 	pool = irq_table->pf_pool;
429 	irq = irq_pool_request_vector(pool, vecidx, affinity);
430 	if (IS_ERR(irq))
431 		return irq;
432 	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
433 		      irq->irqn, cpumask_pr_args(affinity),
434 		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
435 	return irq;
436 }
437 
/**
 * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
 * @irqs: IRQs to be released.
 * @nirqs: number of IRQs to be released.
 *
 * Exported wrapper around the file-local mlx5_irqs_release().
 */
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
{
	mlx5_irqs_release(irqs, nirqs);
}
447 
448 /**
449  * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
450  * @dev: mlx5 device that is requesting the IRQs.
451  * @cpus: CPUs array for binding the IRQs
452  * @nirqs: number of IRQs to request.
453  * @irqs: an output array of IRQs pointers.
454  *
455  * Each IRQ is bound to at most 1 CPU.
456  * This function is requests nirqs IRQs, starting from @vecidx.
457  *
458  * This function returns the number of IRQs requested, (which might be smaller than
459  * @nirqs), if successful, or a negative error code in case of an error.
460  */
461 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
462 			      struct mlx5_irq **irqs)
463 {
464 	cpumask_var_t req_mask;
465 	struct mlx5_irq *irq;
466 	int i;
467 
468 	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
469 		return -ENOMEM;
470 	for (i = 0; i < nirqs; i++) {
471 		cpumask_set_cpu(cpus[i], req_mask);
472 		irq = mlx5_irq_request(dev, i, req_mask);
473 		if (IS_ERR(irq))
474 			break;
475 		cpumask_clear(req_mask);
476 		irqs[i] = irq;
477 	}
478 
479 	free_cpumask_var(req_mask);
480 	return i ? i : PTR_ERR(irq);
481 }
482 
483 static struct mlx5_irq_pool *
484 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
485 	       u32 min_threshold, u32 max_threshold)
486 {
487 	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
488 
489 	if (!pool)
490 		return ERR_PTR(-ENOMEM);
491 	pool->dev = dev;
492 	mutex_init(&pool->lock);
493 	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
494 	pool->xa_num_irqs.min = start;
495 	pool->xa_num_irqs.max = start + size - 1;
496 	if (name)
497 		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
498 			 "%s", name);
499 	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
500 	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
501 	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
502 		      name, size, start);
503 	return pool;
504 }
505 
506 static void irq_pool_free(struct mlx5_irq_pool *pool)
507 {
508 	struct mlx5_irq *irq;
509 	unsigned long index;
510 
511 	/* There are cases in which we are destrying the irq_table before
512 	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
513 	 * which might not have been freed.
514 	 */
515 	xa_for_each(&pool->irqs, index, irq)
516 		irq_release(irq);
517 	xa_destroy(&pool->irqs);
518 	mutex_destroy(&pool->lock);
519 	kfree(pool->irqs_per_cpu);
520 	kvfree(pool);
521 }
522 
523 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
524 {
525 	struct mlx5_irq_table *table = dev->priv.irq_table;
526 	int num_sf_ctrl_by_msix;
527 	int num_sf_ctrl_by_sfs;
528 	int num_sf_ctrl;
529 	int err;
530 
531 	/* init pf_pool */
532 	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
533 					MLX5_EQ_SHARE_IRQ_MIN_COMP,
534 					MLX5_EQ_SHARE_IRQ_MAX_COMP);
535 	if (IS_ERR(table->pf_pool))
536 		return PTR_ERR(table->pf_pool);
537 	if (!mlx5_sf_max_functions(dev))
538 		return 0;
539 	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
540 		mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
541 		return 0;
542 	}
543 
544 	/* init sf_ctrl_pool */
545 	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
546 	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
547 					  MLX5_SFS_PER_CTRL_IRQ);
548 	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
549 	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
550 	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
551 					     "mlx5_sf_ctrl",
552 					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
553 					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
554 	if (IS_ERR(table->sf_ctrl_pool)) {
555 		err = PTR_ERR(table->sf_ctrl_pool);
556 		goto err_pf;
557 	}
558 	/* init sf_comp_pool */
559 	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
560 					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
561 					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
562 					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
563 	if (IS_ERR(table->sf_comp_pool)) {
564 		err = PTR_ERR(table->sf_comp_pool);
565 		goto err_sf_ctrl;
566 	}
567 
568 	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
569 	if (!table->sf_comp_pool->irqs_per_cpu) {
570 		err = -ENOMEM;
571 		goto err_irqs_per_cpu;
572 	}
573 
574 	return 0;
575 
576 err_irqs_per_cpu:
577 	irq_pool_free(table->sf_comp_pool);
578 err_sf_ctrl:
579 	irq_pool_free(table->sf_ctrl_pool);
580 err_pf:
581 	irq_pool_free(table->pf_pool);
582 	return err;
583 }
584 
585 static void irq_pools_destroy(struct mlx5_irq_table *table)
586 {
587 	if (table->sf_ctrl_pool) {
588 		irq_pool_free(table->sf_comp_pool);
589 		irq_pool_free(table->sf_ctrl_pool);
590 	}
591 	irq_pool_free(table->pf_pool);
592 }
593 
594 /* irq_table API */
595 
596 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
597 {
598 	struct mlx5_irq_table *irq_table;
599 
600 	if (mlx5_core_is_sf(dev))
601 		return 0;
602 
603 	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
604 				  dev->priv.numa_node);
605 	if (!irq_table)
606 		return -ENOMEM;
607 
608 	dev->priv.irq_table = irq_table;
609 	return 0;
610 }
611 
612 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
613 {
614 	if (mlx5_core_is_sf(dev))
615 		return;
616 
617 	kvfree(dev->priv.irq_table);
618 }
619 
620 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
621 {
622 	if (!table->pf_pool->xa_num_irqs.max)
623 		return 1;
624 	return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
625 }
626 
627 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
628 {
629 	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
630 		      MLX5_CAP_GEN(dev, max_num_eqs) :
631 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
632 	int total_vec;
633 	int pf_vec;
634 	int err;
635 
636 	if (mlx5_core_is_sf(dev))
637 		return 0;
638 
639 	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
640 	pf_vec = min_t(int, pf_vec, num_eqs);
641 
642 	total_vec = pf_vec;
643 	if (mlx5_sf_max_functions(dev))
644 		total_vec += MLX5_IRQ_CTRL_SF_MAX +
645 			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
646 
647 	total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
648 	if (total_vec < 0)
649 		return total_vec;
650 	pf_vec = min(pf_vec, total_vec);
651 
652 	err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
653 	if (err)
654 		pci_free_irq_vectors(dev->pdev);
655 
656 	return err;
657 }
658 
659 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
660 {
661 	struct mlx5_irq_table *table = dev->priv.irq_table;
662 
663 	if (mlx5_core_is_sf(dev))
664 		return;
665 
666 	/* There are cases where IRQs still will be in used when we reaching
667 	 * to here. Hence, making sure all the irqs are released.
668 	 */
669 	irq_pools_destroy(table);
670 	pci_free_irq_vectors(dev->pdev);
671 }
672 
673 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
674 {
675 	if (table->sf_comp_pool)
676 		return min_t(int, num_online_cpus(),
677 			     table->sf_comp_pool->xa_num_irqs.max -
678 			     table->sf_comp_pool->xa_num_irqs.min + 1);
679 	else
680 		return mlx5_irq_table_get_num_comp(table);
681 }
682 
683 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
684 {
685 #ifdef CONFIG_MLX5_SF
686 	if (mlx5_core_is_sf(dev))
687 		return dev->priv.parent_mdev->priv.irq_table;
688 #endif
689 	return dev->priv.irq_table;
690 }
691