// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *				     to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

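/* Worked example (capability values here are illustrative, not from any
 * specific device): with num_total_dynamic_vf_msix = 2048, min_msix = 2,
 * max_msix = 64 and 16 enabled VFs, the default is
 * max(min(2048 / 16, 64 / 2), 2) = max(min(128, 32), 2) = 32 vectors per VF.
 * Capping at max_msix / 2 leaves 2048 - 16 * 32 = 1536 vectors in the pool,
 * so a VF can later be grown with mlx5_set_msix_vec_count() without first
 * shrinking its peers.
 */
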
/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}

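/* Sketch of a hypothetical caller (illustrative only; in practice this is
 * reached from the per-VF PCI sysfs "sriov_vf_msix_count" path, which
 * resolves the VF's internal function id before calling in here):
 *
 *	count = mlx5_get_default_msix_vec_count(pf, num_vfs);
 *	err = mlx5_set_msix_vec_count(pf, function_id, count);
 *
 * The call only succeeds on a PF with vport_group_manager set and with count
 * inside [min, max]_dynamic_vf_msix_table_size; otherwise it fails with
 * -EOPNOTSUPP, -EINVAL or -EOVERFLOW as coded above.
 */
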
/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and other resources, such as the rmap, from the system,
 * but don't free or remove the reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq() requires that the affinity hint and rmap be cleared
	 * before calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

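/* Naming example (PCI name is illustrative): combined with the "%s@pci:%s"
 * format in mlx5_irq_alloc() below, a multi-vector PF on 0000:06:00.0
 * requests vector 0 as "mlx5_async0@pci:0000:06:00.0" and vector 3 as
 * "mlx5_comp3@pci:0000:06:00.0"; a single-vector device gets
 * "mlx5_combined0@pci:0000:06:00.0", and SF pool vectors take the pool
 * name, e.g. "mlx5_sf_comp3@pci:0000:06:00.0".
 */
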
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
		 "%s@pci:%s", name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

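/* Illustrative use of mlx5_irq_alloc() (hypothetical caller; real callers go
 * through the pool helpers below, which take pool->lock first):
 *
 *	irq = mlx5_irq_alloc(pool, vecidx, &af_desc, NULL);
 *	if (IS_ERR(irq))
 *		return PTR_ERR(irq);
 *
 * On success the irq is stored in pool->irqs with refcount 1; the reference
 * is dropped with mlx5_irq_put(), which frees the irq when it hits zero.
 */
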
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existent IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

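/* Sketch of the attach/detach contract (illustrative consumer; in the real
 * driver the EQ code uses this API): the caller supplies a notifier_block
 * whose callback runs from irq_int_handler() via the atomic notifier chain,
 * and mlx5_irq_attach_nb() takes an irq reference that mlx5_irq_detach_nb()
 * drops:
 *
 *	static int my_eq_isr(struct notifier_block *nb, unsigned long action,
 *			     void *data)
 *	{
 *		return NOTIFY_OK;
 *	}
 *
 *	my_nb.notifier_call = my_eq_isr;
 *	err = mlx5_irq_attach_nb(irq, &my_nb);
 *	...
 *	err = mlx5_irq_detach_nb(irq, &my_nb);
 */
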
/* irq_pool API */

/* request an irq from a given pool according to the given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	_mlx5_irq_release(ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* We are allocating a control IRQ from a PCI device's pool.
		 * This can also happen for an SF if the SF pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}

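/* Typical control-IRQ lifetime (illustrative pairing derived from the two
 * kernel-docs above):
 *
 *	ctrl_irq = mlx5_ctrl_irq_request(dev);
 *	if (IS_ERR(ctrl_irq))
 *		return PTR_ERR(ctrl_irq);
 *	...
 *	mlx5_ctrl_irq_release(ctrl_irq);
 */
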
/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate msix interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 * mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated msix interrupt
 * @dev: mlx5 device associated with interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);

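/* Usage sketch for the dynamic MSI-X helpers (handler and name are
 * hypothetical; the pairing requirement comes from the kernel-docs above).
 * On failure map.virq is 0 and map.index carries a negative errno:
 *
 *	static irqreturn_t my_isr(int irq, void *data)
 *	{
 *		return IRQ_HANDLED;
 *	}
 *
 *	map = mlx5_msix_alloc(dev, my_isr, &affdesc, "my_driver_irq");
 *	if (!map.virq)
 *		return map.index;
 *	...
 *	mlx5_msix_free(dev, map);
 */
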
/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = 1;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
	 * which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

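/* Vector-index layout example (sizes illustrative; the constants come from
 * mlx5_irq.h and above): with pcif_vec = 8 and sf_vec = 24, pcif_pool owns
 * indexes 0..7. num_sf_ctrl = min(DIV_ROUND_UP(24, MLX5_COMP_EQS_PER_SF),
 * DIV_ROUND_UP(mlx5_sf_max_functions(dev), MLX5_SFS_PER_CTRL_IRQ),
 * MLX5_IRQ_CTRL_SF_MAX) control vectors follow at index 8, and the remaining
 * 24 - num_sf_ctrl vectors starting at index 8 + num_sf_ctrl form
 * sf_comp_pool.
 */
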
static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the IRQs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}