1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved. 4 */ 5 6 #include <linux/interrupt.h> 7 #include <linux/notifier.h> 8 #include <linux/mlx5/driver.h> 9 #include <linux/mlx5/vport.h> 10 #include <linux/mlx5/eq.h> 11 #ifdef CONFIG_RFS_ACCEL 12 #include <linux/cpu_rmap.h> 13 #endif 14 #include "mlx5_core.h" 15 #include "lib/eq.h" 16 #include "fpga/core.h" 17 #include "eswitch.h" 18 #include "lib/clock.h" 19 #include "diag/fw_tracer.h" 20 #include "mlx5_irq.h" 21 #include "pci_irq.h" 22 #include "devlink.h" 23 #include "en_accel/ipsec.h" 24 25 enum { 26 MLX5_EQE_OWNER_INIT_VAL = 0x1, 27 }; 28 29 enum { 30 MLX5_EQ_STATE_ARMED = 0x9, 31 MLX5_EQ_STATE_FIRED = 0xa, 32 MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, 33 }; 34 35 enum { 36 MLX5_EQ_DOORBEL_OFFSET = 0x40, 37 }; 38 39 /* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update 40 * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is 41 * used to set the EQ size, budget must be smaller than the EQ size. 42 */ 43 enum { 44 MLX5_EQ_POLLING_BUDGET = 128, 45 }; 46 47 static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); 48 49 struct mlx5_eq_table { 50 struct xarray comp_eqs; 51 struct mlx5_eq_async pages_eq; 52 struct mlx5_eq_async cmd_eq; 53 struct mlx5_eq_async async_eq; 54 55 struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; 56 57 /* Since CQ DB is stored in async_eq */ 58 struct mlx5_nb cq_err_nb; 59 60 struct mutex lock; /* sync async eqs creations */ 61 struct mutex comp_lock; /* sync comp eqs creations */ 62 int curr_comp_eqs; 63 int max_comp_eqs; 64 struct mlx5_irq_table *irq_table; 65 struct xarray comp_irqs; 66 struct mlx5_irq *ctrl_irq; 67 struct cpu_rmap *rmap; 68 struct cpumask used_cpus; 69 }; 70 71 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ 72 (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ 73 (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ 74 (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ 75 (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ 76 (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ 77 (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ 78 (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ 79 (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ 80 (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ 81 (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ 82 (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) 83 84 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) 85 { 86 u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {}; 87 88 MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); 89 MLX5_SET(destroy_eq_in, in, eq_number, eqn); 90 return mlx5_cmd_exec_in(dev, destroy_eq, in); 91 } 92 93 /* caller must eventually call mlx5_cq_put on the returned cq */ 94 static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) 95 { 96 struct mlx5_cq_table *table = &eq->cq_table; 97 struct mlx5_core_cq *cq = NULL; 98 99 rcu_read_lock(); 100 cq = radix_tree_lookup(&table->tree, cqn); 101 if (likely(cq)) 102 mlx5_cq_hold(cq); 103 rcu_read_unlock(); 104 105 return cq; 106 } 107 108 static int mlx5_eq_comp_int(struct notifier_block *nb, 109 __always_unused unsigned long action, 110 __always_unused void *data) 111 { 112 struct mlx5_eq_comp *eq_comp = 113 container_of(nb, struct mlx5_eq_comp, irq_nb); 114 struct mlx5_eq *eq = &eq_comp->core; 115 struct mlx5_eqe *eqe; 116 int num_eqes = 0; 117 u32 cqn = -1; 118 119 eqe = next_eqe_sw(eq); 120 if (!eqe) 121 goto out; 122 123 do { 124 struct mlx5_core_cq *cq; 125 126 /* Make sure we read EQ entry contents after we've 127 * checked the ownership bit. 128 */ 129 dma_rmb(); 130 /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ 131 cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; 132 133 cq = mlx5_eq_cq_get(eq, cqn); 134 if (likely(cq)) { 135 ++cq->arm_sn; 136 cq->comp(cq, eqe); 137 mlx5_cq_put(cq); 138 } else { 139 dev_dbg_ratelimited(eq->dev->device, 140 "Completion event for bogus CQ 0x%x\n", cqn); 141 } 142 143 ++eq->cons_index; 144 145 } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); 146 147 out: 148 eq_update_ci(eq, 1); 149 150 if (cqn != -1) 151 tasklet_schedule(&eq_comp->tasklet_ctx.task); 152 153 return 0; 154 } 155 156 /* Some architectures don't latch interrupts when they are disabled, so using 157 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to 158 * avoid losing them. It is not recommended to use it, unless this is the last 159 * resort. 160 */ 161 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) 162 { 163 u32 count_eqe; 164 165 disable_irq(eq->core.irqn); 166 count_eqe = eq->core.cons_index; 167 mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); 168 count_eqe = eq->core.cons_index - count_eqe; 169 enable_irq(eq->core.irqn); 170 171 return count_eqe; 172 } 173 174 static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery, 175 unsigned long *flags) 176 __acquires(&eq->lock) 177 { 178 if (!recovery) 179 spin_lock(&eq->lock); 180 else 181 spin_lock_irqsave(&eq->lock, *flags); 182 } 183 184 static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery, 185 unsigned long *flags) 186 __releases(&eq->lock) 187 { 188 if (!recovery) 189 spin_unlock(&eq->lock); 190 else 191 spin_unlock_irqrestore(&eq->lock, *flags); 192 } 193 194 enum async_eq_nb_action { 195 ASYNC_EQ_IRQ_HANDLER = 0, 196 ASYNC_EQ_RECOVER = 1, 197 }; 198 199 static int mlx5_eq_async_int(struct notifier_block *nb, 200 unsigned long action, void *data) 201 { 202 struct mlx5_eq_async *eq_async = 203 container_of(nb, struct mlx5_eq_async, irq_nb); 204 struct mlx5_eq *eq = &eq_async->core; 205 struct mlx5_eq_table *eqt; 206 struct mlx5_core_dev *dev; 207 struct mlx5_eqe *eqe; 208 unsigned long flags; 209 int num_eqes = 0; 210 bool recovery; 211 212 dev = eq->dev; 213 eqt = dev->priv.eq_table; 214 215 recovery = action == ASYNC_EQ_RECOVER; 216 mlx5_eq_async_int_lock(eq_async, recovery, &flags); 217 218 eqe = next_eqe_sw(eq); 219 if (!eqe) 220 goto out; 221 222 do { 223 /* 224 * Make sure we read EQ entry contents after we've 225 * checked the ownership bit. 226 */ 227 dma_rmb(); 228 229 atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); 230 atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); 231 232 ++eq->cons_index; 233 234 } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); 235 236 out: 237 eq_update_ci(eq, 1); 238 mlx5_eq_async_int_unlock(eq_async, recovery, &flags); 239 240 return unlikely(recovery) ? num_eqes : 0; 241 } 242 243 void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) 244 { 245 struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; 246 int eqes; 247 248 eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); 249 if (eqes) 250 mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); 251 } 252 253 static void init_eq_buf(struct mlx5_eq *eq) 254 { 255 struct mlx5_eqe *eqe; 256 int i; 257 258 for (i = 0; i < eq_get_size(eq); i++) { 259 eqe = get_eqe(eq, i); 260 eqe->owner = MLX5_EQE_OWNER_INIT_VAL; 261 } 262 } 263 264 static int 265 create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 266 struct mlx5_eq_param *param) 267 { 268 u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); 269 struct mlx5_cq_table *cq_table = &eq->cq_table; 270 u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; 271 u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); 272 struct mlx5_priv *priv = &dev->priv; 273 __be64 *pas; 274 u16 vecidx; 275 void *eqc; 276 int inlen; 277 u32 *in; 278 int err; 279 int i; 280 281 /* Init CQ table */ 282 memset(cq_table, 0, sizeof(*cq_table)); 283 spin_lock_init(&cq_table->lock); 284 INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); 285 286 eq->cons_index = 0; 287 288 err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), 289 &eq->frag_buf, dev->priv.numa_node); 290 if (err) 291 return err; 292 293 mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); 294 init_eq_buf(eq); 295 296 eq->irq = param->irq; 297 vecidx = mlx5_irq_get_index(eq->irq); 298 299 inlen = MLX5_ST_SZ_BYTES(create_eq_in) + 300 MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; 301 302 in = kvzalloc(inlen, GFP_KERNEL); 303 if (!in) { 304 err = -ENOMEM; 305 goto err_buf; 306 } 307 308 pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); 309 mlx5_fill_page_frag_array(&eq->frag_buf, pas); 310 311 MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); 312 if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) 313 MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); 314 315 for (i = 0; i < 4; i++) 316 MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, 317 param->mask[i]); 318 319 eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); 320 MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); 321 MLX5_SET(eqc, eqc, uar_page, priv->uar->index); 322 MLX5_SET(eqc, eqc, intr, vecidx); 323 MLX5_SET(eqc, eqc, log_page_size, 324 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 325 326 err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); 327 if (err) 328 goto err_in; 329 330 eq->vecidx = vecidx; 331 eq->eqn = MLX5_GET(create_eq_out, out, eq_number); 332 eq->irqn = pci_irq_vector(dev->pdev, vecidx); 333 eq->dev = dev; 334 eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; 335 336 err = mlx5_debug_eq_add(dev, eq); 337 if (err) 338 goto err_eq; 339 340 kvfree(in); 341 return 0; 342 343 err_eq: 344 mlx5_cmd_destroy_eq(dev, eq->eqn); 345 346 err_in: 347 kvfree(in); 348 349 err_buf: 350 mlx5_frag_buf_free(dev, &eq->frag_buf); 351 return err; 352 } 353 354 /** 355 * mlx5_eq_enable - Enable EQ for receiving EQEs 356 * @dev : Device which owns the eq 357 * @eq : EQ to enable 358 * @nb : Notifier call block 359 * 360 * Must be called after EQ is created in device. 361 * 362 * @return: 0 if no error 363 */ 364 int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 365 struct notifier_block *nb) 366 { 367 int err; 368 369 err = mlx5_irq_attach_nb(eq->irq, nb); 370 if (!err) 371 eq_update_ci(eq, 1); 372 373 return err; 374 } 375 EXPORT_SYMBOL(mlx5_eq_enable); 376 377 /** 378 * mlx5_eq_disable - Disable EQ for receiving EQEs 379 * @dev : Device which owns the eq 380 * @eq : EQ to disable 381 * @nb : Notifier call block 382 * 383 * Must be called before EQ is destroyed. 384 */ 385 void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 386 struct notifier_block *nb) 387 { 388 mlx5_irq_detach_nb(eq->irq, nb); 389 } 390 EXPORT_SYMBOL(mlx5_eq_disable); 391 392 static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) 393 { 394 int err; 395 396 mlx5_debug_eq_remove(dev, eq); 397 398 err = mlx5_cmd_destroy_eq(dev, eq->eqn); 399 if (err) 400 mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", 401 eq->eqn); 402 403 mlx5_frag_buf_free(dev, &eq->frag_buf); 404 return err; 405 } 406 407 int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) 408 { 409 struct mlx5_cq_table *table = &eq->cq_table; 410 int err; 411 412 spin_lock(&table->lock); 413 err = radix_tree_insert(&table->tree, cq->cqn, cq); 414 spin_unlock(&table->lock); 415 416 return err; 417 } 418 419 void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) 420 { 421 struct mlx5_cq_table *table = &eq->cq_table; 422 struct mlx5_core_cq *tmp; 423 424 spin_lock(&table->lock); 425 tmp = radix_tree_delete(&table->tree, cq->cqn); 426 spin_unlock(&table->lock); 427 428 if (!tmp) { 429 mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", 430 eq->eqn, cq->cqn); 431 return; 432 } 433 434 if (tmp != cq) 435 mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", 436 eq->eqn, cq->cqn); 437 } 438 439 int mlx5_eq_table_init(struct mlx5_core_dev *dev) 440 { 441 struct mlx5_eq_table *eq_table; 442 int i; 443 444 eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL, 445 dev->priv.numa_node); 446 if (!eq_table) 447 return -ENOMEM; 448 449 dev->priv.eq_table = eq_table; 450 451 mlx5_eq_debugfs_init(dev); 452 453 mutex_init(&eq_table->lock); 454 for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) 455 ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); 456 457 eq_table->irq_table = mlx5_irq_table_get(dev); 458 cpumask_clear(&eq_table->used_cpus); 459 xa_init(&eq_table->comp_eqs); 460 xa_init(&eq_table->comp_irqs); 461 mutex_init(&eq_table->comp_lock); 462 eq_table->curr_comp_eqs = 0; 463 return 0; 464 } 465 466 void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) 467 { 468 struct mlx5_eq_table *table = dev->priv.eq_table; 469 470 mlx5_eq_debugfs_cleanup(dev); 471 xa_destroy(&table->comp_irqs); 472 xa_destroy(&table->comp_eqs); 473 kvfree(table); 474 } 475 476 /* Async EQs */ 477 478 static int create_async_eq(struct mlx5_core_dev *dev, 479 struct mlx5_eq *eq, struct mlx5_eq_param *param) 480 { 481 struct mlx5_eq_table *eq_table = dev->priv.eq_table; 482 int err; 483 484 mutex_lock(&eq_table->lock); 485 err = create_map_eq(dev, eq, param); 486 mutex_unlock(&eq_table->lock); 487 return err; 488 } 489 490 static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) 491 { 492 struct mlx5_eq_table *eq_table = dev->priv.eq_table; 493 int err; 494 495 mutex_lock(&eq_table->lock); 496 err = destroy_unmap_eq(dev, eq); 497 mutex_unlock(&eq_table->lock); 498 return err; 499 } 500 501 static int cq_err_event_notifier(struct notifier_block *nb, 502 unsigned long type, void *data) 503 { 504 struct mlx5_eq_table *eqt; 505 struct mlx5_core_cq *cq; 506 struct mlx5_eqe *eqe; 507 struct mlx5_eq *eq; 508 u32 cqn; 509 510 /* type == MLX5_EVENT_TYPE_CQ_ERROR */ 511 512 eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); 513 eq = &eqt->async_eq.core; 514 eqe = data; 515 516 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; 517 mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", 518 cqn, eqe->data.cq_err.syndrome); 519 520 cq = mlx5_eq_cq_get(eq, cqn); 521 if (unlikely(!cq)) { 522 mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); 523 return NOTIFY_OK; 524 } 525 526 if (cq->event) 527 cq->event(cq, type); 528 529 mlx5_cq_put(cq); 530 531 return NOTIFY_OK; 532 } 533 534 static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4]) 535 { 536 __be64 *user_unaffiliated_events; 537 __be64 *user_affiliated_events; 538 int i; 539 540 user_affiliated_events = 541 MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); 542 user_unaffiliated_events = 543 MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); 544 545 for (i = 0; i < 4; i++) 546 mask[i] |= be64_to_cpu(user_affiliated_events[i] | 547 user_unaffiliated_events[i]); 548 } 549 550 static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) 551 { 552 u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; 553 554 if (MLX5_VPORT_MANAGER(dev)) 555 async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); 556 557 if (MLX5_CAP_GEN(dev, general_notification_event)) 558 async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); 559 560 if (MLX5_CAP_GEN(dev, port_module_event)) 561 async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT); 562 else 563 mlx5_core_dbg(dev, "port_module_event is not set\n"); 564 565 if (MLX5_PPS_CAP(dev)) 566 async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); 567 568 if (MLX5_CAP_GEN(dev, fpga)) 569 async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | 570 (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); 571 if (MLX5_CAP_GEN_MAX(dev, dct)) 572 async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); 573 574 if (MLX5_CAP_GEN(dev, temp_warn_event)) 575 async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); 576 577 if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) 578 async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); 579 580 if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) 581 async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); 582 583 if (mlx5_eswitch_is_funcs_handler(dev)) 584 async_event_mask |= 585 (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); 586 587 if (MLX5_CAP_GEN_MAX(dev, vhca_state)) 588 async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE); 589 590 if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload)) 591 async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); 592 593 if (mlx5_ipsec_device_caps(dev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 594 async_event_mask |= 595 (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); 596 597 mask[0] = async_event_mask; 598 599 if (MLX5_CAP_GEN(dev, event_cap)) 600 gather_user_async_events(dev, mask); 601 } 602 603 static int 604 setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, 605 struct mlx5_eq_param *param, const char *name) 606 { 607 int err; 608 609 eq->irq_nb.notifier_call = mlx5_eq_async_int; 610 spin_lock_init(&eq->lock); 611 612 err = create_async_eq(dev, &eq->core, param); 613 if (err) { 614 mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); 615 return err; 616 } 617 err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); 618 if (err) { 619 mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); 620 destroy_async_eq(dev, &eq->core); 621 } 622 return err; 623 } 624 625 static void cleanup_async_eq(struct mlx5_core_dev *dev, 626 struct mlx5_eq_async *eq, const char *name) 627 { 628 int err; 629 630 mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); 631 err = destroy_async_eq(dev, &eq->core); 632 if (err) 633 mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", 634 name, err); 635 } 636 637 static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) 638 { 639 struct devlink *devlink = priv_to_devlink(dev); 640 union devlink_param_value val; 641 int err; 642 643 err = devl_param_driverinit_value_get(devlink, 644 DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, 645 &val); 646 if (!err) 647 return val.vu32; 648 mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); 649 return MLX5_NUM_ASYNC_EQE; 650 } 651 652 static int create_async_eqs(struct mlx5_core_dev *dev) 653 { 654 struct mlx5_eq_table *table = dev->priv.eq_table; 655 struct mlx5_eq_param param = {}; 656 int err; 657 658 /* All the async_eqs are using single IRQ, request one IRQ and share its 659 * index among all the async_eqs of this device. 660 */ 661 table->ctrl_irq = mlx5_ctrl_irq_request(dev); 662 if (IS_ERR(table->ctrl_irq)) 663 return PTR_ERR(table->ctrl_irq); 664 665 MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); 666 mlx5_eq_notifier_register(dev, &table->cq_err_nb); 667 668 param = (struct mlx5_eq_param) { 669 .irq = table->ctrl_irq, 670 .nent = MLX5_NUM_CMD_EQE, 671 .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, 672 }; 673 mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); 674 err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); 675 if (err) 676 goto err1; 677 678 mlx5_cmd_use_events(dev); 679 mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); 680 681 param = (struct mlx5_eq_param) { 682 .irq = table->ctrl_irq, 683 .nent = async_eq_depth_devlink_param_get(dev), 684 }; 685 686 gather_async_events_mask(dev, param.mask); 687 err = setup_async_eq(dev, &table->async_eq, ¶m, "async"); 688 if (err) 689 goto err2; 690 691 param = (struct mlx5_eq_param) { 692 .irq = table->ctrl_irq, 693 .nent = /* TODO: sriov max_vf + */ 1, 694 .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, 695 }; 696 697 err = setup_async_eq(dev, &table->pages_eq, ¶m, "pages"); 698 if (err) 699 goto err3; 700 701 return 0; 702 703 err3: 704 cleanup_async_eq(dev, &table->async_eq, "async"); 705 err2: 706 mlx5_cmd_use_polling(dev); 707 cleanup_async_eq(dev, &table->cmd_eq, "cmd"); 708 err1: 709 mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); 710 mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); 711 mlx5_ctrl_irq_release(table->ctrl_irq); 712 return err; 713 } 714 715 static void destroy_async_eqs(struct mlx5_core_dev *dev) 716 { 717 struct mlx5_eq_table *table = dev->priv.eq_table; 718 719 cleanup_async_eq(dev, &table->pages_eq, "pages"); 720 cleanup_async_eq(dev, &table->async_eq, "async"); 721 mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); 722 mlx5_cmd_use_polling(dev); 723 cleanup_async_eq(dev, &table->cmd_eq, "cmd"); 724 mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); 725 mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); 726 mlx5_ctrl_irq_release(table->ctrl_irq); 727 } 728 729 struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) 730 { 731 return &dev->priv.eq_table->async_eq.core; 732 } 733 734 void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) 735 { 736 synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); 737 } 738 739 void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) 740 { 741 synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); 742 } 743 744 /* Generic EQ API for mlx5_core consumers 745 * Needed For RDMA ODP EQ for now 746 */ 747 struct mlx5_eq * 748 mlx5_eq_create_generic(struct mlx5_core_dev *dev, 749 struct mlx5_eq_param *param) 750 { 751 struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL, 752 dev->priv.numa_node); 753 int err; 754 755 if (!eq) 756 return ERR_PTR(-ENOMEM); 757 758 param->irq = dev->priv.eq_table->ctrl_irq; 759 err = create_async_eq(dev, eq, param); 760 if (err) { 761 kvfree(eq); 762 eq = ERR_PTR(err); 763 } 764 765 return eq; 766 } 767 EXPORT_SYMBOL(mlx5_eq_create_generic); 768 769 int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) 770 { 771 int err; 772 773 if (IS_ERR(eq)) 774 return -EINVAL; 775 776 err = destroy_async_eq(dev, eq); 777 if (err) 778 goto out; 779 780 kvfree(eq); 781 out: 782 return err; 783 } 784 EXPORT_SYMBOL(mlx5_eq_destroy_generic); 785 786 struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) 787 { 788 u32 ci = eq->cons_index + cc; 789 u32 nent = eq_get_size(eq); 790 struct mlx5_eqe *eqe; 791 792 eqe = get_eqe(eq, ci & (nent - 1)); 793 eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; 794 /* Make sure we read EQ entry contents after we've 795 * checked the ownership bit. 796 */ 797 if (eqe) 798 dma_rmb(); 799 800 return eqe; 801 } 802 EXPORT_SYMBOL(mlx5_eq_get_eqe); 803 804 void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) 805 { 806 __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); 807 u32 val; 808 809 eq->cons_index += cc; 810 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); 811 812 __raw_writel((__force u32)cpu_to_be32(val), addr); 813 /* We still want ordering, just not swabbing, so add a barrier */ 814 wmb(); 815 } 816 EXPORT_SYMBOL(mlx5_eq_update_ci); 817 818 static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) 819 { 820 struct mlx5_eq_table *table = dev->priv.eq_table; 821 struct mlx5_irq *irq; 822 823 irq = xa_load(&table->comp_irqs, vecidx); 824 if (!irq) 825 return; 826 827 xa_erase(&table->comp_irqs, vecidx); 828 mlx5_irq_release_vector(irq); 829 } 830 831 static int mlx5_cpumask_default_spread(int numa_node, int index) 832 { 833 const struct cpumask *prev = cpu_none_mask; 834 const struct cpumask *mask; 835 int found_cpu = 0; 836 int i = 0; 837 int cpu; 838 839 rcu_read_lock(); 840 for_each_numa_hop_mask(mask, numa_node) { 841 for_each_cpu_andnot(cpu, mask, prev) { 842 if (i++ == index) { 843 found_cpu = cpu; 844 goto spread_done; 845 } 846 } 847 prev = mask; 848 } 849 850 spread_done: 851 rcu_read_unlock(); 852 return found_cpu; 853 } 854 855 static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) 856 { 857 #ifdef CONFIG_RFS_ACCEL 858 #ifdef CONFIG_MLX5_SF 859 if (mlx5_core_is_sf(dev)) 860 return dev->priv.parent_mdev->priv.eq_table->rmap; 861 #endif 862 return dev->priv.eq_table->rmap; 863 #else 864 return NULL; 865 #endif 866 } 867 868 static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) 869 { 870 struct mlx5_eq_table *table = dev->priv.eq_table; 871 struct cpu_rmap *rmap; 872 struct mlx5_irq *irq; 873 int cpu; 874 875 rmap = mlx5_eq_table_get_pci_rmap(dev); 876 cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); 877 irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); 878 if (IS_ERR(irq)) 879 return PTR_ERR(irq); 880 881 return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); 882 } 883 884 static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) 885 { 886 struct mlx5_eq_table *table = dev->priv.eq_table; 887 struct mlx5_irq *irq; 888 889 irq = xa_load(&table->comp_irqs, vecidx); 890 if (!irq) 891 return; 892 893 xa_erase(&table->comp_irqs, vecidx); 894 mlx5_irq_affinity_irq_release(dev, irq); 895 } 896 897 static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) 898 { 899 struct mlx5_eq_table *table = dev->priv.eq_table; 900 struct mlx5_irq *irq; 901 902 irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); 903 if (IS_ERR(irq)) { 904 /* In case SF irq pool does not exist, fallback to the PF irqs*/ 905 if (PTR_ERR(irq) == -ENOENT) 906 return comp_irq_request_pci(dev, vecidx); 907 908 return PTR_ERR(irq); 909 } 910 911 return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); 912 } 913 914 static void comp_irq_release(struct mlx5_core_dev *dev, u16 vecidx) 915 { 916 mlx5_core_is_sf(dev) ? comp_irq_release_sf(dev, vecidx) : 917 comp_irq_release_pci(dev, vecidx); 918 } 919 920 static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx) 921 { 922 return mlx5_core_is_sf(dev) ? comp_irq_request_sf(dev, vecidx) : 923 comp_irq_request_pci(dev, vecidx); 924 } 925 926 #ifdef CONFIG_RFS_ACCEL 927 static int alloc_rmap(struct mlx5_core_dev *mdev) 928 { 929 struct mlx5_eq_table *eq_table = mdev->priv.eq_table; 930 931 /* rmap is a mapping between irq number and queue number. 932 * Each irq can be assigned only to a single rmap. 933 * Since SFs share IRQs, rmap mapping cannot function correctly 934 * for irqs that are shared between different core/netdev RX rings. 935 * Hence we don't allow netdev rmap for SFs. 936 */ 937 if (mlx5_core_is_sf(mdev)) 938 return 0; 939 940 eq_table->rmap = alloc_irq_cpu_rmap(eq_table->max_comp_eqs); 941 if (!eq_table->rmap) 942 return -ENOMEM; 943 return 0; 944 } 945 946 static void free_rmap(struct mlx5_core_dev *mdev) 947 { 948 struct mlx5_eq_table *eq_table = mdev->priv.eq_table; 949 950 if (eq_table->rmap) { 951 free_irq_cpu_rmap(eq_table->rmap); 952 eq_table->rmap = NULL; 953 } 954 } 955 #else 956 static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; } 957 static void free_rmap(struct mlx5_core_dev *mdev) {} 958 #endif 959 960 static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx) 961 { 962 struct mlx5_eq_table *table = dev->priv.eq_table; 963 964 xa_erase(&table->comp_eqs, vecidx); 965 mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); 966 if (destroy_unmap_eq(dev, &eq->core)) 967 mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", 968 eq->core.eqn); 969 tasklet_disable(&eq->tasklet_ctx.task); 970 kfree(eq); 971 comp_irq_release(dev, vecidx); 972 table->curr_comp_eqs--; 973 } 974 975 static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) 976 { 977 struct devlink *devlink = priv_to_devlink(dev); 978 union devlink_param_value val; 979 int err; 980 981 err = devl_param_driverinit_value_get(devlink, 982 DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, 983 &val); 984 if (!err) 985 return val.vu32; 986 mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); 987 return MLX5_COMP_EQ_SIZE; 988 } 989 990 /* Must be called with EQ table comp_lock held */ 991 static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx) 992 { 993 struct mlx5_eq_table *table = dev->priv.eq_table; 994 struct mlx5_eq_param param = {}; 995 struct mlx5_eq_comp *eq; 996 struct mlx5_irq *irq; 997 int nent; 998 int err; 999 1000 lockdep_assert_held(&table->comp_lock); 1001 if (table->curr_comp_eqs == table->max_comp_eqs) { 1002 mlx5_core_err(dev, "maximum number of vectors is allocated, %d\n", 1003 table->max_comp_eqs); 1004 return -ENOMEM; 1005 } 1006 1007 err = comp_irq_request(dev, vecidx); 1008 if (err) 1009 return err; 1010 1011 nent = comp_eq_depth_devlink_param_get(dev); 1012 1013 eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); 1014 if (!eq) { 1015 err = -ENOMEM; 1016 goto clean_irq; 1017 } 1018 1019 INIT_LIST_HEAD(&eq->tasklet_ctx.list); 1020 INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); 1021 spin_lock_init(&eq->tasklet_ctx.lock); 1022 tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); 1023 1024 irq = xa_load(&table->comp_irqs, vecidx); 1025 eq->irq_nb.notifier_call = mlx5_eq_comp_int; 1026 param = (struct mlx5_eq_param) { 1027 .irq = irq, 1028 .nent = nent, 1029 }; 1030 1031 err = create_map_eq(dev, &eq->core, ¶m); 1032 if (err) 1033 goto clean_eq; 1034 err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); 1035 if (err) { 1036 destroy_unmap_eq(dev, &eq->core); 1037 goto clean_eq; 1038 } 1039 1040 mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); 1041 err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL)); 1042 if (err) 1043 goto disable_eq; 1044 1045 table->curr_comp_eqs++; 1046 return eq->core.eqn; 1047 1048 disable_eq: 1049 mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); 1050 clean_eq: 1051 kfree(eq); 1052 clean_irq: 1053 comp_irq_release(dev, vecidx); 1054 return err; 1055 } 1056 1057 int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn) 1058 { 1059 struct mlx5_eq_table *table = dev->priv.eq_table; 1060 struct mlx5_eq_comp *eq; 1061 int ret = 0; 1062 1063 mutex_lock(&table->comp_lock); 1064 eq = xa_load(&table->comp_eqs, vecidx); 1065 if (eq) { 1066 *eqn = eq->core.eqn; 1067 goto out; 1068 } 1069 1070 ret = create_comp_eq(dev, vecidx); 1071 if (ret < 0) { 1072 mutex_unlock(&table->comp_lock); 1073 return ret; 1074 } 1075 1076 *eqn = ret; 1077 out: 1078 mutex_unlock(&table->comp_lock); 1079 return 0; 1080 } 1081 EXPORT_SYMBOL(mlx5_comp_eqn_get); 1082 1083 int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) 1084 { 1085 struct mlx5_eq_table *table = dev->priv.eq_table; 1086 struct mlx5_eq_comp *eq; 1087 int eqn; 1088 int err; 1089 1090 /* Allocate the EQ if not allocated yet */ 1091 err = mlx5_comp_eqn_get(dev, vector, &eqn); 1092 if (err) 1093 return err; 1094 1095 eq = xa_load(&table->comp_eqs, vector); 1096 *irqn = eq->core.irqn; 1097 return 0; 1098 } 1099 1100 unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev) 1101 { 1102 return dev->priv.eq_table->max_comp_eqs; 1103 } 1104 EXPORT_SYMBOL(mlx5_comp_vectors_max); 1105 1106 static struct cpumask * 1107 mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) 1108 { 1109 struct mlx5_eq_table *table = dev->priv.eq_table; 1110 struct mlx5_eq_comp *eq; 1111 1112 eq = xa_load(&table->comp_eqs, vector); 1113 if (eq) 1114 return mlx5_irq_get_affinity_mask(eq->core.irq); 1115 1116 return NULL; 1117 } 1118 1119 int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) 1120 { 1121 struct cpumask *mask; 1122 int cpu; 1123 1124 mask = mlx5_comp_irq_get_affinity_mask(dev, vector); 1125 if (mask) 1126 cpu = cpumask_first(mask); 1127 else 1128 cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); 1129 1130 return cpu; 1131 } 1132 EXPORT_SYMBOL(mlx5_comp_vector_get_cpu); 1133 1134 #ifdef CONFIG_RFS_ACCEL 1135 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) 1136 { 1137 return dev->priv.eq_table->rmap; 1138 } 1139 #endif 1140 1141 struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) 1142 { 1143 struct mlx5_eq_table *table = dev->priv.eq_table; 1144 struct mlx5_eq_comp *eq; 1145 unsigned long index; 1146 1147 xa_for_each(&table->comp_eqs, index, eq) 1148 if (eq->core.eqn == eqn) 1149 return eq; 1150 1151 return ERR_PTR(-ENOENT); 1152 } 1153 1154 /* This function should only be called after mlx5_cmd_force_teardown_hca */ 1155 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) 1156 { 1157 mlx5_irq_table_free_irqs(dev); 1158 } 1159 1160 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1161 #define MLX5_MAX_ASYNC_EQS 4 1162 #else 1163 #define MLX5_MAX_ASYNC_EQS 3 1164 #endif 1165 1166 static int get_num_eqs(struct mlx5_core_dev *dev) 1167 { 1168 struct mlx5_eq_table *eq_table = dev->priv.eq_table; 1169 int max_dev_eqs; 1170 int max_eqs_sf; 1171 int num_eqs; 1172 1173 /* If ethernet is disabled we use just a single completion vector to 1174 * have the other vectors available for other drivers using mlx5_core. For 1175 * example, mlx5_vdpa 1176 */ 1177 if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev)) 1178 return 1; 1179 1180 max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 1181 MLX5_CAP_GEN(dev, max_num_eqs) : 1182 1 << MLX5_CAP_GEN(dev, log_max_eq); 1183 1184 num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table), 1185 max_dev_eqs - MLX5_MAX_ASYNC_EQS); 1186 if (mlx5_core_is_sf(dev)) { 1187 max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, 1188 mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); 1189 num_eqs = min_t(int, num_eqs, max_eqs_sf); 1190 } 1191 1192 return num_eqs; 1193 } 1194 1195 int mlx5_eq_table_create(struct mlx5_core_dev *dev) 1196 { 1197 struct mlx5_eq_table *eq_table = dev->priv.eq_table; 1198 int err; 1199 1200 eq_table->max_comp_eqs = get_num_eqs(dev); 1201 err = create_async_eqs(dev); 1202 if (err) { 1203 mlx5_core_err(dev, "Failed to create async EQs\n"); 1204 goto err_async_eqs; 1205 } 1206 1207 err = alloc_rmap(dev); 1208 if (err) { 1209 mlx5_core_err(dev, "Failed to allocate rmap\n"); 1210 goto err_rmap; 1211 } 1212 1213 return 0; 1214 1215 err_rmap: 1216 destroy_async_eqs(dev); 1217 err_async_eqs: 1218 return err; 1219 } 1220 1221 void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) 1222 { 1223 struct mlx5_eq_table *table = dev->priv.eq_table; 1224 struct mlx5_eq_comp *eq; 1225 unsigned long index; 1226 1227 xa_for_each(&table->comp_eqs, index, eq) 1228 destroy_comp_eq(dev, eq, index); 1229 1230 free_rmap(dev); 1231 destroy_async_eqs(dev); 1232 } 1233 1234 int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) 1235 { 1236 struct mlx5_eq_table *eqt = dev->priv.eq_table; 1237 1238 return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); 1239 } 1240 EXPORT_SYMBOL(mlx5_eq_notifier_register); 1241 1242 int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) 1243 { 1244 struct mlx5_eq_table *eqt = dev->priv.eq_table; 1245 1246 return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); 1247 } 1248 EXPORT_SYMBOL(mlx5_eq_notifier_unregister); 1249