/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eq.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"

enum {
	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
};

enum {
	MLX5_EQ_STATE_ARMED		= 0x9,
	MLX5_EQ_STATE_FIRED		= 0xa,
	MLX5_EQ_STATE_ALWAYS_ARMED	= 0xb,
};

enum {
	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
};

struct mlx5_irq_info {
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
	void *context; /* dev_id provided to request_irq */
};

struct mlx5_eq_table {
	struct list_head	comp_eqs_list;
	struct mlx5_eq		pages_eq;
	struct mlx5_eq		cmd_eq;
	struct mlx5_eq		async_eq;

	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];

	/* Since CQ DB is stored in async_eq */
	struct mlx5_nb		cq_err_nb;

	struct mutex		lock; /* sync async eqs creations */
	int			num_comp_vectors;
	struct mlx5_irq_info	*irq_info;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap		*rmap;
#endif
};

#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
			       (1ull << MLX5_EVENT_TYPE_COMM_EST)	    | \
			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)	    | \
			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR)	    | \
			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)	    | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)	    | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
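
/*
 * MLX5_ASYNC_EVENT_MASK is the baseline event bitmask for the main async
 * EQ; gather_async_events_mask() below extends it with events that are
 * only valid when the corresponding device capability is reported.
 */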

static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};

	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

/* caller must eventually call mlx5_cq_put on the returned cq */
static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *cq = NULL;

	rcu_read_lock();
	cq = radix_tree_lookup(&table->tree, cqn);
	if (likely(cq))
		mlx5_cq_hold(cq);
	rcu_read_unlock();

	return cq;
}

static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
{
	struct mlx5_eq_comp *eq_comp = eq_ptr;
	struct mlx5_eq *eq = eq_ptr;
	struct mlx5_eqe *eqe;
	int set_ci = 0;
	u32 cqn = -1;

	while ((eqe = next_eqe_sw(eq))) {
		struct mlx5_core_cq *cq;

		/* Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();
		/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
		cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;

		cq = mlx5_eq_cq_get(eq, cqn);
		if (likely(cq)) {
			++cq->arm_sn;
			cq->comp(cq);
			mlx5_cq_put(cq);
		} else {
			mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
		}

		++eq->cons_index;
		++set_ci;

		/* The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events.  We
		 * create our EQs with MLX5_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
			eq_update_ci(eq, 0);
			set_ci = 0;
		}
	}

	eq_update_ci(eq, 1);

	if (cqn != -1)
		tasklet_schedule(&eq_comp->tasklet_ctx.task);

	return IRQ_HANDLED;
}

/* Some architectures don't latch interrupts when they are disabled, so using
 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
 * avoid losing them.  Do not use it unless it is the last resort.
 */
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
	u32 count_eqe;

	disable_irq(eq->core.irqn);
	count_eqe = eq->core.cons_index;
	mlx5_eq_comp_int(eq->core.irqn, eq);
	count_eqe = eq->core.cons_index - count_eqe;
	enable_irq(eq->core.irqn);

	return count_eqe;
}
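
/*
 * Async EQEs are demultiplexed through per-event-type atomic notifier
 * chains, plus the MLX5_EVENT_TYPE_NOTIFY_ANY chain, which observes every
 * event.  A consumer subscribes along these lines (a sketch; my_handler
 * is a placeholder and runs in atomic (IRQ) context, so it must not
 * sleep):
 *
 *	MLX5_NB_INIT(&nb, my_handler, CQ_ERROR);
 *	mlx5_eq_notifier_register(dev, &nb);
 */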

static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
{
	struct mlx5_eq *eq = eq_ptr;
	struct mlx5_eq_table *eqt;
	struct mlx5_core_dev *dev;
	struct mlx5_eqe *eqe;
	int set_ci = 0;

	dev = eq->dev;
	eqt = dev->priv.eq_table;

	while ((eqe = next_eqe_sw(eq))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
			atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
		else
			mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);

		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);

		++eq->cons_index;
		++set_ci;

		/* The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events.  We
		 * create our EQs with MLX5_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
			eq_update_ci(eq, 0);
			set_ci = 0;
		}
	}

	eq_update_ci(eq, 1);

	return IRQ_HANDLED;
}

static void init_eq_buf(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	int i;

	for (i = 0; i < eq->nent; i++) {
		eqe = get_eqe(eq, i);
		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
	}
}

static int
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
	      struct mlx5_eq_param *param)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	struct mlx5_cq_table *cq_table = &eq->cq_table;
	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
	struct mlx5_priv *priv = &dev->priv;
	u8 vecidx = param->index;
	__be64 *pas;
	void *eqc;
	int inlen;
	u32 *in;
	int err;

	if (eq_table->irq_info[vecidx].context)
		return -EEXIST;

	/* Init CQ table */
	memset(cq_table, 0, sizeof(*cq_table));
	spin_lock_init(&cq_table->lock);
	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);

	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
	eq->cons_index = 0;
	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
	if (err)
		return err;

	init_eq_buf(eq);

	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
	mlx5_fill_page_array(&eq->buf, pas);

	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
	if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx))
		MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);

	MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);

	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
	MLX5_SET(eqc, eqc, intr, vecidx);
	MLX5_SET(eqc, eqc, log_page_size,
		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err)
		goto err_in;

	snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
		 name, pci_name(dev->pdev));
	eq_table->irq_info[vecidx].context = param->context;

	eq->vecidx = vecidx;
	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
	eq->dev = dev;
	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
	err = request_irq(eq->irqn, param->handler, 0,
			  eq_table->irq_info[vecidx].name, param->context);
	if (err)
		goto err_eq;

	err = mlx5_debug_eq_add(dev, eq);
	if (err)
		goto err_irq;

	/* EQs are created in ARMED state */
	eq_update_ci(eq, 1);

	kvfree(in);
	return 0;

err_irq:
	free_irq(eq->irqn, param->context);

err_eq:
	mlx5_cmd_destroy_eq(dev, eq->eqn);

err_in:
	kvfree(in);

err_buf:
	mlx5_buf_free(dev, &eq->buf);
	return err;
}
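
/*
 * Teardown mirrors create_map_eq() in reverse: the debugfs entry goes
 * first, the IRQ is freed (using the dev_id recorded in irq_info) before
 * the firmware DESTROY_EQ command, and synchronize_irq() makes sure no
 * handler is still running before the EQ buffer is freed.
 */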

static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	struct mlx5_irq_info *irq_info;
	int err;

	irq_info = &eq_table->irq_info[eq->vecidx];

	mlx5_debug_eq_remove(dev, eq);

	free_irq(eq->irqn, irq_info->context);
	irq_info->context = NULL;

	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
	if (err)
		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
			       eq->eqn);
	synchronize_irq(eq->irqn);

	mlx5_buf_free(dev, &eq->buf);

	return err;
}

int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	int err;

	spin_lock(&table->lock);
	err = radix_tree_insert(&table->tree, cq->cqn, cq);
	spin_unlock(&table->lock);

	return err;
}

int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *tmp;

	spin_lock(&table->lock);
	tmp = radix_tree_delete(&table->tree, cq->cqn);
	spin_unlock(&table->lock);

	if (!tmp) {
		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
			       cq->cqn, eq->eqn);
		return -ENOENT;
	}

	if (tmp != cq) {
		mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
			       cq->cqn, eq->eqn);
		return -EINVAL;
	}

	return 0;
}

int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table;
	int i, err;

	eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
	if (!eq_table)
		return -ENOMEM;

	dev->priv.eq_table = eq_table;

	err = mlx5_eq_debugfs_init(dev);
	if (err)
		goto kvfree_eq_table;

	mutex_init(&eq_table->lock);
	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);

	return 0;

kvfree_eq_table:
	kvfree(eq_table);
	dev->priv.eq_table = NULL;
	return err;
}

void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
	mlx5_eq_debugfs_cleanup(dev);
	kvfree(dev->priv.eq_table);
}

/* Async EQs */

static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int err;

	mutex_lock(&eq_table->lock);
	if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
		err = -ENOSPC;
		goto unlock;
	}

	err = create_map_eq(dev, eq, name, param);
unlock:
	mutex_unlock(&eq_table->lock);
	return err;
}

static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int err;

	mutex_lock(&eq_table->lock);
	err = destroy_unmap_eq(dev, eq);
	mutex_unlock(&eq_table->lock);
	return err;
}

static int cq_err_event_notifier(struct notifier_block *nb,
				 unsigned long type, void *data)
{
	struct mlx5_eq_table *eqt;
	struct mlx5_core_cq *cq;
	struct mlx5_eqe *eqe;
	struct mlx5_eq *eq;
	u32 cqn;

	/* type == MLX5_EVENT_TYPE_CQ_ERROR */

	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
	eq  = &eqt->async_eq;
	eqe = data;

	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
	mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
		       cqn, eqe->data.cq_err.syndrome);

	cq = mlx5_eq_cq_get(eq, cqn);
	if (unlikely(!cq)) {
		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
		return NOTIFY_OK;
	}

	cq->event(cq, type);

	mlx5_cq_put(cq);

	return NOTIFY_OK;
}
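
/*
 * Build the event bitmask for the main async EQ: start from the baseline
 * MLX5_ASYNC_EVENT_MASK and add the events that the device actually
 * advertises through its capability bits.
 */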

static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
{
	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;

	if (MLX5_VPORT_MANAGER(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);

	if (MLX5_CAP_GEN(dev, general_notification_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);

	if (MLX5_CAP_GEN(dev, port_module_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
	else
		mlx5_core_dbg(dev, "port_module_event is not set\n");

	if (MLX5_PPS_CAP(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);

	if (MLX5_CAP_GEN(dev, fpga))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
				    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);

	if (MLX5_CAP_GEN_MAX(dev, dct))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);

	if (MLX5_CAP_GEN(dev, temp_warn_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);

	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);

	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);

	if (mlx5_core_is_ecpf_esw_manager(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);

	return async_event_mask;
}

static int create_async_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_param param = {};
	int err;

	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
	mlx5_eq_notifier_register(dev, &table->cq_err_nb);

	param = (struct mlx5_eq_param) {
		.index = MLX5_EQ_CMD_IDX,
		.mask = 1ull << MLX5_EVENT_TYPE_CMD,
		.nent = MLX5_NUM_CMD_EQE,
		.context = &table->cmd_eq,
		.handler = mlx5_eq_async_int,
	};
	err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
	if (err) {
		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
		goto err0;
	}

	mlx5_cmd_use_events(dev);

	param = (struct mlx5_eq_param) {
		.index = MLX5_EQ_ASYNC_IDX,
		.mask = gather_async_events_mask(dev),
		.nent = MLX5_NUM_ASYNC_EQE,
		.context = &table->async_eq,
		.handler = mlx5_eq_async_int,
	};
	err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
	if (err) {
		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
		goto err1;
	}

	param = (struct mlx5_eq_param) {
		.index = MLX5_EQ_PAGEREQ_IDX,
		.mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
		.nent = /* TODO: sriov max_vf + */ 1,
		.context = &table->pages_eq,
		.handler = mlx5_eq_async_int,
	};
	err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
	if (err) {
		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
		goto err2;
	}

	return err;

err2:
	destroy_async_eq(dev, &table->async_eq);

err1:
	mlx5_cmd_use_polling(dev);
	destroy_async_eq(dev, &table->cmd_eq);
err0:
	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
	return err;
}
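
/*
 * Note the ordering above: the command EQ must exist before
 * mlx5_cmd_use_events() switches the command interface from polling to
 * event-driven completions.  destroy_async_eqs() below reverts to polling
 * via mlx5_cmd_use_polling() before tearing the command EQ down.
 */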

static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	int err;

	err = destroy_async_eq(dev, &table->pages_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
			      err);

	err = destroy_async_eq(dev, &table->async_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
			      err);

	mlx5_cmd_use_polling(dev);

	err = destroy_async_eq(dev, &table->cmd_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
			      err);

	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}

struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
	return &dev->priv.eq_table->async_eq;
}

void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
	synchronize_irq(dev->priv.eq_table->async_eq.irqn);
}

void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
	synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
}

/* Generic EQ API for mlx5_core consumers.
 * Needed for the RDMA ODP EQ for now.
 */
struct mlx5_eq *
mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
		       struct mlx5_eq_param *param)
{
	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
	int err;

	if (!eq)
		return ERR_PTR(-ENOMEM);

	err = create_async_eq(dev, name, eq, param);
	if (err) {
		kvfree(eq);
		eq = ERR_PTR(err);
	}

	return eq;
}
EXPORT_SYMBOL(mlx5_eq_create_generic);

int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	int err;

	if (IS_ERR(eq))
		return -EINVAL;

	err = destroy_async_eq(dev, eq);
	if (err)
		goto out;

	kvfree(eq);
out:
	return err;
}
EXPORT_SYMBOL(mlx5_eq_destroy_generic);

struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
	u32 ci = eq->cons_index + cc;
	struct mlx5_eqe *eqe;

	eqe = get_eqe(eq, ci & (eq->nent - 1));
	eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
	/* Make sure we read EQ entry contents after we've
	 * checked the ownership bit.
	 */
	if (eqe)
		dma_rmb();

	return eqe;
}
EXPORT_SYMBOL(mlx5_eq_get_eqe);
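
/*
 * Minimal sketch of a generic EQ consumer loop built on
 * mlx5_eq_get_eqe() above and mlx5_eq_update_ci() below, assuming the
 * caller created the EQ with mlx5_eq_create_generic() and handle_eqe()
 * is the caller's own routine.  cc counts entries consumed but not yet
 * acknowledged; the final update advances the consumer index by cc and
 * re-arms the EQ:
 *
 *	struct mlx5_eqe *eqe;
 *	u32 cc = 0;
 *
 *	while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
 *		handle_eqe(eqe);
 *		cc++;
 *	}
 *	mlx5_eq_update_ci(eq, cc, true);
 */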

void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
{
	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
	u32 val;

	eq->cons_index += cc;
	val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);

	__raw_writel((__force u32)cpu_to_be32(val), addr);
	/* We still want ordering, just not swabbing, so add a barrier */
	wmb();
}
EXPORT_SYMBOL(mlx5_eq_update_ci);

/* Completion EQs */

static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
	struct mlx5_priv *priv = &mdev->priv;
	int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
	int irq = pci_irq_vector(mdev->pdev, vecidx);
	struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];

	if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
		return -ENOMEM;
	}

	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
			irq_info->mask);

	if (IS_ENABLED(CONFIG_SMP) &&
	    irq_set_affinity_hint(irq, irq_info->mask))
		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);

	return 0;
}

static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
	int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
	struct mlx5_priv *priv = &mdev->priv;
	int irq = pci_irq_vector(mdev->pdev, vecidx);
	struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];

	irq_set_affinity_hint(irq, NULL);
	free_cpumask_var(irq_info->mask);
}

static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
{
	int err;
	int i;

	for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
		err = set_comp_irq_affinity_hint(mdev, i);
		if (err)
			goto err_out;
	}

	return 0;

err_out:
	for (i--; i >= 0; i--)
		clear_comp_irq_affinity_hint(mdev, i);

	return err;
}

static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
{
	int i;

	for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
		clear_comp_irq_affinity_hint(mdev, i);
}

static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq, *n;

	clear_comp_irqs_affinity_hints(dev);

#ifdef CONFIG_RFS_ACCEL
	if (table->rmap) {
		free_irq_cpu_rmap(table->rmap);
		table->rmap = NULL;
	}
#endif
	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		list_del(&eq->list);
		if (destroy_unmap_eq(dev, &eq->core))
			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
				       eq->core.eqn);
		tasklet_disable(&eq->tasklet_ctx.task);
		kfree(eq);
	}
}
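
/*
 * Each completion vector gets its own EQ with a tasklet for deferred CQ
 * completion processing.  EQs are appended to comp_eqs_list in vector
 * order so that mlx5_vector2eqn() can resolve a vector index by walking
 * the list; with CONFIG_RFS_ACCEL the IRQs are also registered in the
 * cpu_rmap used by accelerated RFS.
 */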

static int create_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_eq_comp *eq;
	int ncomp_vec;
	int nent;
	int err;
	int i;

	INIT_LIST_HEAD(&table->comp_eqs_list);
	ncomp_vec = table->num_comp_vectors;
	nent = MLX5_COMP_EQ_SIZE;
#ifdef CONFIG_RFS_ACCEL
	table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
	if (!table->rmap)
		return -ENOMEM;
#endif
	for (i = 0; i < ncomp_vec; i++) {
		int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
		struct mlx5_eq_param param = {};

		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
		if (!eq) {
			err = -ENOMEM;
			goto clean;
		}

		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
		spin_lock_init(&eq->tasklet_ctx.lock);
		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
			     (unsigned long)&eq->tasklet_ctx);

#ifdef CONFIG_RFS_ACCEL
		irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
#endif
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
		param = (struct mlx5_eq_param) {
			.index = vecidx,
			.mask = 0,
			.nent = nent,
			.context = &eq->core,
			.handler = mlx5_eq_comp_int,
		};
		err = create_map_eq(dev, &eq->core, name, &param);
		if (err) {
			kfree(eq);
			goto clean;
		}
		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
		/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
		list_add_tail(&eq->list, &table->comp_eqs_list);
	}

	err = set_comp_irq_affinity_hints(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
		goto clean;
	}

	return 0;

clean:
	destroy_comp_eqs(dev);
	return err;
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
		    unsigned int *irqn)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq, *n;
	int err = -ENOENT;
	int i = 0;

	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		if (i++ == vector) {
			*eqn = eq->core.eqn;
			*irqn = eq->core.irqn;
			err = 0;
			break;
		}
	}

	return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
	return dev->priv.eq_table->num_comp_vectors;
}
EXPORT_SYMBOL(mlx5_comp_vectors_count);

struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
	/* TODO: consider irq_get_affinity_mask(irq) */
	return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);

#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
	return dev->priv.eq_table->rmap;
}
#endif

struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;

	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->core.eqn == eqn)
			return eq;
	}

	return ERR_PTR(-ENOENT);
}

/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	int i, max_eqs;

	clear_comp_irqs_affinity_hints(dev);

#ifdef CONFIG_RFS_ACCEL
	if (table->rmap) {
		free_irq_cpu_rmap(table->rmap);
		table->rmap = NULL;
	}
#endif

	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
	max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
	for (i = max_eqs - 1; i >= 0; i--) {
		if (!table->irq_info[i].context)
			continue;
		free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
		table->irq_info[i].context = NULL;
	}
	mutex_unlock(&table->lock);
	pci_free_irq_vectors(dev->pdev);
}
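
/*
 * Vector budget: one completion vector per online CPU per port, plus
 * MLX5_EQ_VEC_COMP_BASE vectors reserved for the async/cmd/pages EQs,
 * clamped to the number of EQs the device supports.  As an illustration
 * (numbers are examples only), a single-port device on a 16-CPU host
 * requests 16 + MLX5_EQ_VEC_COMP_BASE MSI-X vectors; pci_alloc_irq_vectors()
 * may grant fewer, and whatever remains above the base becomes
 * num_comp_vectors.
 */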

static int alloc_irq_vectors(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = priv->eq_table;
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int nvec;
	int err;

	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
	       MLX5_EQ_VEC_COMP_BASE;
	nvec = min_t(int, nvec, num_eqs);
	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
		return -ENOMEM;

	table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
	if (!table->irq_info)
		return -ENOMEM;

	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
				     nvec, PCI_IRQ_MSIX);
	if (nvec < 0) {
		err = nvec;
		goto err_free_irq_info;
	}

	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;

	return 0;

err_free_irq_info:
	kfree(table->irq_info);
	return err;
}

static void free_irq_vectors(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	pci_free_irq_vectors(dev->pdev);
	kfree(priv->eq_table->irq_info);
}

int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
	int err;

	err = alloc_irq_vectors(dev);
	if (err) {
		mlx5_core_err(dev, "alloc irq vectors failed\n");
		return err;
	}

	err = create_async_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to create async EQs\n");
		goto err_async_eqs;
	}

	err = create_comp_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to create completion EQs\n");
		goto err_comp_eqs;
	}

	return 0;

err_comp_eqs:
	destroy_async_eqs(dev);
err_async_eqs:
	free_irq_vectors(dev);
	return err;
}

void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
	destroy_comp_eqs(dev);
	destroy_async_eqs(dev);
	free_irq_vectors(dev);
}

int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
	struct mlx5_eq_table *eqt = dev->priv.eq_table;

	if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
		return -EINVAL;

	return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
}

int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
	struct mlx5_eq_table *eqt = dev->priv.eq_table;

	if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
		return -EINVAL;

	return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}