1 /*- 2 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 #include "opt_rss.h" 29 #include "opt_ratelimit.h" 30 31 #include <linux/kernel.h> 32 #include <linux/module.h> 33 #include <linux/random.h> 34 #include <linux/vmalloc.h> 35 #include <linux/hardirq.h> 36 #include <linux/delay.h> 37 #include <dev/mlx5/driver.h> 38 #include <dev/mlx5/mlx5_ifc.h> 39 #include "mlx5_core.h" 40 41 #define MLX5_HEALTH_POLL_INTERVAL (2 * HZ) 42 #define MAX_MISSES 3 43 44 enum { 45 MLX5_DROP_NEW_HEALTH_WORK, 46 MLX5_DROP_NEW_RECOVERY_WORK, 47 MLX5_DROP_NEW_WATCHDOG_WORK, 48 }; 49 50 enum { 51 MLX5_SENSOR_NO_ERR = 0, 52 MLX5_SENSOR_PCI_COMM_ERR = 1, 53 MLX5_SENSOR_PCI_ERR = 2, 54 MLX5_SENSOR_NIC_DISABLED = 3, 55 MLX5_SENSOR_NIC_SW_RESET = 4, 56 MLX5_SENSOR_FW_SYND_RFR = 5, 57 }; 58 59 static int mlx5_fw_reset_enable = 1; 60 SYSCTL_INT(_hw_mlx5, OID_AUTO, fw_reset_enable, CTLFLAG_RWTUN, 61 &mlx5_fw_reset_enable, 0, 62 "Enable firmware reset"); 63 64 static unsigned int sw_reset_to = 1200; 65 SYSCTL_UINT(_hw_mlx5, OID_AUTO, sw_reset_timeout, CTLFLAG_RWTUN, 66 &sw_reset_to, 0, 67 "Minimum timeout in seconds between two firmware resets"); 68 69 70 static int lock_sem_sw_reset(struct mlx5_core_dev *dev) 71 { 72 int ret; 73 74 /* Lock GW access */ 75 ret = -mlx5_vsc_lock(dev); 76 if (ret) { 77 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 78 return ret; 79 } 80 81 ret = -mlx5_vsc_lock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 82 if (ret) { 83 if (ret == -EBUSY) 84 mlx5_core_dbg(dev, 85 "SW reset FW semaphore already locked, another function will handle the reset\n"); 86 else 87 mlx5_core_warn(dev, 88 "SW reset semaphore lock return %d\n", ret); 89 } 90 91 /* Unlock GW access */ 92 mlx5_vsc_unlock(dev); 93 94 return ret; 95 } 96 97 static int unlock_sem_sw_reset(struct mlx5_core_dev *dev) 98 { 99 int ret; 100 101 /* Lock GW access */ 102 ret = -mlx5_vsc_lock(dev); 103 if (ret) { 104 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 105 return ret; 106 } 107 108 ret = -mlx5_vsc_unlock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 109 110 /* Unlock GW access */ 111 mlx5_vsc_unlock(dev); 112 113 return ret; 114 } 115 116 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) 117 { 118 return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; 119 } 120 121 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) 122 { 123 u32 cur_cmdq_addr_l_sz; 124 125 cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); 126 iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | 127 state << MLX5_NIC_IFC_OFFSET, 128 &dev->iseg->cmdq_addr_l_sz); 129 } 130 131 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) 132 { 133 struct mlx5_core_health *health = &dev->priv.health; 134 struct mlx5_health_buffer __iomem *h = health->health; 135 u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; 136 u8 synd = ioread8(&h->synd); 137 138 if (rfr && synd) 139 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); 140 return rfr && synd; 141 } 142 143 static void mlx5_trigger_cmd_completions(struct work_struct *work) 144 { 145 struct mlx5_core_dev *dev = 146 container_of(work, struct mlx5_core_dev, priv.health.work_cmd_completion); 147 unsigned long flags; 148 u64 vector; 149 150 /* wait for pending handlers to complete */ 151 synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); 152 spin_lock_irqsave(&dev->cmd.alloc_lock, flags); 153 vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); 154 if (!vector) 155 goto no_trig; 156 157 vector |= MLX5_TRIGGERED_CMD_COMP; 158 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 159 160 mlx5_core_dbg(dev, "vector 0x%jx\n", (uintmax_t)vector); 161 mlx5_cmd_comp_handler(dev, vector, MLX5_CMD_MODE_EVENTS); 162 return; 163 164 no_trig: 165 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 166 } 167 168 static bool sensor_pci_no_comm(struct mlx5_core_dev *dev) 169 { 170 struct mlx5_core_health *health = &dev->priv.health; 171 struct mlx5_health_buffer __iomem *h = health->health; 172 bool err = ioread32be(&h->fw_ver) == 0xffffffff; 173 174 return err; 175 } 176 177 static bool sensor_nic_disabled(struct mlx5_core_dev *dev) 178 { 179 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED; 180 } 181 182 static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev) 183 { 184 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET; 185 } 186 187 static u32 check_fatal_sensors(struct mlx5_core_dev *dev) 188 { 189 if (sensor_pci_no_comm(dev)) 190 return MLX5_SENSOR_PCI_COMM_ERR; 191 if (pci_channel_offline(dev->pdev)) 192 return MLX5_SENSOR_PCI_ERR; 193 if (sensor_nic_disabled(dev)) 194 return MLX5_SENSOR_NIC_DISABLED; 195 if (sensor_nic_sw_reset(dev)) 196 return MLX5_SENSOR_NIC_SW_RESET; 197 if (sensor_fw_synd_rfr(dev)) 198 return MLX5_SENSOR_FW_SYND_RFR; 199 200 return MLX5_SENSOR_NO_ERR; 201 } 202 203 static void reset_fw_if_needed(struct mlx5_core_dev *dev) 204 { 205 bool supported; 206 u32 cmdq_addr, fatal_error; 207 208 if (!mlx5_fw_reset_enable) 209 return; 210 supported = (ioread32be(&dev->iseg->initializing) >> 211 MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; 212 if (!supported) 213 return; 214 215 /* The reset only needs to be issued by one PF. The health buffer is 216 * shared between all functions, and will be cleared during a reset. 217 * Check again to avoid a redundant 2nd reset. If the fatal erros was 218 * PCI related a reset won't help. 219 */ 220 fatal_error = check_fatal_sensors(dev); 221 if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || 222 fatal_error == MLX5_SENSOR_NIC_DISABLED || 223 fatal_error == MLX5_SENSOR_NIC_SW_RESET) { 224 mlx5_core_warn(dev, 225 "Not issuing FW reset. Either it's already done or won't help.\n"); 226 return; 227 } 228 229 mlx5_core_info(dev, "Issuing FW Reset\n"); 230 /* Write the NIC interface field to initiate the reset, the command 231 * interface address also resides here, don't overwrite it. 232 */ 233 cmdq_addr = ioread32be(&dev->iseg->cmdq_addr_l_sz); 234 iowrite32be((cmdq_addr & 0xFFFFF000) | 235 MLX5_NIC_IFC_SW_RESET << MLX5_NIC_IFC_OFFSET, 236 &dev->iseg->cmdq_addr_l_sz); 237 } 238 239 static bool 240 mlx5_health_allow_reset(struct mlx5_core_dev *dev) 241 { 242 struct mlx5_core_health *health = &dev->priv.health; 243 unsigned int delta; 244 bool ret; 245 246 if (health->last_reset_req != 0) { 247 delta = ticks - health->last_reset_req; 248 delta /= hz; 249 ret = delta >= sw_reset_to; 250 } else { 251 ret = true; 252 } 253 254 /* 255 * In principle, ticks may be 0. Setting it to off by one (-1) 256 * to prevent certain reset in next request. 257 */ 258 health->last_reset_req = ticks ? : -1; 259 if (!ret) 260 mlx5_core_warn(dev, 261 "Firmware reset elided due to auto-reset frequency threshold.\n"); 262 return (ret); 263 } 264 265 #define MLX5_CRDUMP_WAIT_MS 60000 266 #define MLX5_FW_RESET_WAIT_MS 1000 267 #define MLX5_NIC_STATE_POLL_MS 5 268 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) 269 { 270 int end, delay_ms = MLX5_CRDUMP_WAIT_MS; 271 u32 fatal_error; 272 int lock = -EBUSY; 273 274 fatal_error = check_fatal_sensors(dev); 275 276 if (fatal_error || force) { 277 if (xchg(&dev->state, MLX5_DEVICE_STATE_INTERNAL_ERROR) == 278 MLX5_DEVICE_STATE_INTERNAL_ERROR) 279 return; 280 if (!force) 281 mlx5_core_err(dev, "internal state error detected\n"); 282 283 /* 284 * Queue the command completion handler on the command 285 * work queue to avoid racing with the real command 286 * completion handler and then wait for it to 287 * complete: 288 */ 289 queue_work(dev->priv.health.wq_cmd, &dev->priv.health.work_cmd_completion); 290 flush_workqueue(dev->priv.health.wq_cmd); 291 } 292 293 mutex_lock(&dev->intf_state_mutex); 294 295 if (force) 296 goto err_state_done; 297 298 if (fatal_error == MLX5_SENSOR_FW_SYND_RFR && 299 mlx5_health_allow_reset(dev)) { 300 /* Get cr-dump and reset FW semaphore */ 301 if (mlx5_core_is_pf(dev)) 302 lock = lock_sem_sw_reset(dev); 303 304 /* Execute cr-dump and SW reset */ 305 if (lock != -EBUSY) { 306 (void)mlx5_fwdump(dev); 307 reset_fw_if_needed(dev); 308 delay_ms = MLX5_FW_RESET_WAIT_MS; 309 } 310 } 311 312 /* Recover from SW reset */ 313 end = jiffies + msecs_to_jiffies(delay_ms); 314 do { 315 if (sensor_nic_disabled(dev)) 316 break; 317 318 msleep(MLX5_NIC_STATE_POLL_MS); 319 } while (!time_after(jiffies, end)); 320 321 if (!sensor_nic_disabled(dev)) { 322 mlx5_core_err(dev, "NIC IFC still %d after %ums.\n", 323 mlx5_get_nic_state(dev), delay_ms); 324 } 325 326 /* Release FW semaphore if you are the lock owner */ 327 if (!lock) 328 unlock_sem_sw_reset(dev); 329 330 mlx5_core_info(dev, "System error event triggered\n"); 331 332 err_state_done: 333 mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); 334 mutex_unlock(&dev->intf_state_mutex); 335 } 336 337 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) 338 { 339 u8 nic_mode = mlx5_get_nic_state(dev); 340 341 if (nic_mode == MLX5_NIC_IFC_SW_RESET) { 342 /* The IFC mode field is 3 bits, so it will read 0x7 in two cases: 343 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded 344 * and this is a VF), this is not recoverable by SW reset. 345 * Logging of this is handled elsewhere. 346 * 2. FW reset has been issued by another function, driver can 347 * be reloaded to recover after the mode switches to 348 * MLX5_NIC_IFC_DISABLED. 349 */ 350 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) 351 mlx5_core_warn(dev, 352 "NIC SW reset is already progress\n"); 353 else 354 mlx5_core_warn(dev, 355 "Communication with FW over the PCI link is down\n"); 356 } else { 357 mlx5_core_warn(dev, "NIC mode %d\n", nic_mode); 358 } 359 360 mlx5_disable_device(dev); 361 } 362 363 #define MLX5_FW_RESET_WAIT_MS 1000 364 #define MLX5_NIC_STATE_POLL_MS 5 365 static void health_recover(struct work_struct *work) 366 { 367 unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS); 368 struct mlx5_core_health *health; 369 struct delayed_work *dwork; 370 struct mlx5_core_dev *dev; 371 struct mlx5_priv *priv; 372 bool recover = true; 373 u8 nic_mode; 374 375 dwork = container_of(work, struct delayed_work, work); 376 health = container_of(dwork, struct mlx5_core_health, recover_work); 377 priv = container_of(health, struct mlx5_priv, health); 378 dev = container_of(priv, struct mlx5_core_dev, priv); 379 380 /* This might likely be wrong, cut and paste from elsewhere? */ 381 bus_topo_lock(); 382 383 if (sensor_pci_no_comm(dev)) { 384 mlx5_core_err(dev, 385 "health recovery flow aborted, PCI reads still not working\n"); 386 recover = false; 387 } 388 389 nic_mode = mlx5_get_nic_state(dev); 390 while (nic_mode != MLX5_NIC_IFC_DISABLED && 391 !time_after(jiffies, end)) { 392 msleep(MLX5_NIC_STATE_POLL_MS); 393 nic_mode = mlx5_get_nic_state(dev); 394 } 395 396 if (nic_mode != MLX5_NIC_IFC_DISABLED) { 397 mlx5_core_err(dev, 398 "health recovery flow aborted, unexpected NIC IFC mode %d.\n", 399 nic_mode); 400 recover = false; 401 } 402 403 if (recover) { 404 mlx5_core_info(dev, "Starting health recovery flow\n"); 405 mlx5_recover_device(dev); 406 } 407 408 bus_topo_unlock(); 409 } 410 411 /* How much time to wait until health resetting the driver (in msecs) */ 412 #define MLX5_RECOVERY_DELAY_MSECS 60000 413 #define MLX5_RECOVERY_NO_DELAY 0 414 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev) 415 { 416 return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR || 417 dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR ? 418 MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY; 419 } 420 421 static void health_care(struct work_struct *work) 422 { 423 struct mlx5_core_health *health; 424 unsigned long recover_delay; 425 struct mlx5_core_dev *dev; 426 struct mlx5_priv *priv; 427 unsigned long flags; 428 429 health = container_of(work, struct mlx5_core_health, work); 430 priv = container_of(health, struct mlx5_priv, health); 431 dev = container_of(priv, struct mlx5_core_dev, priv); 432 433 mlx5_core_warn(dev, "handling bad device here\n"); 434 mlx5_handle_bad_state(dev); 435 recover_delay = msecs_to_jiffies(get_recovery_delay(dev)); 436 437 spin_lock_irqsave(&health->wq_lock, flags); 438 if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) { 439 mlx5_core_warn(dev, 440 "Scheduling recovery work with %lums delay\n", 441 recover_delay); 442 schedule_delayed_work(&health->recover_work, recover_delay); 443 } else { 444 mlx5_core_err(dev, 445 "new health works are not permitted at this stage\n"); 446 } 447 spin_unlock_irqrestore(&health->wq_lock, flags); 448 } 449 450 static int get_next_poll_jiffies(void) 451 { 452 unsigned long next; 453 454 get_random_bytes(&next, sizeof(next)); 455 next %= HZ; 456 next += jiffies + MLX5_HEALTH_POLL_INTERVAL; 457 458 return next; 459 } 460 461 void mlx5_trigger_health_work(struct mlx5_core_dev *dev) 462 { 463 struct mlx5_core_health *health = &dev->priv.health; 464 unsigned long flags; 465 466 spin_lock_irqsave(&health->wq_lock, flags); 467 if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) 468 queue_work(health->wq, &health->work); 469 else 470 mlx5_core_err(dev, 471 "new health works are not permitted at this stage\n"); 472 spin_unlock_irqrestore(&health->wq_lock, flags); 473 } 474 475 static const char *hsynd_str(u8 synd) 476 { 477 switch (synd) { 478 case MLX5_HEALTH_SYNDR_FW_ERR: 479 return "firmware internal error"; 480 case MLX5_HEALTH_SYNDR_IRISC_ERR: 481 return "irisc not responding"; 482 case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: 483 return "unrecoverable hardware error"; 484 case MLX5_HEALTH_SYNDR_CRC_ERR: 485 return "firmware CRC error"; 486 case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: 487 return "ICM fetch PCI error"; 488 case MLX5_HEALTH_SYNDR_HW_FTL_ERR: 489 return "HW fatal error\n"; 490 case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: 491 return "async EQ buffer overrun"; 492 case MLX5_HEALTH_SYNDR_EQ_ERR: 493 return "EQ error"; 494 case MLX5_HEALTH_SYNDR_EQ_INV: 495 return "Invalid EQ referenced"; 496 case MLX5_HEALTH_SYNDR_FFSER_ERR: 497 return "FFSER error"; 498 case MLX5_HEALTH_SYNDR_HIGH_TEMP: 499 return "High temperature"; 500 default: 501 return "unrecognized error"; 502 } 503 } 504 505 static u8 506 print_health_info(struct mlx5_core_dev *dev) 507 { 508 struct mlx5_core_health *health = &dev->priv.health; 509 struct mlx5_health_buffer __iomem *h = health->health; 510 u8 synd = ioread8(&h->synd); 511 char fw_str[18]; 512 u32 fw; 513 int i; 514 515 /* 516 * If synd is 0x0 - this indicates that FW is unable to 517 * respond to initialization segment reads and health buffer 518 * should not be read. 519 */ 520 if (synd == 0) 521 return (0); 522 523 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) 524 mlx5_core_info(dev, "assert_var[%d] 0x%08x\n", i, 525 ioread32be(h->assert_var + i)); 526 527 mlx5_core_info(dev, "assert_exit_ptr 0x%08x\n", 528 ioread32be(&h->assert_exit_ptr)); 529 mlx5_core_info(dev, "assert_callra 0x%08x\n", 530 ioread32be(&h->assert_callra)); 531 snprintf(fw_str, sizeof(fw_str), "%d.%d.%d", 532 fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); 533 mlx5_core_info(dev, "fw_ver %s\n", fw_str); 534 mlx5_core_info(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); 535 mlx5_core_info(dev, "irisc_index %d\n", ioread8(&h->irisc_index)); 536 mlx5_core_info(dev, "synd 0x%x: %s\n", 537 ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); 538 mlx5_core_info(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); 539 fw = ioread32be(&h->fw_ver); 540 mlx5_core_info(dev, "raw fw_ver 0x%08x\n", fw); 541 542 return synd; 543 } 544 545 static void health_watchdog(struct work_struct *work) 546 { 547 struct mlx5_core_dev *dev; 548 u16 power; 549 u8 status; 550 int err; 551 552 dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog); 553 554 if (!MLX5_CAP_GEN(dev, mcam_reg) || 555 !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power)) 556 return; 557 558 err = mlx5_pci_read_power_status(dev, &power, &status); 559 if (err < 0) { 560 mlx5_core_warn(dev, "Failed reading power status: %d\n", 561 err); 562 return; 563 } 564 565 dev->pwr_value = power; 566 567 if (dev->pwr_status != status) { 568 569 switch (status) { 570 case 0: 571 dev->pwr_status = status; 572 mlx5_core_info(dev, 573 "PCI power is not published by the PCIe slot.\n"); 574 break; 575 case 1: 576 dev->pwr_status = status; 577 mlx5_core_info(dev, 578 "PCIe slot advertised sufficient power (%uW).\n", 579 power); 580 break; 581 case 2: 582 dev->pwr_status = status; 583 mlx5_core_warn(dev, 584 "Detected insufficient power on the PCIe slot (%uW).\n", 585 power); 586 break; 587 default: 588 dev->pwr_status = 0; 589 mlx5_core_warn(dev, 590 "Unknown power state detected(%d).\n", 591 status); 592 break; 593 } 594 } 595 } 596 597 void 598 mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev) 599 { 600 struct mlx5_core_health *health = &dev->priv.health; 601 unsigned long flags; 602 603 spin_lock_irqsave(&health->wq_lock, flags); 604 if (!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags)) 605 queue_work(health->wq_watchdog, &health->work_watchdog); 606 else 607 mlx5_core_err(dev, 608 "scheduling watchdog is not permitted at this stage\n"); 609 spin_unlock_irqrestore(&health->wq_lock, flags); 610 } 611 612 static void poll_health(unsigned long data) 613 { 614 struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; 615 struct mlx5_core_health *health = &dev->priv.health; 616 u32 fatal_error; 617 u32 count; 618 619 if (dev->state != MLX5_DEVICE_STATE_UP) 620 return; 621 622 count = ioread32be(health->health_counter); 623 if (count == health->prev) 624 ++health->miss_counter; 625 else 626 health->miss_counter = 0; 627 628 health->prev = count; 629 if (health->miss_counter == MAX_MISSES) { 630 mlx5_core_err(dev, "device's health compromised - reached miss count\n"); 631 if (print_health_info(dev) == 0) 632 mlx5_core_err(dev, "FW is unable to respond to initialization segment reads\n"); 633 } 634 635 fatal_error = check_fatal_sensors(dev); 636 637 if (fatal_error && !health->fatal_error) { 638 mlx5_core_err(dev, 639 "Fatal error %u detected\n", fatal_error); 640 dev->priv.health.fatal_error = fatal_error; 641 print_health_info(dev); 642 mlx5_trigger_health_work(dev); 643 } 644 645 mod_timer(&health->timer, get_next_poll_jiffies()); 646 } 647 648 void mlx5_start_health_poll(struct mlx5_core_dev *dev) 649 { 650 struct mlx5_core_health *health = &dev->priv.health; 651 652 init_timer(&health->timer); 653 health->fatal_error = MLX5_SENSOR_NO_ERR; 654 clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 655 clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 656 clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 657 health->health = &dev->iseg->health; 658 health->health_counter = &dev->iseg->health_counter; 659 660 setup_timer(&health->timer, poll_health, (unsigned long)dev); 661 mod_timer(&health->timer, 662 round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL)); 663 664 /* do initial PCI power state readout */ 665 mlx5_trigger_health_watchdog(dev); 666 } 667 668 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) 669 { 670 struct mlx5_core_health *health = &dev->priv.health; 671 unsigned long flags; 672 673 if (disable_health) { 674 spin_lock_irqsave(&health->wq_lock, flags); 675 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 676 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 677 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 678 spin_unlock_irqrestore(&health->wq_lock, flags); 679 } 680 681 del_timer_sync(&health->timer); 682 } 683 684 void mlx5_drain_health_wq(struct mlx5_core_dev *dev) 685 { 686 struct mlx5_core_health *health = &dev->priv.health; 687 unsigned long flags; 688 689 spin_lock_irqsave(&health->wq_lock, flags); 690 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 691 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 692 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 693 spin_unlock_irqrestore(&health->wq_lock, flags); 694 cancel_delayed_work_sync(&health->recover_work); 695 cancel_work_sync(&health->work); 696 cancel_work_sync(&health->work_watchdog); 697 } 698 699 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) 700 { 701 struct mlx5_core_health *health = &dev->priv.health; 702 unsigned long flags; 703 704 spin_lock_irqsave(&health->wq_lock, flags); 705 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 706 spin_unlock_irqrestore(&health->wq_lock, flags); 707 cancel_delayed_work_sync(&dev->priv.health.recover_work); 708 } 709 710 void mlx5_health_cleanup(struct mlx5_core_dev *dev) 711 { 712 struct mlx5_core_health *health = &dev->priv.health; 713 714 destroy_workqueue(health->wq); 715 destroy_workqueue(health->wq_watchdog); 716 destroy_workqueue(health->wq_cmd); 717 } 718 719 int mlx5_health_init(struct mlx5_core_dev *dev) 720 { 721 struct mlx5_core_health *health; 722 char name[64]; 723 724 health = &dev->priv.health; 725 726 snprintf(name, sizeof(name), "%s-rec", dev_name(&dev->pdev->dev)); 727 health->wq = create_singlethread_workqueue(name); 728 if (!health->wq) 729 goto err_recovery; 730 731 snprintf(name, sizeof(name), "%s-wdg", dev_name(&dev->pdev->dev)); 732 health->wq_watchdog = create_singlethread_workqueue(name); 733 if (!health->wq_watchdog) 734 goto err_watchdog; 735 736 snprintf(name, sizeof(name), "%s-cmd", dev_name(&dev->pdev->dev)); 737 health->wq_cmd = create_singlethread_workqueue(name); 738 if (!health->wq_cmd) 739 goto err_cmd; 740 741 spin_lock_init(&health->wq_lock); 742 INIT_WORK(&health->work, health_care); 743 INIT_WORK(&health->work_watchdog, health_watchdog); 744 INIT_WORK(&health->work_cmd_completion, mlx5_trigger_cmd_completions); 745 INIT_DELAYED_WORK(&health->recover_work, health_recover); 746 747 return 0; 748 749 err_cmd: 750 destroy_workqueue(health->wq_watchdog); 751 err_watchdog: 752 destroy_workqueue(health->wq); 753 err_recovery: 754 return -ENOMEM; 755 } 756