1 /*- 2 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "opt_rss.h" 27 #include "opt_ratelimit.h" 28 29 #include <linux/kernel.h> 30 #include <linux/module.h> 31 #include <linux/random.h> 32 #include <linux/vmalloc.h> 33 #include <linux/hardirq.h> 34 #include <linux/delay.h> 35 #include <dev/mlx5/driver.h> 36 #include <dev/mlx5/mlx5_ifc.h> 37 #include <dev/mlx5/mlx5_core/mlx5_core.h> 38 39 #define MLX5_HEALTH_POLL_INTERVAL (2 * HZ) 40 #define MAX_MISSES 3 41 42 enum { 43 MLX5_DROP_NEW_HEALTH_WORK, 44 MLX5_DROP_NEW_RECOVERY_WORK, 45 MLX5_DROP_NEW_WATCHDOG_WORK, 46 }; 47 48 enum { 49 MLX5_SENSOR_NO_ERR = 0, 50 MLX5_SENSOR_PCI_COMM_ERR = 1, 51 MLX5_SENSOR_PCI_ERR = 2, 52 MLX5_SENSOR_NIC_DISABLED = 3, 53 MLX5_SENSOR_NIC_SW_RESET = 4, 54 MLX5_SENSOR_FW_SYND_RFR = 5, 55 }; 56 57 static int mlx5_fw_reset_enable = 1; 58 SYSCTL_INT(_hw_mlx5, OID_AUTO, fw_reset_enable, CTLFLAG_RWTUN, 59 &mlx5_fw_reset_enable, 0, 60 "Enable firmware reset"); 61 62 static unsigned int sw_reset_to = 1200; 63 SYSCTL_UINT(_hw_mlx5, OID_AUTO, sw_reset_timeout, CTLFLAG_RWTUN, 64 &sw_reset_to, 0, 65 "Minimum timeout in seconds between two firmware resets"); 66 67 68 static int lock_sem_sw_reset(struct mlx5_core_dev *dev) 69 { 70 int ret; 71 72 /* Lock GW access */ 73 ret = -mlx5_vsc_lock(dev); 74 if (ret) { 75 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 76 return ret; 77 } 78 79 ret = -mlx5_vsc_lock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 80 if (ret) { 81 if (ret == -EBUSY) 82 mlx5_core_dbg(dev, 83 "SW reset FW semaphore already locked, another function will handle the reset\n"); 84 else 85 mlx5_core_warn(dev, 86 "SW reset semaphore lock return %d\n", ret); 87 } 88 89 /* Unlock GW access */ 90 mlx5_vsc_unlock(dev); 91 92 return ret; 93 } 94 95 static int unlock_sem_sw_reset(struct mlx5_core_dev *dev) 96 { 97 int ret; 98 99 /* Lock GW access */ 100 ret = -mlx5_vsc_lock(dev); 101 if (ret) { 102 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 103 return ret; 104 } 105 106 ret = -mlx5_vsc_unlock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 107 108 /* Unlock GW access */ 109 mlx5_vsc_unlock(dev); 110 111 return ret; 112 } 113 114 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) 115 { 116 return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; 117 } 118 119 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) 120 { 121 u32 cur_cmdq_addr_l_sz; 122 123 cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); 124 iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | 125 state << MLX5_NIC_IFC_OFFSET, 126 &dev->iseg->cmdq_addr_l_sz); 127 } 128 129 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) 130 { 131 struct mlx5_core_health *health = &dev->priv.health; 132 struct mlx5_health_buffer __iomem *h = health->health; 133 u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; 134 u8 synd = ioread8(&h->synd); 135 136 if (rfr && synd) 137 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); 138 return rfr && synd; 139 } 140 141 static void mlx5_trigger_cmd_completions(struct work_struct *work) 142 { 143 struct mlx5_core_dev *dev = 144 container_of(work, struct mlx5_core_dev, priv.health.work_cmd_completion); 145 unsigned long flags; 146 u64 vector; 147 148 /* wait for pending handlers to complete */ 149 synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); 150 spin_lock_irqsave(&dev->cmd.alloc_lock, flags); 151 vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); 152 if (!vector) 153 goto no_trig; 154 155 vector |= MLX5_TRIGGERED_CMD_COMP; 156 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 157 158 mlx5_core_dbg(dev, "vector 0x%jx\n", (uintmax_t)vector); 159 mlx5_cmd_comp_handler(dev, vector, MLX5_CMD_MODE_EVENTS); 160 return; 161 162 no_trig: 163 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 164 } 165 166 static bool sensor_pci_no_comm(struct mlx5_core_dev *dev) 167 { 168 struct mlx5_core_health *health = &dev->priv.health; 169 struct mlx5_health_buffer __iomem *h = health->health; 170 bool err = ioread32be(&h->fw_ver) == 0xffffffff; 171 172 return err; 173 } 174 175 static bool sensor_nic_disabled(struct mlx5_core_dev *dev) 176 { 177 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED; 178 } 179 180 static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev) 181 { 182 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET; 183 } 184 185 static u32 check_fatal_sensors(struct mlx5_core_dev *dev) 186 { 187 if (sensor_pci_no_comm(dev)) 188 return MLX5_SENSOR_PCI_COMM_ERR; 189 if (pci_channel_offline(dev->pdev)) 190 return MLX5_SENSOR_PCI_ERR; 191 if (sensor_nic_disabled(dev)) 192 return MLX5_SENSOR_NIC_DISABLED; 193 if (sensor_nic_sw_reset(dev)) 194 return MLX5_SENSOR_NIC_SW_RESET; 195 if (sensor_fw_synd_rfr(dev)) 196 return MLX5_SENSOR_FW_SYND_RFR; 197 198 return MLX5_SENSOR_NO_ERR; 199 } 200 201 static void reset_fw_if_needed(struct mlx5_core_dev *dev) 202 { 203 bool supported; 204 u32 cmdq_addr, fatal_error; 205 206 if (!mlx5_fw_reset_enable) 207 return; 208 supported = (ioread32be(&dev->iseg->initializing) >> 209 MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; 210 if (!supported) 211 return; 212 213 /* The reset only needs to be issued by one PF. The health buffer is 214 * shared between all functions, and will be cleared during a reset. 215 * Check again to avoid a redundant 2nd reset. If the fatal erros was 216 * PCI related a reset won't help. 217 */ 218 fatal_error = check_fatal_sensors(dev); 219 if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || 220 fatal_error == MLX5_SENSOR_NIC_DISABLED || 221 fatal_error == MLX5_SENSOR_NIC_SW_RESET) { 222 mlx5_core_warn(dev, 223 "Not issuing FW reset. Either it's already done or won't help.\n"); 224 return; 225 } 226 227 mlx5_core_info(dev, "Issuing FW Reset\n"); 228 /* Write the NIC interface field to initiate the reset, the command 229 * interface address also resides here, don't overwrite it. 230 */ 231 cmdq_addr = ioread32be(&dev->iseg->cmdq_addr_l_sz); 232 iowrite32be((cmdq_addr & 0xFFFFF000) | 233 MLX5_NIC_IFC_SW_RESET << MLX5_NIC_IFC_OFFSET, 234 &dev->iseg->cmdq_addr_l_sz); 235 } 236 237 static bool 238 mlx5_health_allow_reset(struct mlx5_core_dev *dev) 239 { 240 struct mlx5_core_health *health = &dev->priv.health; 241 unsigned int delta; 242 bool ret; 243 244 if (health->last_reset_req != 0) { 245 delta = ticks - health->last_reset_req; 246 delta /= hz; 247 ret = delta >= sw_reset_to; 248 } else { 249 ret = true; 250 } 251 252 /* 253 * In principle, ticks may be 0. Setting it to off by one (-1) 254 * to prevent certain reset in next request. 255 */ 256 health->last_reset_req = ticks ? : -1; 257 if (!ret) 258 mlx5_core_warn(dev, 259 "Firmware reset elided due to auto-reset frequency threshold.\n"); 260 return (ret); 261 } 262 263 #define MLX5_CRDUMP_WAIT_MS 60000 264 #define MLX5_FW_RESET_WAIT_MS 1000 265 #define MLX5_NIC_STATE_POLL_MS 5 266 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) 267 { 268 int end, delay_ms = MLX5_CRDUMP_WAIT_MS; 269 u32 fatal_error; 270 int lock = -EBUSY; 271 272 fatal_error = check_fatal_sensors(dev); 273 274 if (fatal_error || force) { 275 if (xchg(&dev->state, MLX5_DEVICE_STATE_INTERNAL_ERROR) == 276 MLX5_DEVICE_STATE_INTERNAL_ERROR) 277 return; 278 if (!force) 279 mlx5_core_err(dev, "internal state error detected\n"); 280 281 /* 282 * Queue the command completion handler on the command 283 * work queue to avoid racing with the real command 284 * completion handler and then wait for it to 285 * complete: 286 */ 287 queue_work(dev->priv.health.wq_cmd, &dev->priv.health.work_cmd_completion); 288 flush_workqueue(dev->priv.health.wq_cmd); 289 } 290 291 mutex_lock(&dev->intf_state_mutex); 292 293 if (force) 294 goto err_state_done; 295 296 if (fatal_error == MLX5_SENSOR_FW_SYND_RFR && 297 mlx5_health_allow_reset(dev)) { 298 /* Get cr-dump and reset FW semaphore */ 299 if (mlx5_core_is_pf(dev)) 300 lock = lock_sem_sw_reset(dev); 301 302 /* Execute cr-dump and SW reset */ 303 if (lock != -EBUSY) { 304 (void)mlx5_fwdump(dev); 305 reset_fw_if_needed(dev); 306 delay_ms = MLX5_FW_RESET_WAIT_MS; 307 } 308 } 309 310 /* Recover from SW reset */ 311 end = jiffies + msecs_to_jiffies(delay_ms); 312 do { 313 if (sensor_nic_disabled(dev)) 314 break; 315 316 msleep(MLX5_NIC_STATE_POLL_MS); 317 } while (!time_after(jiffies, end)); 318 319 if (!sensor_nic_disabled(dev)) { 320 mlx5_core_err(dev, "NIC IFC still %d after %ums.\n", 321 mlx5_get_nic_state(dev), delay_ms); 322 } 323 324 /* Release FW semaphore if you are the lock owner */ 325 if (!lock) 326 unlock_sem_sw_reset(dev); 327 328 mlx5_core_info(dev, "System error event triggered\n"); 329 330 err_state_done: 331 mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); 332 mutex_unlock(&dev->intf_state_mutex); 333 } 334 335 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) 336 { 337 u8 nic_mode = mlx5_get_nic_state(dev); 338 339 if (nic_mode == MLX5_NIC_IFC_SW_RESET) { 340 /* The IFC mode field is 3 bits, so it will read 0x7 in two cases: 341 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded 342 * and this is a VF), this is not recoverable by SW reset. 343 * Logging of this is handled elsewhere. 344 * 2. FW reset has been issued by another function, driver can 345 * be reloaded to recover after the mode switches to 346 * MLX5_NIC_IFC_DISABLED. 347 */ 348 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) 349 mlx5_core_warn(dev, 350 "NIC SW reset is already progress\n"); 351 else 352 mlx5_core_warn(dev, 353 "Communication with FW over the PCI link is down\n"); 354 } else { 355 mlx5_core_warn(dev, "NIC mode %d\n", nic_mode); 356 } 357 358 mlx5_disable_device(dev); 359 } 360 361 #define MLX5_FW_RESET_WAIT_MS 1000 362 #define MLX5_NIC_STATE_POLL_MS 5 363 static void health_recover(struct work_struct *work) 364 { 365 unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS); 366 struct mlx5_core_health *health; 367 struct delayed_work *dwork; 368 struct mlx5_core_dev *dev; 369 struct mlx5_priv *priv; 370 bool recover = true; 371 u8 nic_mode; 372 373 dwork = container_of(work, struct delayed_work, work); 374 health = container_of(dwork, struct mlx5_core_health, recover_work); 375 priv = container_of(health, struct mlx5_priv, health); 376 dev = container_of(priv, struct mlx5_core_dev, priv); 377 378 /* This might likely be wrong, cut and paste from elsewhere? */ 379 bus_topo_lock(); 380 381 if (sensor_pci_no_comm(dev)) { 382 mlx5_core_err(dev, 383 "health recovery flow aborted, PCI reads still not working\n"); 384 recover = false; 385 } 386 387 nic_mode = mlx5_get_nic_state(dev); 388 while (nic_mode != MLX5_NIC_IFC_DISABLED && 389 !time_after(jiffies, end)) { 390 msleep(MLX5_NIC_STATE_POLL_MS); 391 nic_mode = mlx5_get_nic_state(dev); 392 } 393 394 if (nic_mode != MLX5_NIC_IFC_DISABLED) { 395 mlx5_core_err(dev, 396 "health recovery flow aborted, unexpected NIC IFC mode %d.\n", 397 nic_mode); 398 recover = false; 399 } 400 401 if (recover) { 402 mlx5_core_info(dev, "Starting health recovery flow\n"); 403 mlx5_recover_device(dev); 404 } 405 406 bus_topo_unlock(); 407 } 408 409 /* How much time to wait until health resetting the driver (in msecs) */ 410 #define MLX5_RECOVERY_DELAY_MSECS 60000 411 #define MLX5_RECOVERY_NO_DELAY 0 412 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev) 413 { 414 return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR || 415 dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR ? 416 MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY; 417 } 418 419 static void health_care(struct work_struct *work) 420 { 421 struct mlx5_core_health *health; 422 unsigned long recover_delay; 423 struct mlx5_core_dev *dev; 424 struct mlx5_priv *priv; 425 unsigned long flags; 426 427 health = container_of(work, struct mlx5_core_health, work); 428 priv = container_of(health, struct mlx5_priv, health); 429 dev = container_of(priv, struct mlx5_core_dev, priv); 430 431 mlx5_core_warn(dev, "handling bad device here\n"); 432 mlx5_handle_bad_state(dev); 433 recover_delay = msecs_to_jiffies(get_recovery_delay(dev)); 434 435 spin_lock_irqsave(&health->wq_lock, flags); 436 if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) { 437 mlx5_core_warn(dev, 438 "Scheduling recovery work with %lums delay\n", 439 recover_delay); 440 schedule_delayed_work(&health->recover_work, recover_delay); 441 } else { 442 mlx5_core_err(dev, 443 "new health works are not permitted at this stage\n"); 444 } 445 spin_unlock_irqrestore(&health->wq_lock, flags); 446 } 447 448 static int get_next_poll_jiffies(void) 449 { 450 unsigned long next; 451 452 get_random_bytes(&next, sizeof(next)); 453 next %= HZ; 454 next += jiffies + MLX5_HEALTH_POLL_INTERVAL; 455 456 return next; 457 } 458 459 void mlx5_trigger_health_work(struct mlx5_core_dev *dev) 460 { 461 struct mlx5_core_health *health = &dev->priv.health; 462 unsigned long flags; 463 464 spin_lock_irqsave(&health->wq_lock, flags); 465 if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) 466 queue_work(health->wq, &health->work); 467 else 468 mlx5_core_err(dev, 469 "new health works are not permitted at this stage\n"); 470 spin_unlock_irqrestore(&health->wq_lock, flags); 471 } 472 473 static const char *hsynd_str(u8 synd) 474 { 475 switch (synd) { 476 case MLX5_HEALTH_SYNDR_FW_ERR: 477 return "firmware internal error"; 478 case MLX5_HEALTH_SYNDR_IRISC_ERR: 479 return "irisc not responding"; 480 case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: 481 return "unrecoverable hardware error"; 482 case MLX5_HEALTH_SYNDR_CRC_ERR: 483 return "firmware CRC error"; 484 case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: 485 return "ICM fetch PCI error"; 486 case MLX5_HEALTH_SYNDR_HW_FTL_ERR: 487 return "HW fatal error\n"; 488 case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: 489 return "async EQ buffer overrun"; 490 case MLX5_HEALTH_SYNDR_EQ_ERR: 491 return "EQ error"; 492 case MLX5_HEALTH_SYNDR_EQ_INV: 493 return "Invalid EQ referenced"; 494 case MLX5_HEALTH_SYNDR_FFSER_ERR: 495 return "FFSER error"; 496 case MLX5_HEALTH_SYNDR_HIGH_TEMP: 497 return "High temperature"; 498 default: 499 return "unrecognized error"; 500 } 501 } 502 503 static u8 504 print_health_info(struct mlx5_core_dev *dev) 505 { 506 struct mlx5_core_health *health = &dev->priv.health; 507 struct mlx5_health_buffer __iomem *h = health->health; 508 u8 synd = ioread8(&h->synd); 509 char fw_str[18]; 510 u32 fw; 511 int i; 512 513 /* 514 * If synd is 0x0 - this indicates that FW is unable to 515 * respond to initialization segment reads and health buffer 516 * should not be read. 517 */ 518 if (synd == 0) 519 return (0); 520 521 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) 522 mlx5_core_info(dev, "assert_var[%d] 0x%08x\n", i, 523 ioread32be(h->assert_var + i)); 524 525 mlx5_core_info(dev, "assert_exit_ptr 0x%08x\n", 526 ioread32be(&h->assert_exit_ptr)); 527 mlx5_core_info(dev, "assert_callra 0x%08x\n", 528 ioread32be(&h->assert_callra)); 529 snprintf(fw_str, sizeof(fw_str), "%d.%d.%d", 530 fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); 531 mlx5_core_info(dev, "fw_ver %s\n", fw_str); 532 mlx5_core_info(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); 533 mlx5_core_info(dev, "irisc_index %d\n", ioread8(&h->irisc_index)); 534 mlx5_core_info(dev, "synd 0x%x: %s\n", 535 ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); 536 mlx5_core_info(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); 537 fw = ioread32be(&h->fw_ver); 538 mlx5_core_info(dev, "raw fw_ver 0x%08x\n", fw); 539 540 return synd; 541 } 542 543 static void health_watchdog(struct work_struct *work) 544 { 545 struct mlx5_core_dev *dev; 546 u16 power; 547 u8 status; 548 int err; 549 550 dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog); 551 552 if (!MLX5_CAP_GEN(dev, mcam_reg) || 553 !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power)) 554 return; 555 556 err = mlx5_pci_read_power_status(dev, &power, &status); 557 if (err < 0) { 558 mlx5_core_warn(dev, "Failed reading power status: %d\n", 559 err); 560 return; 561 } 562 563 dev->pwr_value = power; 564 565 if (dev->pwr_status != status) { 566 567 switch (status) { 568 case 0: 569 dev->pwr_status = status; 570 mlx5_core_info(dev, 571 "PCI power is not published by the PCIe slot.\n"); 572 break; 573 case 1: 574 dev->pwr_status = status; 575 mlx5_core_info(dev, 576 "PCIe slot advertised sufficient power (%uW).\n", 577 power); 578 break; 579 case 2: 580 dev->pwr_status = status; 581 mlx5_core_warn(dev, 582 "Detected insufficient power on the PCIe slot (%uW).\n", 583 power); 584 break; 585 default: 586 dev->pwr_status = 0; 587 mlx5_core_warn(dev, 588 "Unknown power state detected(%d).\n", 589 status); 590 break; 591 } 592 } 593 } 594 595 void 596 mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev) 597 { 598 struct mlx5_core_health *health = &dev->priv.health; 599 unsigned long flags; 600 601 spin_lock_irqsave(&health->wq_lock, flags); 602 if (!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags)) 603 queue_work(health->wq_watchdog, &health->work_watchdog); 604 else 605 mlx5_core_err(dev, 606 "scheduling watchdog is not permitted at this stage\n"); 607 spin_unlock_irqrestore(&health->wq_lock, flags); 608 } 609 610 static void poll_health(unsigned long data) 611 { 612 struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; 613 struct mlx5_core_health *health = &dev->priv.health; 614 u32 fatal_error; 615 u32 count; 616 617 if (dev->state != MLX5_DEVICE_STATE_UP) 618 return; 619 620 count = ioread32be(health->health_counter); 621 if (count == health->prev) 622 ++health->miss_counter; 623 else 624 health->miss_counter = 0; 625 626 health->prev = count; 627 if (health->miss_counter == MAX_MISSES) { 628 mlx5_core_err(dev, "device's health compromised - reached miss count\n"); 629 if (print_health_info(dev) == 0) 630 mlx5_core_err(dev, "FW is unable to respond to initialization segment reads\n"); 631 } 632 633 fatal_error = check_fatal_sensors(dev); 634 635 if (fatal_error && !health->fatal_error) { 636 mlx5_core_err(dev, 637 "Fatal error %u detected\n", fatal_error); 638 dev->priv.health.fatal_error = fatal_error; 639 print_health_info(dev); 640 mlx5_trigger_health_work(dev); 641 } 642 643 mod_timer(&health->timer, get_next_poll_jiffies()); 644 } 645 646 void mlx5_start_health_poll(struct mlx5_core_dev *dev) 647 { 648 struct mlx5_core_health *health = &dev->priv.health; 649 650 init_timer(&health->timer); 651 health->fatal_error = MLX5_SENSOR_NO_ERR; 652 clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 653 clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 654 clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 655 health->health = &dev->iseg->health; 656 health->health_counter = &dev->iseg->health_counter; 657 658 setup_timer(&health->timer, poll_health, (unsigned long)dev); 659 mod_timer(&health->timer, 660 round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL)); 661 662 /* do initial PCI power state readout */ 663 mlx5_trigger_health_watchdog(dev); 664 } 665 666 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) 667 { 668 struct mlx5_core_health *health = &dev->priv.health; 669 unsigned long flags; 670 671 if (disable_health) { 672 spin_lock_irqsave(&health->wq_lock, flags); 673 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 674 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 675 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 676 spin_unlock_irqrestore(&health->wq_lock, flags); 677 } 678 679 del_timer_sync(&health->timer); 680 } 681 682 void mlx5_drain_health_wq(struct mlx5_core_dev *dev) 683 { 684 struct mlx5_core_health *health = &dev->priv.health; 685 unsigned long flags; 686 687 spin_lock_irqsave(&health->wq_lock, flags); 688 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 689 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 690 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 691 spin_unlock_irqrestore(&health->wq_lock, flags); 692 cancel_delayed_work_sync(&health->recover_work); 693 cancel_work_sync(&health->work); 694 cancel_work_sync(&health->work_watchdog); 695 } 696 697 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) 698 { 699 struct mlx5_core_health *health = &dev->priv.health; 700 unsigned long flags; 701 702 spin_lock_irqsave(&health->wq_lock, flags); 703 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 704 spin_unlock_irqrestore(&health->wq_lock, flags); 705 cancel_delayed_work_sync(&dev->priv.health.recover_work); 706 } 707 708 void mlx5_health_cleanup(struct mlx5_core_dev *dev) 709 { 710 struct mlx5_core_health *health = &dev->priv.health; 711 712 destroy_workqueue(health->wq); 713 destroy_workqueue(health->wq_watchdog); 714 destroy_workqueue(health->wq_cmd); 715 } 716 717 int mlx5_health_init(struct mlx5_core_dev *dev) 718 { 719 struct mlx5_core_health *health; 720 char name[64]; 721 722 health = &dev->priv.health; 723 724 snprintf(name, sizeof(name), "%s-rec", dev_name(&dev->pdev->dev)); 725 health->wq = create_singlethread_workqueue(name); 726 if (!health->wq) 727 goto err_recovery; 728 729 snprintf(name, sizeof(name), "%s-wdg", dev_name(&dev->pdev->dev)); 730 health->wq_watchdog = create_singlethread_workqueue(name); 731 if (!health->wq_watchdog) 732 goto err_watchdog; 733 734 snprintf(name, sizeof(name), "%s-cmd", dev_name(&dev->pdev->dev)); 735 health->wq_cmd = create_singlethread_workqueue(name); 736 if (!health->wq_cmd) 737 goto err_cmd; 738 739 spin_lock_init(&health->wq_lock); 740 INIT_WORK(&health->work, health_care); 741 INIT_WORK(&health->work_watchdog, health_watchdog); 742 INIT_WORK(&health->work_cmd_completion, mlx5_trigger_cmd_completions); 743 INIT_DELAYED_WORK(&health->recover_work, health_recover); 744 745 return 0; 746 747 err_cmd: 748 destroy_workqueue(health->wq_watchdog); 749 err_watchdog: 750 destroy_workqueue(health->wq); 751 err_recovery: 752 return -ENOMEM; 753 } 754