1 /*- 2 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "opt_rss.h" 27 #include "opt_ratelimit.h" 28 29 #include <linux/kernel.h> 30 #include <linux/module.h> 31 #include <linux/random.h> 32 #include <linux/vmalloc.h> 33 #include <linux/hardirq.h> 34 #include <linux/delay.h> 35 #include <dev/mlx5/driver.h> 36 #include <dev/mlx5/mlx5_ifc.h> 37 #include <dev/mlx5/mlx5_core/mlx5_core.h> 38 39 #define MLX5_HEALTH_POLL_INTERVAL (2 * HZ) 40 #define MAX_MISSES 3 41 42 enum { 43 MLX5_DROP_NEW_HEALTH_WORK, 44 MLX5_DROP_NEW_RECOVERY_WORK, 45 MLX5_DROP_NEW_WATCHDOG_WORK, 46 }; 47 48 enum { 49 MLX5_SENSOR_NO_ERR = 0, 50 MLX5_SENSOR_PCI_COMM_ERR = 1, 51 MLX5_SENSOR_PCI_ERR = 2, 52 MLX5_SENSOR_NIC_DISABLED = 3, 53 MLX5_SENSOR_NIC_SW_RESET = 4, 54 MLX5_SENSOR_FW_SYND_RFR = 5, 55 }; 56 57 static int mlx5_fw_reset_enable = 1; 58 SYSCTL_INT(_hw_mlx5, OID_AUTO, fw_reset_enable, CTLFLAG_RWTUN, 59 &mlx5_fw_reset_enable, 0, 60 "Enable firmware reset"); 61 62 static unsigned int sw_reset_to = 1200; 63 SYSCTL_UINT(_hw_mlx5, OID_AUTO, sw_reset_timeout, CTLFLAG_RWTUN, 64 &sw_reset_to, 0, 65 "Minimum timeout in seconds between two firmware resets"); 66 67 68 static int lock_sem_sw_reset(struct mlx5_core_dev *dev) 69 { 70 int ret; 71 72 /* Lock GW access */ 73 ret = -mlx5_vsc_lock(dev); 74 if (ret) { 75 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 76 return ret; 77 } 78 79 ret = -mlx5_vsc_lock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 80 if (ret) { 81 if (ret == -EBUSY) 82 mlx5_core_dbg(dev, 83 "SW reset FW semaphore already locked, another function will handle the reset\n"); 84 else 85 mlx5_core_warn(dev, 86 "SW reset semaphore lock return %d\n", ret); 87 } 88 89 /* Unlock GW access */ 90 mlx5_vsc_unlock(dev); 91 92 return ret; 93 } 94 95 static int unlock_sem_sw_reset(struct mlx5_core_dev *dev) 96 { 97 int ret; 98 99 /* Lock GW access */ 100 ret = -mlx5_vsc_lock(dev); 101 if (ret) { 102 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret); 103 return ret; 104 } 105 106 ret = -mlx5_vsc_unlock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET); 107 108 /* Unlock GW access */ 109 mlx5_vsc_unlock(dev); 110 111 return ret; 112 } 113 114 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) 115 { 116 return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; 117 } 118 119 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) 120 { 121 u32 cur_cmdq_addr_l_sz; 122 123 cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); 124 iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | 125 state << MLX5_NIC_IFC_OFFSET, 126 &dev->iseg->cmdq_addr_l_sz); 127 } 128 129 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) 130 { 131 struct mlx5_core_health *health = &dev->priv.health; 132 struct mlx5_health_buffer __iomem *h = health->health; 133 u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; 134 u8 synd = ioread8(&h->synd); 135 136 if (rfr && synd) 137 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); 138 return rfr && synd; 139 } 140 141 static void mlx5_trigger_cmd_completions(struct work_struct *work) 142 { 143 struct mlx5_core_dev *dev = 144 container_of(work, struct mlx5_core_dev, priv.health.work_cmd_completion); 145 unsigned long flags; 146 u64 vector; 147 148 /* wait for pending handlers to complete */ 149 synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); 150 spin_lock_irqsave(&dev->cmd.alloc_lock, flags); 151 vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); 152 if (!vector) 153 goto no_trig; 154 155 vector |= MLX5_TRIGGERED_CMD_COMP; 156 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 157 158 mlx5_core_dbg(dev, "vector 0x%jx\n", (uintmax_t)vector); 159 mlx5_cmd_comp_handler(dev, vector, MLX5_CMD_MODE_EVENTS); 160 return; 161 162 no_trig: 163 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 164 } 165 166 static bool sensor_pci_no_comm(struct mlx5_core_dev *dev) 167 { 168 struct mlx5_core_health *health = &dev->priv.health; 169 struct mlx5_health_buffer __iomem *h = health->health; 170 bool err = ioread32be(&h->fw_ver) == 0xffffffff; 171 172 return err; 173 } 174 175 static bool sensor_nic_disabled(struct mlx5_core_dev *dev) 176 { 177 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED; 178 } 179 180 static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev) 181 { 182 return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET; 183 } 184 185 static u32 check_fatal_sensors(struct mlx5_core_dev *dev) 186 { 187 if (sensor_pci_no_comm(dev)) 188 return MLX5_SENSOR_PCI_COMM_ERR; 189 if (pci_channel_offline(dev->pdev)) 190 return MLX5_SENSOR_PCI_ERR; 191 if (sensor_nic_disabled(dev)) 192 return MLX5_SENSOR_NIC_DISABLED; 193 if (sensor_nic_sw_reset(dev)) 194 return MLX5_SENSOR_NIC_SW_RESET; 195 if (sensor_fw_synd_rfr(dev)) 196 return MLX5_SENSOR_FW_SYND_RFR; 197 198 return MLX5_SENSOR_NO_ERR; 199 } 200 201 static void reset_fw_if_needed(struct mlx5_core_dev *dev) 202 { 203 bool supported; 204 u32 cmdq_addr, fatal_error; 205 206 if (!mlx5_fw_reset_enable) 207 return; 208 supported = (ioread32be(&dev->iseg->initializing) >> 209 MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; 210 if (!supported) 211 return; 212 213 /* The reset only needs to be issued by one PF. The health buffer is 214 * shared between all functions, and will be cleared during a reset. 215 * Check again to avoid a redundant 2nd reset. If the fatal erros was 216 * PCI related a reset won't help. 217 */ 218 fatal_error = check_fatal_sensors(dev); 219 if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || 220 fatal_error == MLX5_SENSOR_NIC_DISABLED || 221 fatal_error == MLX5_SENSOR_NIC_SW_RESET) { 222 mlx5_core_warn(dev, 223 "Not issuing FW reset. Either it's already done or won't help.\n"); 224 return; 225 } 226 227 mlx5_core_info(dev, "Issuing FW Reset\n"); 228 /* Write the NIC interface field to initiate the reset, the command 229 * interface address also resides here, don't overwrite it. 230 */ 231 cmdq_addr = ioread32be(&dev->iseg->cmdq_addr_l_sz); 232 iowrite32be((cmdq_addr & 0xFFFFF000) | 233 MLX5_NIC_IFC_SW_RESET << MLX5_NIC_IFC_OFFSET, 234 &dev->iseg->cmdq_addr_l_sz); 235 } 236 237 static bool 238 mlx5_health_allow_reset(struct mlx5_core_dev *dev) 239 { 240 struct mlx5_core_health *health = &dev->priv.health; 241 unsigned int delta; 242 bool ret; 243 244 if (health->last_reset_req != 0) { 245 delta = ticks - health->last_reset_req; 246 delta /= hz; 247 ret = delta >= sw_reset_to; 248 } else { 249 ret = true; 250 } 251 252 /* 253 * In principle, ticks may be 0. Setting it to off by one (-1) 254 * to prevent certain reset in next request. 255 */ 256 health->last_reset_req = ticks ? : -1; 257 if (!ret) 258 mlx5_core_warn(dev, 259 "Firmware reset elided due to auto-reset frequency threshold.\n"); 260 return (ret); 261 } 262 263 #define MLX5_CRDUMP_WAIT_MS 60000 264 #define MLX5_FW_RESET_WAIT_MS 1000 265 #define MLX5_NIC_STATE_POLL_MS 5 266 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) 267 { 268 unsigned long end; 269 int delay_ms = MLX5_CRDUMP_WAIT_MS; 270 u32 fatal_error; 271 int lock = -EBUSY; 272 273 fatal_error = check_fatal_sensors(dev); 274 275 if (fatal_error || force) { 276 if (xchg(&dev->state, MLX5_DEVICE_STATE_INTERNAL_ERROR) == 277 MLX5_DEVICE_STATE_INTERNAL_ERROR) 278 return; 279 if (!force) 280 mlx5_core_err(dev, "internal state error detected\n"); 281 282 /* 283 * Queue the command completion handler on the command 284 * work queue to avoid racing with the real command 285 * completion handler and then wait for it to 286 * complete: 287 */ 288 queue_work(dev->priv.health.wq_cmd, &dev->priv.health.work_cmd_completion); 289 flush_workqueue(dev->priv.health.wq_cmd); 290 } 291 292 mutex_lock(&dev->intf_state_mutex); 293 294 if (force) 295 goto err_state_done; 296 297 if (fatal_error == MLX5_SENSOR_FW_SYND_RFR && 298 mlx5_health_allow_reset(dev)) { 299 /* Get cr-dump and reset FW semaphore */ 300 if (mlx5_core_is_pf(dev)) 301 lock = lock_sem_sw_reset(dev); 302 303 /* Execute cr-dump and SW reset */ 304 if (lock != -EBUSY) { 305 (void)mlx5_fwdump(dev); 306 reset_fw_if_needed(dev); 307 delay_ms = MLX5_FW_RESET_WAIT_MS; 308 } 309 } 310 311 /* Recover from SW reset */ 312 end = jiffies + msecs_to_jiffies(delay_ms); 313 do { 314 if (sensor_nic_disabled(dev)) 315 break; 316 317 msleep(MLX5_NIC_STATE_POLL_MS); 318 } while (!time_after(jiffies, end)); 319 320 if (!sensor_nic_disabled(dev)) { 321 mlx5_core_err(dev, "NIC IFC still %d after %ums.\n", 322 mlx5_get_nic_state(dev), delay_ms); 323 } 324 325 /* Release FW semaphore if you are the lock owner */ 326 if (!lock) 327 unlock_sem_sw_reset(dev); 328 329 mlx5_core_info(dev, "System error event triggered\n"); 330 331 err_state_done: 332 mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); 333 mutex_unlock(&dev->intf_state_mutex); 334 } 335 336 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) 337 { 338 u8 nic_mode = mlx5_get_nic_state(dev); 339 340 if (nic_mode == MLX5_NIC_IFC_SW_RESET) { 341 /* The IFC mode field is 3 bits, so it will read 0x7 in two cases: 342 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded 343 * and this is a VF), this is not recoverable by SW reset. 344 * Logging of this is handled elsewhere. 345 * 2. FW reset has been issued by another function, driver can 346 * be reloaded to recover after the mode switches to 347 * MLX5_NIC_IFC_DISABLED. 348 */ 349 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) 350 mlx5_core_warn(dev, 351 "NIC SW reset is already progress\n"); 352 else 353 mlx5_core_warn(dev, 354 "Communication with FW over the PCI link is down\n"); 355 } else { 356 mlx5_core_warn(dev, "NIC mode %d\n", nic_mode); 357 } 358 359 mlx5_disable_device(dev); 360 } 361 362 #define MLX5_FW_RESET_WAIT_MS 1000 363 #define MLX5_NIC_STATE_POLL_MS 5 364 static void health_recover(struct work_struct *work) 365 { 366 unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS); 367 struct mlx5_core_health *health; 368 struct delayed_work *dwork; 369 struct mlx5_core_dev *dev; 370 struct mlx5_priv *priv; 371 bool recover = true; 372 u8 nic_mode; 373 374 dwork = container_of(work, struct delayed_work, work); 375 health = container_of(dwork, struct mlx5_core_health, recover_work); 376 priv = container_of(health, struct mlx5_priv, health); 377 dev = container_of(priv, struct mlx5_core_dev, priv); 378 379 /* This might likely be wrong, cut and paste from elsewhere? */ 380 bus_topo_lock(); 381 382 if (sensor_pci_no_comm(dev)) { 383 mlx5_core_err(dev, 384 "health recovery flow aborted, PCI reads still not working\n"); 385 recover = false; 386 } 387 388 nic_mode = mlx5_get_nic_state(dev); 389 while (nic_mode != MLX5_NIC_IFC_DISABLED && 390 !time_after(jiffies, end)) { 391 msleep(MLX5_NIC_STATE_POLL_MS); 392 nic_mode = mlx5_get_nic_state(dev); 393 } 394 395 if (nic_mode != MLX5_NIC_IFC_DISABLED) { 396 mlx5_core_err(dev, 397 "health recovery flow aborted, unexpected NIC IFC mode %d.\n", 398 nic_mode); 399 recover = false; 400 } 401 402 if (recover) { 403 mlx5_core_info(dev, "Starting health recovery flow\n"); 404 mlx5_recover_device(dev); 405 } 406 407 bus_topo_unlock(); 408 } 409 410 /* How much time to wait until health resetting the driver (in msecs) */ 411 #define MLX5_RECOVERY_DELAY_MSECS 60000 412 #define MLX5_RECOVERY_NO_DELAY 0 413 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev) 414 { 415 return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR || 416 dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR ? 417 MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY; 418 } 419 420 static void health_care(struct work_struct *work) 421 { 422 struct mlx5_core_health *health; 423 unsigned long recover_delay; 424 struct mlx5_core_dev *dev; 425 struct mlx5_priv *priv; 426 unsigned long flags; 427 428 health = container_of(work, struct mlx5_core_health, work); 429 priv = container_of(health, struct mlx5_priv, health); 430 dev = container_of(priv, struct mlx5_core_dev, priv); 431 432 mlx5_core_warn(dev, "handling bad device here\n"); 433 mlx5_handle_bad_state(dev); 434 recover_delay = msecs_to_jiffies(get_recovery_delay(dev)); 435 436 spin_lock_irqsave(&health->wq_lock, flags); 437 if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) { 438 mlx5_core_warn(dev, 439 "Scheduling recovery work with %lums delay\n", 440 recover_delay); 441 schedule_delayed_work(&health->recover_work, recover_delay); 442 } else { 443 mlx5_core_err(dev, 444 "new health works are not permitted at this stage\n"); 445 } 446 spin_unlock_irqrestore(&health->wq_lock, flags); 447 } 448 449 static unsigned long get_next_poll_jiffies(void) 450 { 451 unsigned long next; 452 453 get_random_bytes(&next, sizeof(next)); 454 next %= HZ; 455 next += jiffies + MLX5_HEALTH_POLL_INTERVAL; 456 457 return next; 458 } 459 460 void mlx5_trigger_health_work(struct mlx5_core_dev *dev) 461 { 462 struct mlx5_core_health *health = &dev->priv.health; 463 unsigned long flags; 464 465 spin_lock_irqsave(&health->wq_lock, flags); 466 if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) 467 queue_work(health->wq, &health->work); 468 else 469 mlx5_core_err(dev, 470 "new health works are not permitted at this stage\n"); 471 spin_unlock_irqrestore(&health->wq_lock, flags); 472 } 473 474 static const char *hsynd_str(u8 synd) 475 { 476 switch (synd) { 477 case MLX5_HEALTH_SYNDR_FW_ERR: 478 return "firmware internal error"; 479 case MLX5_HEALTH_SYNDR_IRISC_ERR: 480 return "irisc not responding"; 481 case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: 482 return "unrecoverable hardware error"; 483 case MLX5_HEALTH_SYNDR_CRC_ERR: 484 return "firmware CRC error"; 485 case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: 486 return "ICM fetch PCI error"; 487 case MLX5_HEALTH_SYNDR_HW_FTL_ERR: 488 return "HW fatal error\n"; 489 case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: 490 return "async EQ buffer overrun"; 491 case MLX5_HEALTH_SYNDR_EQ_ERR: 492 return "EQ error"; 493 case MLX5_HEALTH_SYNDR_EQ_INV: 494 return "Invalid EQ referenced"; 495 case MLX5_HEALTH_SYNDR_FFSER_ERR: 496 return "FFSER error"; 497 case MLX5_HEALTH_SYNDR_HIGH_TEMP: 498 return "High temperature"; 499 default: 500 return "unrecognized error"; 501 } 502 } 503 504 static u8 505 print_health_info(struct mlx5_core_dev *dev) 506 { 507 struct mlx5_core_health *health = &dev->priv.health; 508 struct mlx5_health_buffer __iomem *h = health->health; 509 u8 synd = ioread8(&h->synd); 510 char fw_str[18]; 511 u32 fw; 512 int i; 513 514 /* 515 * If synd is 0x0 - this indicates that FW is unable to 516 * respond to initialization segment reads and health buffer 517 * should not be read. 518 */ 519 if (synd == 0) 520 return (0); 521 522 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) 523 mlx5_core_info(dev, "assert_var[%d] 0x%08x\n", i, 524 ioread32be(h->assert_var + i)); 525 526 mlx5_core_info(dev, "assert_exit_ptr 0x%08x\n", 527 ioread32be(&h->assert_exit_ptr)); 528 mlx5_core_info(dev, "assert_callra 0x%08x\n", 529 ioread32be(&h->assert_callra)); 530 snprintf(fw_str, sizeof(fw_str), "%d.%d.%d", 531 fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); 532 mlx5_core_info(dev, "fw_ver %s\n", fw_str); 533 mlx5_core_info(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); 534 mlx5_core_info(dev, "irisc_index %d\n", ioread8(&h->irisc_index)); 535 mlx5_core_info(dev, "synd 0x%x: %s\n", 536 ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); 537 mlx5_core_info(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); 538 fw = ioread32be(&h->fw_ver); 539 mlx5_core_info(dev, "raw fw_ver 0x%08x\n", fw); 540 541 return synd; 542 } 543 544 static void health_watchdog(struct work_struct *work) 545 { 546 struct mlx5_core_dev *dev; 547 u16 power; 548 u8 status; 549 int err; 550 551 dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog); 552 553 if (!MLX5_CAP_GEN(dev, mcam_reg) || 554 !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power)) 555 return; 556 557 err = mlx5_pci_read_power_status(dev, &power, &status); 558 if (err < 0) { 559 mlx5_core_warn(dev, "Failed reading power status: %d\n", 560 err); 561 return; 562 } 563 564 dev->pwr_value = power; 565 566 if (dev->pwr_status != status) { 567 568 switch (status) { 569 case 0: 570 dev->pwr_status = status; 571 mlx5_core_info(dev, 572 "PCI power is not published by the PCIe slot.\n"); 573 break; 574 case 1: 575 dev->pwr_status = status; 576 mlx5_core_info(dev, 577 "PCIe slot advertised sufficient power (%uW).\n", 578 power); 579 break; 580 case 2: 581 dev->pwr_status = status; 582 mlx5_core_warn(dev, 583 "Detected insufficient power on the PCIe slot (%uW).\n", 584 power); 585 break; 586 default: 587 dev->pwr_status = 0; 588 mlx5_core_warn(dev, 589 "Unknown power state detected(%d).\n", 590 status); 591 break; 592 } 593 } 594 } 595 596 void 597 mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev) 598 { 599 struct mlx5_core_health *health = &dev->priv.health; 600 unsigned long flags; 601 602 spin_lock_irqsave(&health->wq_lock, flags); 603 if (!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags)) 604 queue_work(health->wq_watchdog, &health->work_watchdog); 605 else 606 mlx5_core_err(dev, 607 "scheduling watchdog is not permitted at this stage\n"); 608 spin_unlock_irqrestore(&health->wq_lock, flags); 609 } 610 611 static void poll_health(unsigned long data) 612 { 613 struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; 614 struct mlx5_core_health *health = &dev->priv.health; 615 u32 fatal_error; 616 u32 count; 617 618 if (dev->state != MLX5_DEVICE_STATE_UP) 619 return; 620 621 count = ioread32be(health->health_counter); 622 if (count == health->prev) 623 ++health->miss_counter; 624 else 625 health->miss_counter = 0; 626 627 health->prev = count; 628 if (health->miss_counter == MAX_MISSES) { 629 mlx5_core_err(dev, "device's health compromised - reached miss count\n"); 630 if (print_health_info(dev) == 0) 631 mlx5_core_err(dev, "FW is unable to respond to initialization segment reads\n"); 632 } 633 634 fatal_error = check_fatal_sensors(dev); 635 636 if (fatal_error && !health->fatal_error) { 637 mlx5_core_err(dev, 638 "Fatal error %u detected\n", fatal_error); 639 dev->priv.health.fatal_error = fatal_error; 640 print_health_info(dev); 641 mlx5_trigger_health_work(dev); 642 } 643 644 mod_timer(&health->timer, get_next_poll_jiffies()); 645 } 646 647 void mlx5_start_health_poll(struct mlx5_core_dev *dev) 648 { 649 struct mlx5_core_health *health = &dev->priv.health; 650 651 init_timer(&health->timer); 652 health->fatal_error = MLX5_SENSOR_NO_ERR; 653 clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 654 clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 655 clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 656 health->health = &dev->iseg->health; 657 health->health_counter = &dev->iseg->health_counter; 658 659 setup_timer(&health->timer, poll_health, (unsigned long)dev); 660 mod_timer(&health->timer, 661 round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL)); 662 663 /* do initial PCI power state readout */ 664 mlx5_trigger_health_watchdog(dev); 665 } 666 667 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) 668 { 669 struct mlx5_core_health *health = &dev->priv.health; 670 unsigned long flags; 671 672 if (disable_health) { 673 spin_lock_irqsave(&health->wq_lock, flags); 674 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 675 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 676 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 677 spin_unlock_irqrestore(&health->wq_lock, flags); 678 } 679 680 del_timer_sync(&health->timer); 681 } 682 683 void mlx5_drain_health_wq(struct mlx5_core_dev *dev) 684 { 685 struct mlx5_core_health *health = &dev->priv.health; 686 unsigned long flags; 687 688 spin_lock_irqsave(&health->wq_lock, flags); 689 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 690 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 691 set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); 692 spin_unlock_irqrestore(&health->wq_lock, flags); 693 cancel_delayed_work_sync(&health->recover_work); 694 cancel_work_sync(&health->work); 695 cancel_work_sync(&health->work_watchdog); 696 } 697 698 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) 699 { 700 struct mlx5_core_health *health = &dev->priv.health; 701 unsigned long flags; 702 703 spin_lock_irqsave(&health->wq_lock, flags); 704 set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); 705 spin_unlock_irqrestore(&health->wq_lock, flags); 706 cancel_delayed_work_sync(&dev->priv.health.recover_work); 707 } 708 709 void mlx5_health_cleanup(struct mlx5_core_dev *dev) 710 { 711 struct mlx5_core_health *health = &dev->priv.health; 712 713 destroy_workqueue(health->wq); 714 destroy_workqueue(health->wq_watchdog); 715 destroy_workqueue(health->wq_cmd); 716 } 717 718 int mlx5_health_init(struct mlx5_core_dev *dev) 719 { 720 struct mlx5_core_health *health; 721 char name[64]; 722 723 health = &dev->priv.health; 724 725 snprintf(name, sizeof(name), "%s-rec", dev_name(&dev->pdev->dev)); 726 health->wq = create_singlethread_workqueue(name); 727 if (!health->wq) 728 goto err_recovery; 729 730 snprintf(name, sizeof(name), "%s-wdg", dev_name(&dev->pdev->dev)); 731 health->wq_watchdog = create_singlethread_workqueue(name); 732 if (!health->wq_watchdog) 733 goto err_watchdog; 734 735 snprintf(name, sizeof(name), "%s-cmd", dev_name(&dev->pdev->dev)); 736 health->wq_cmd = create_singlethread_workqueue(name); 737 if (!health->wq_cmd) 738 goto err_cmd; 739 740 spin_lock_init(&health->wq_lock); 741 INIT_WORK(&health->work, health_care); 742 INIT_WORK(&health->work_watchdog, health_watchdog); 743 INIT_WORK(&health->work_cmd_completion, mlx5_trigger_cmd_completions); 744 INIT_DELAYED_WORK(&health->recover_work, health_recover); 745 746 return 0; 747 748 err_cmd: 749 destroy_workqueue(health->wq_watchdog); 750 err_watchdog: 751 destroy_workqueue(health->wq); 752 err_recovery: 753 return -ENOMEM; 754 } 755