1 /* 2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 3 * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #define LINUXKPI_PARAM_PREFIX mlx4_ 35 36 #include <linux/workqueue.h> 37 #include <linux/module.h> 38 39 #include <asm/byteorder.h> 40 41 #include "mlx4.h" 42 43 #define MLX4_CATAS_POLL_INTERVAL (5 * HZ) 44 45 static DEFINE_SPINLOCK(catas_lock); 46 47 static LIST_HEAD(catas_list); 48 static struct work_struct catas_work; 49 50 static int internal_err_reset = 1; 51 module_param(internal_err_reset, int, 0644); 52 MODULE_PARM_DESC(internal_err_reset, 53 "Reset device on internal errors if non-zero" 54 " (default 1, in SRIOV mode default is 0)"); 55 56 static void dump_err_buf(struct mlx4_dev *dev) 57 { 58 struct mlx4_priv *priv = mlx4_priv(dev); 59 60 int i; 61 62 mlx4_err(dev, "Internal error detected:\n"); 63 for (i = 0; i < priv->fw.catas_size; ++i) 64 mlx4_err(dev, " buf[%02x]: %08x\n", 65 i, swab32(readl(priv->catas_err.map + i))); 66 } 67 68 static void poll_catas(unsigned long dev_ptr) 69 { 70 struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; 71 struct mlx4_priv *priv = mlx4_priv(dev); 72 73 if (readl(priv->catas_err.map)) { 74 /* If the device is off-line, we cannot try to recover it */ 75 if (pci_channel_offline(dev->pdev)) 76 mod_timer(&priv->catas_err.timer, 77 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); 78 else { 79 dump_err_buf(dev); 80 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); 81 82 if (internal_err_reset) { 83 spin_lock(&catas_lock); 84 list_add(&priv->catas_err.list, &catas_list); 85 spin_unlock(&catas_lock); 86 87 queue_work(mlx4_wq, &catas_work); 88 } 89 } 90 } else 91 mod_timer(&priv->catas_err.timer, 92 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); 93 } 94 95 static void catas_reset(struct work_struct *work) 96 { 97 struct mlx4_priv *priv, *tmppriv; 98 struct mlx4_dev *dev; 99 100 LIST_HEAD(tlist); 101 int ret; 102 103 spin_lock_irq(&catas_lock); 104 list_splice_init(&catas_list, &tlist); 105 spin_unlock_irq(&catas_lock); 106 107 list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { 108 struct pci_dev *pdev = priv->dev.pdev; 109 110 /* If the device is off-line, we cannot reset it */ 111 if (pci_channel_offline(pdev)) 112 continue; 113 114 ret = mlx4_restart_one(priv->dev.pdev); 115 /* 'priv' now is not valid */ 116 if (ret) 117 pr_err("mlx4 %s: Reset failed (%d)\n", 118 pci_name(pdev), ret); 119 else { 120 dev = pci_get_drvdata(pdev); 121 mlx4_dbg(dev, "Reset succeeded\n"); 122 } 123 } 124 } 125 126 void mlx4_start_catas_poll(struct mlx4_dev *dev) 127 { 128 struct mlx4_priv *priv = mlx4_priv(dev); 129 phys_addr_t addr; 130 131 /*If we are in SRIOV the default of the module param must be 0*/ 132 if (mlx4_is_mfunc(dev)) 133 internal_err_reset = 0; 134 135 INIT_LIST_HEAD(&priv->catas_err.list); 136 init_timer(&priv->catas_err.timer); 137 priv->catas_err.map = NULL; 138 139 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + 140 priv->fw.catas_offset; 141 142 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); 143 if (!priv->catas_err.map) { 144 mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", 145 (unsigned long long) addr); 146 return; 147 } 148 149 priv->catas_err.timer.data = (unsigned long) dev; 150 priv->catas_err.timer.function = poll_catas; 151 priv->catas_err.timer.expires = 152 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); 153 add_timer(&priv->catas_err.timer); 154 } 155 156 void mlx4_stop_catas_poll(struct mlx4_dev *dev) 157 { 158 struct mlx4_priv *priv = mlx4_priv(dev); 159 160 del_timer_sync(&priv->catas_err.timer); 161 162 if (priv->catas_err.map) { 163 iounmap(priv->catas_err.map); 164 priv->catas_err.map = NULL; 165 } 166 167 spin_lock_irq(&catas_lock); 168 list_del_init(&priv->catas_err.list); 169 spin_unlock_irq(&catas_lock); 170 } 171 172 void __init mlx4_catas_init(void) 173 { 174 INIT_WORK(&catas_work, catas_reset); 175 } 176