xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en/health.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Mellanox Technologies.
3 
4 #include "health.h"
5 #include "lib/eq.h"
6 #include "lib/mlx5.h"
7 
/*
 * Begin a named object in @fmsg: start a pair nest keyed by @name and then
 * start an object nest inside it. Every call must be balanced by
 * mlx5e_health_fmsg_named_obj_nest_end(), which closes both in reverse order.
 */
void mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	/* The pair must be opened first so the object becomes its value. */
	devlink_fmsg_pair_nest_start(fmsg, name);
	devlink_fmsg_obj_nest_start(fmsg);
}
13 
/*
 * Finish a named object in @fmsg that was begun with
 * mlx5e_health_fmsg_named_obj_nest_start(): end the object nest, then end
 * the pair nest that wraps it.
 */
void mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	/* Close in the reverse order of opening: inner object, outer pair. */
	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);
}
19 
mlx5e_health_cq_diag_fmsg(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)20 void mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
21 {
22 	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
23 	u8 hw_status;
24 	void *cqc;
25 
26 	mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
27 	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
28 	hw_status = MLX5_GET(cqc, cqc, status);
29 
30 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
31 	devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
32 	devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
33 	devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
34 	devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
35 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
36 }
37 
mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)38 void mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
39 {
40 	u8 cq_log_stride;
41 	u32 cq_sz;
42 
43 	cq_sz = mlx5_cqwq_get_size(&cq->wq);
44 	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
45 
46 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
47 	devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
48 	devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
49 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
50 }
51 
mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp * eq,struct devlink_fmsg * fmsg)52 void mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
53 {
54 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
55 	devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
56 	devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
57 	devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
58 	devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
59 	devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
60 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
61 }
62 
/*
 * Create the health reporters of @priv: TX first, then RX.
 * mlx5e_health_destroy_reporters() tears them down in the opposite order.
 */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_tx_create(priv);
	mlx5e_reporter_rx_create(priv);
}
68 
/*
 * Destroy the health reporters of @priv: RX first, then TX, i.e. the
 * reverse of the order used by mlx5e_health_create_reporters().
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}
74 
mlx5e_health_channels_update(struct mlx5e_priv * priv)75 void mlx5e_health_channels_update(struct mlx5e_priv *priv)
76 {
77 	if (priv->tx_reporter)
78 		devlink_health_reporter_state_update(priv->tx_reporter,
79 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
80 	if (priv->rx_reporter)
81 		devlink_health_reporter_state_update(priv->rx_reporter,
82 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
83 }
84 
/*
 * Bring send queue @sqn from the error state back to ready in two steps:
 * ERR -> RST, then RST -> RDY.
 *
 * Returns 0 on success, or the error returned by mlx5e_modify_sq() for the
 * first transition that fails (a message is logged on @dev in that case).
 */
int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
{
	struct mlx5e_modify_sq_param to_reset = {
		.curr_state = MLX5_SQC_STATE_ERR,
		.next_state = MLX5_SQC_STATE_RST,
	};
	struct mlx5e_modify_sq_param to_ready = {
		.curr_state = MLX5_SQC_STATE_RST,
		.next_state = MLX5_SQC_STATE_RDY,
	};
	int err;

	err = mlx5e_modify_sq(mdev, sqn, &to_reset);
	if (err) {
		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
		return err;
	}

	/* Only attempt the second transition once the SQ is in reset. */
	err = mlx5e_modify_sq(mdev, sqn, &to_ready);
	if (err)
		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);

	return err;
}
111 
mlx5e_health_recover_channels(struct mlx5e_priv * priv)112 int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
113 {
114 	int err = 0;
115 
116 	rtnl_lock();
117 	netdev_lock(priv->netdev);
118 	mutex_lock(&priv->state_lock);
119 
120 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
121 		goto out;
122 
123 	err = mlx5e_safe_reopen_channels(priv);
124 
125 out:
126 	mutex_unlock(&priv->state_lock);
127 	netdev_unlock(priv->netdev);
128 	rtnl_unlock();
129 
130 	return err;
131 }
132 
mlx5e_health_channel_eq_recover(struct net_device * dev,struct mlx5_eq_comp * eq,struct mlx5e_ch_stats * stats)133 int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
134 				    struct mlx5e_ch_stats *stats)
135 {
136 	u32 eqe_count;
137 
138 	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
139 		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
140 
141 	eqe_count = mlx5_eq_poll_irq_disabled(eq);
142 	if (!eqe_count)
143 		return -EIO;
144 
145 	netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
146 		   eqe_count, eq->core.eqn);
147 
148 	stats->eq_rearm++;
149 	return 0;
150 }
151 
mlx5e_health_report(struct mlx5e_priv * priv,struct devlink_health_reporter * reporter,char * err_str,struct mlx5e_err_ctx * err_ctx)152 int mlx5e_health_report(struct mlx5e_priv *priv,
153 			struct devlink_health_reporter *reporter, char *err_str,
154 			struct mlx5e_err_ctx *err_ctx)
155 {
156 	netdev_err(priv->netdev, "%s\n", err_str);
157 
158 	if (!reporter)
159 		return err_ctx->recover(err_ctx->ctx);
160 
161 	return devlink_health_report(reporter, err_str, err_ctx);
162 }
163 
164 #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg * fmsg,const void * value,u32 value_len)165 static void mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
166 					 const void *value, u32 value_len)
167 
168 {
169 	u32 data_size;
170 	u32 offset;
171 
172 	for (offset = 0; offset < value_len; offset += data_size) {
173 		data_size = value_len - offset;
174 		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
175 			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
176 		devlink_fmsg_binary_put(fmsg, value + offset, data_size);
177 	}
178 }
179 
mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv * priv,struct mlx5_rsc_key * key,struct devlink_fmsg * fmsg)180 int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
181 			       struct devlink_fmsg *fmsg)
182 {
183 	struct mlx5_core_dev *mdev = priv->mdev;
184 	struct mlx5_rsc_dump_cmd *cmd;
185 	int cmd_err, err = 0;
186 	struct page *page;
187 	int size;
188 
189 	if (IS_ERR_OR_NULL(mdev->rsc_dump))
190 		return -EOPNOTSUPP;
191 
192 	page = alloc_page(GFP_KERNEL);
193 	if (!page)
194 		return -ENOMEM;
195 
196 	devlink_fmsg_binary_pair_nest_start(fmsg, "data");
197 
198 	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
199 	if (IS_ERR(cmd)) {
200 		err = PTR_ERR(cmd);
201 		goto free_page;
202 	}
203 
204 	do {
205 		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
206 		if (cmd_err < 0) {
207 			err = cmd_err;
208 			goto destroy_cmd;
209 		}
210 
211 		mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
212 	} while (cmd_err > 0);
213 
214 destroy_cmd:
215 	mlx5_rsc_dump_cmd_destroy(cmd);
216 	devlink_fmsg_binary_pair_nest_end(fmsg);
217 free_page:
218 	__free_page(page);
219 	return err;
220 }
221 
mlx5e_health_queue_dump(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg,int queue_idx,char * lbl)222 void mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
223 			     int queue_idx, char *lbl)
224 {
225 	struct mlx5_rsc_key key = {};
226 
227 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
228 	key.index1 = queue_idx;
229 	key.size = PAGE_SIZE;
230 	key.num_of_obj1 = 1;
231 
232 	devlink_fmsg_obj_nest_start(fmsg);
233 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
234 	devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
235 	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
236 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
237 	devlink_fmsg_obj_nest_end(fmsg);
238 }
239