xref: /src/sys/dev/irdma/icrdma.c (revision 5b7aa6c7bc9db19e8bd34a5b7892fb5df2a3068b)
1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2021 - 2026 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/module.h>
40 #include <sys/sysctl.h>
41 #include <machine/bus.h>
42 #include <linux/device.h>
43 #include <sys/rman.h>
44 
45 #include "ice_rdma.h"
46 #include "irdma_main.h"
47 #include "icrdma_hw.h"
48 
49 #include "irdma_if.h"
50 #include "irdma_di_if.h"
51 
52 /**
53  *  Driver version
54  */
55 char irdma_driver_version[] = "1.3.56-k";
56 
57 /**
58  * irdma_init_tunable - prepare tunables
59  * @rf: RDMA PCI function
60  * @pf_id: id of the pf
61  */
62 static void
63 irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id)
64 {
65 	struct sysctl_oid_list *irdma_oid_list;
66 	struct irdma_tunable_info *t_info = &rf->tun_info;
67 	char pf_name[16];
68 
69 	snprintf(pf_name, 15, "irdma%d", pf_id);
70 	sysctl_ctx_init(&t_info->irdma_sysctl_ctx);
71 
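	/*
	 * All tunables are exposed under a per-PF sysctl tree, dev.irdma<pf_id>,
	 * with dedicated "qos" and "sw_stats" child nodes created below.
	 */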
72 	t_info->irdma_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx,
73 						    SYSCTL_STATIC_CHILDREN(_dev),
74 						    OID_AUTO, pf_name,
75 						    CTLFLAG_RD, NULL, "");
76 
77 	irdma_oid_list = SYSCTL_CHILDREN(t_info->irdma_sysctl_tree);
78 
79 	t_info->qos_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx,
80 						  irdma_oid_list, OID_AUTO,
81 						  "qos", CTLFLAG_RD,
82 						  NULL, "");
83 	t_info->sws_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx,
84 						  irdma_oid_list, OID_AUTO,
85 						  "sw_stats", CTLFLAG_RD,
86 						  NULL, "");
87 	/*
88 	 * debug mask setting
89 	 */
90 	SYSCTL_ADD_S32(&t_info->irdma_sysctl_ctx, irdma_oid_list,
91 		       OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask,
92 		       0, "irdma debug");
93 
94 	/*
95 	 * RoCEv2/iWARP protocol selection; RoCEv2 is the default mode
96 	 */
97 	t_info->roce_ena = 1;
98 	SYSCTL_ADD_U8(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO,
99 		      "roce_enable", CTLFLAG_RDTUN, &t_info->roce_ena, 0,
100 		      "RoCEv2 mode enable");
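	/*
	 * roce_enable is CTLFLAG_RDTUN and is consumed once right here, so it
	 * must be set as a loader tunable before the driver attaches, e.g.
	 * dev.irdma0.roce_enable="0" in /boot/loader.conf to select iWARP for
	 * the first PF (the exact node name follows pf_id).
	 */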
101 
102 	rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY;
103 	if (t_info->roce_ena == 1)
104 		rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
105 	else if (t_info->roce_ena != 0)
106 		printf("%s:%d wrong roce_enable value (%d), using iWARP\n",
107 		       __func__, __LINE__, t_info->roce_ena);
108 	printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__,
109 	       (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? "iWARP" : "RoCEv2",
110 	       t_info->roce_ena);
111 
112 	snprintf(t_info->drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version);
113 	SYSCTL_ADD_STRING(&t_info->irdma_sysctl_ctx, irdma_oid_list,
114 			  OID_AUTO, "drv_ver", CTLFLAG_RDTUN, t_info->drv_ver,
115 			  IRDMA_VER_LEN, "driver version");
116 
117 	irdma_dcqcn_tunables_init(rf);
118 	irdma_sysctl_settings(rf);
119 }
120 
121 /**
122  * irdma_find_handler - obtain hdl object to identify pf
123  * @p_dev: the peer interface structure
124  */
125 static struct irdma_handler *
126 irdma_find_handler(struct ice_rdma_peer *p_dev)
127 {
128 	struct irdma_handler *hdl;
129 	unsigned long flags;
130 
131 	spin_lock_irqsave(&irdma_handler_lock, flags);
132 	list_for_each_entry(hdl, &irdma_handlers, list) {
133 		if (!hdl->iwdev->rf->peer_info)
134 			continue;
135 		if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) {
136 			spin_unlock_irqrestore(&irdma_handler_lock, flags);
137 			return hdl;
138 		}
139 	}
140 	spin_unlock_irqrestore(&irdma_handler_lock, flags);
141 
142 	return NULL;
143 }
144 
145 /**
146  * peer_to_iwdev - return iwdev based on peer
147  * @peer: the peer interface structure
148  */
149 static struct irdma_device *
150 peer_to_iwdev(struct ice_rdma_peer *peer)
151 {
152 	struct irdma_handler *hdl;
153 
154 	hdl = irdma_find_handler(peer);
155 	if (!hdl) {
156 		printf("%s:%d rdma handler not found\n", __func__, __LINE__);
157 		return NULL;
158 	}
159 
160 	return hdl->iwdev;
161 }
162 
163 /**
164  * irdma_get_qos_info - save qos info from parameters to internal struct
 * @rf: RDMA PCI function
165  * @l2params: destination qos, tc and mtu info structure
166  * @qos_info: source DCB settings structure
167  */
168 static void
169 irdma_get_qos_info(struct irdma_pci_f *rf, struct irdma_l2params *l2params,
170 		   struct ice_qos_params *qos_info)
171 {
172 	int i;
173 	char txt[7][128] = {"", "", "", "", "", "", ""};
174 	u8 len;
175 
176 	l2params->num_tc = qos_info->num_tc;
177 	l2params->num_apps = qos_info->num_apps;
178 	l2params->vsi_prio_type = qos_info->vsi_priority_type;
179 	l2params->vsi_rel_bw = qos_info->vsi_relative_bw;
180 	for (i = 0; i < l2params->num_tc; i++) {
181 		l2params->tc_info[i].egress_virt_up =
182 		    qos_info->tc_info[i].egress_virt_up;
183 		l2params->tc_info[i].ingress_virt_up =
184 		    qos_info->tc_info[i].ingress_virt_up;
185 		l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
186 		l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
187 		l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
188 	}
189 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
190 		l2params->up2tc[i] = qos_info->up2tc[i];
191 
192 	if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) {
193 		l2params->dscp_mode = true;
194 		memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
195 	}
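	/*
	 * The remainder of this function only formats the received QoS/DCB
	 * parameters into strings and logs them when the IRDMA_DEBUG_DCB bit
	 * is set in the debug mask.
	 */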
196 	if (!(rf->sc_dev.debug_mask & IRDMA_DEBUG_DCB))
197 		return;
198 	for (i = 0; i < l2params->num_tc; i++) {
199 		len = strlen(txt[0]);
200 		snprintf(txt[0] + len, sizeof(txt[0]) - 5, " %d",
201 			 l2params->tc_info[i].egress_virt_up);
202 		len = strlen(txt[1]);
203 		snprintf(txt[1] + len, sizeof(txt[1]) - 5, " %d",
204 			 l2params->tc_info[i].ingress_virt_up);
205 		len = strlen(txt[2]);
206 		snprintf(txt[2] + len, sizeof(txt[2]) - 5, " %d",
207 			 l2params->tc_info[i].prio_type);
208 		len = strlen(txt[3]);
209 		snprintf(txt[3] + len, sizeof(txt[3]) - 5, " %d",
210 			 l2params->tc_info[i].rel_bw);
211 		len = strlen(txt[4]);
212 		snprintf(txt[4] + len, sizeof(txt[4]) - 5, " %lu",
213 			 l2params->tc_info[i].tc_ctx);
214 	}
215 	len = strlen(txt[5]);
216 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
217 		len += snprintf(txt[5] + len, sizeof(txt[5]) - 5, " %d",
218 				l2params->up2tc[i]);
219 	len = strlen(txt[6]);
220 	for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++)
221 		len += snprintf(txt[6] + len, sizeof(txt[6]) - 5, " %d",
222 				l2params->dscp_map[i]);
223 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_tc:          %d\n", l2params->num_tc);
224 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_apps:        %d\n", l2params->num_apps);
225 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_prio_type:   %d\n", l2params->vsi_prio_type);
226 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_rel_bw:      %d\n", l2params->vsi_rel_bw);
227 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "egress_virt_up: %s\n", txt[0]);
228 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "ingress_virt_up:%s\n", txt[1]);
229 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "prio_type: %s\n", txt[2]);
230 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "rel_bw:    %s\n", txt[3]);
231 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "tc_ctx:    %s\n", txt[4]);
232 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "up2tc:     %s\n", txt[5]);
233 	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "dscp_mode: %s\n", txt[6]);
234 
235 	irdma_debug_buf(&rf->sc_dev, IRDMA_DEBUG_DCB, "l2params", l2params, sizeof(*l2params));
236 }
237 
238 /**
239  * irdma_log_invalid_mtu - check mtu setting validity
240  * @mtu: mtu value
241  * @dev: hardware control device structure
242  */
243 static void
244 irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
245 {
246 	if (mtu < IRDMA_MIN_MTU_IPV4)
247 		irdma_dev_warn(to_ibdev(dev),
248 			       "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n",
249 			       mtu);
250 	else if (mtu < IRDMA_MIN_MTU_IPV6)
251 		irdma_dev_warn(to_ibdev(dev),
252 			       "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\n",
253 			       mtu);
254 }
255 
256 /**
257  * irdma_get_event_name - convert type enum to string
258  * @type: event type enum
259  */
260 static const char *
261 irdma_get_event_name(enum ice_rdma_event_type type)
262 {
263 	switch (type) {
264 	case ICE_RDMA_EVENT_LINK_CHANGE:
265 		return "LINK CHANGE";
266 	case ICE_RDMA_EVENT_MTU_CHANGE:
267 		return "MTU CHANGE";
268 	case ICE_RDMA_EVENT_TC_CHANGE:
269 		return "TC CHANGE";
270 	case ICE_RDMA_EVENT_API_CHANGE:
271 		return "API CHANGE";
272 	case ICE_RDMA_EVENT_CRIT_ERR:
273 		return "CRITICAL ERROR";
274 	case ICE_RDMA_EVENT_RESET:
275 		return "RESET";
276 	case ICE_RDMA_EVENT_QSET_REGISTER:
277 		return "QSET REGISTER";
278 	case ICE_RDMA_EVENT_VSI_FILTER_UPDATE:
279 		return "VSI FILTER UPDATE";
280 	default:
281 		return "UNKNOWN";
282 	}
283 }
284 
285 /**
286  * irdma_event_handler - handle events from the LAN driver
287  * @peer: the peer interface structure
288  * @event: event info structure
289  */
290 static void
291 irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event)
292 {
293 	struct irdma_device *iwdev;
294 	struct irdma_l2params l2params = {};
295 
296 	printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__,
297 	       irdma_get_event_name(event->type),
298 	       event->type, peer->pf_id, if_getdunit(peer->ifp));
299 	iwdev = peer_to_iwdev(peer);
300 	if (!iwdev) {
301 		printf("%s:%d rdma device not found\n", __func__, __LINE__);
302 		return;
303 	}
304 
305 	switch (event->type) {
306 	case ICE_RDMA_EVENT_LINK_CHANGE:
307 		printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
308 		       peer->pf_id, if_getdunit(peer->ifp), event->linkstate,
309 		       event->baudrate);
310 		break;
311 	case ICE_RDMA_EVENT_MTU_CHANGE:
312 		if (iwdev->vsi.mtu != event->mtu) {
313 			l2params.mtu = event->mtu;
314 			l2params.mtu_changed = true;
315 			irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
316 			irdma_change_l2params(&iwdev->vsi, &l2params);
317 		}
318 		break;
319 	case ICE_RDMA_EVENT_TC_CHANGE:
320 		/*
321 		 * 1. check whether this is the pre or post stage, 2. check whether a TC change is already in progress
322 		 */
323 		if (event->prep == iwdev->vsi.tc_change_pending) {
324 			printf("%s:%d can't process %s TC change if TC change is %spending\n",
325 			       __func__, __LINE__,
326 			       event->prep ? "pre" : "post",
327 			       event->prep ? " " : "not ");
328 			goto done;
329 		}
330 		if (!atomic_inc_not_zero(&iwdev->rf->dev_ctx.event_rfcnt)) {
331 			printf("%s:%d (%d) EVENT_TC_CHANGE received, but not processed %d\n",
332 			       __func__, __LINE__, if_getdunit(peer->ifp),
333 			       atomic_read(&iwdev->rf->dev_ctx.event_rfcnt));
334 			break;
335 		}
336 		if (event->prep) {
337 			iwdev->vsi.tc_change_pending = true;
338 			irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
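			/*
			 * Wait (bounded by a timeout) for the outstanding QP
			 * suspend requests to drain before resetting the work
			 * scheduler tree for the new TC configuration.
			 */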
339 			wait_event_timeout(iwdev->suspend_wq,
340 					   !atomic_read(&iwdev->vsi.qp_suspend_reqs),
341 					   IRDMA_EVENT_TIMEOUT_MS * 10);
342 			irdma_ws_reset(&iwdev->vsi);
343 			printf("%s:%d TC change preparation done\n", __func__, __LINE__);
344 		} else {
345 			l2params.tc_changed = true;
346 			irdma_get_qos_info(iwdev->rf, &l2params, &event->port_qos);
347 			if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
348 				iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
349 
350 			irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params);
351 			irdma_change_l2params(&iwdev->vsi, &l2params);
352 			printf("%s:%d TC change done\n", __func__, __LINE__);
353 		}
354 		atomic_dec(&iwdev->rf->dev_ctx.event_rfcnt);
355 		break;
356 	case ICE_RDMA_EVENT_CRIT_ERR:
357 		if (event->oicr_reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
358 			u32 pe_criterr;
359 
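			/*
			 * A known Q1 resource error is only logged as a
			 * warning; any other PE critical error (or an HMC
			 * error below) marks the function for reset.
			 */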
360 #define IRDMA_Q1_RESOURCE_ERR  0x0001024d
361 			pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
362 			if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) {
363 				irdma_pr_err("critical PE Error, GLPE_CRITERR=0x%08x\n",
364 					     pe_criterr);
365 				iwdev->rf->reset = true;
366 			} else {
367 				irdma_dev_warn(to_ibdev(&iwdev->rf->sc_dev),
368 					       "Q1 Resource Check\n");
369 			}
370 		}
371 		if (event->oicr_reg & IRDMAPFINT_OICR_HMC_ERR_M) {
372 			irdma_pr_err("HMC Error\n");
373 			iwdev->rf->reset = true;
374 		}
375 		if (iwdev->rf->reset)
376 			iwdev->rf->gen_ops.request_reset(iwdev->rf);
377 		break;
378 	case ICE_RDMA_EVENT_RESET:
379 		iwdev->rf->reset = true;
380 		break;
381 	default:
382 		printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type);
383 	}
384 done:
385 	return;
386 }
387 
388 /**
389  * irdma_link_change - Callback for link state change
390  * @peer: the peer interface structure
391  * @linkstate: state of the link
392  * @baudrate: speed of the link
393  */
394 static void
395 irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate)
396 {
397 	printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
398 	       peer->pf_id, if_getdunit(peer->ifp), linkstate, baudrate);
399 }
400 
401 /**
402  * irdma_finalize_task - Finish open or close phase in a separate thread
403  * @context: instance holding peer and iwdev information
404  *
405  * Triggered from irdma_open or irdma_close to perform rt_init_hw or
406  * rt_deinit_hw respectively. Does registration and unregistration of
407  * the device.
408  */
409 static void
410 irdma_finalize_task(void *context, int pending)
411 {
412 	struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context;
413 	struct irdma_device *iwdev = task_arg->iwdev;
414 	struct irdma_pci_f *rf = iwdev->rf;
415 	struct ice_rdma_peer *peer = task_arg->peer;
416 	struct irdma_l2params l2params = {{{0}}};
417 	struct ice_rdma_request req = {0};
418 	int status = 0;
419 
420 	if (iwdev->iw_status) {
421 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
422 			    "Starting deferred closing %d (%d)\n",
423 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
424 		atomic_dec(&rf->dev_ctx.event_rfcnt);
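		/*
		 * Undo the GEN_2 setup performed in the open path below:
		 * stop the periodic CQP poll worker and free the check STAG.
		 */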
425 		if (rf->rdma_ver == IRDMA_GEN_2 && !rf->ftype) {
426 			cancel_delayed_work_sync(&iwdev->rf->dwork_cqp_poll);
427 			irdma_free_stag(iwdev->rf->iwdev, iwdev->rf->chk_stag);
428 		}
429 		wait_event_timeout(iwdev->suspend_wq,
430 				   !atomic_read(&rf->dev_ctx.event_rfcnt),
431 				   IRDMA_MAX_TIMEOUT);
432 		if (atomic_read(&rf->dev_ctx.event_rfcnt) != 0) {
433 			printf("%s:%d (%d) waiting for event_rfcnt (%d) timeout, proceed with unload\n",
434 			       __func__, __LINE__, if_getdunit(peer->ifp),
435 			       atomic_read(&rf->dev_ctx.event_rfcnt));
436 		}
437 		irdma_dereg_ipaddr_event_cb(rf);
438 		irdma_ib_unregister_device(iwdev);
439 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
440 		req.enable_filter = false;
441 		IRDMA_DI_REQ_HANDLER(peer, &req);
442 		irdma_cleanup_dead_qps(&iwdev->vsi);
443 		irdma_rt_deinit_hw(iwdev);
444 	} else {
445 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
446 			    "Starting deferred opening %d (%d)\n",
447 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
448 		irdma_get_qos_info(iwdev->rf, &l2params, &peer->initial_qos_info);
449 		if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
450 			iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
451 
452 #define IRDMA_MIN_MTU_HEADERS (IB_GRH_BYTES + IB_BTH_BYTES + 28)
453 		l2params.mtu = (peer->mtu) ? peer->mtu :
454 		    ib_mtu_enum_to_int(IB_MTU_256) +
455 		    IRDMA_MIN_MTU_HEADERS;
456 		status = irdma_rt_init_hw(iwdev, &l2params);
457 		if (status) {
458 			irdma_pr_err("RT init failed %d\n", status);
459 			ib_dealloc_device(&iwdev->ibdev);
460 			return;
461 		}
462 		status = irdma_ib_register_device(iwdev);
463 		if (status) {
464 			irdma_pr_err("Registration failed %d\n", status);
465 			irdma_rt_deinit_hw(iwdev);
466 			ib_dealloc_device(&iwdev->ibdev);
			/* iwdev has been freed; skip the rest of the open path */
			return;
467 		}
468 		irdma_qos_info_tunables_init(rf);
469 		irdma_sw_stats_tunables_init(rf);
470 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
471 		req.enable_filter = true;
472 		IRDMA_DI_REQ_HANDLER(peer, &req);
473 		irdma_reg_ipaddr_event_cb(rf);
474 		atomic_inc(&rf->dev_ctx.event_rfcnt);
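		/*
		 * For GEN_2 (when rf->ftype is not set), allocate a STAG used
		 * for checking and start a delayed CQP poll worker (first run
		 * after roughly 5 seconds); both are torn down in the close
		 * path above.
		 */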
475 		if (rf->rdma_ver == IRDMA_GEN_2 && !rf->ftype) {
476 			INIT_DELAYED_WORK(&rf->dwork_cqp_poll, cqp_poll_worker);
477 			rf->chk_stag = irdma_create_stag(rf->iwdev);
478 			rf->used_mrs++;
479 			mod_delayed_work(iwdev->cleanup_wq, &rf->dwork_cqp_poll,
480 					 msecs_to_jiffies(5000));
481 		}
482 
483 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
484 			    "Deferred opening finished %d (%d)\n",
485 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
486 	}
487 }
488 
489 /**
490  * irdma_alloc_pcidev - allocate memory for pcidev and populate data
491  * @peer: the new peer interface structure
492  * @rf: RDMA PCI function
493  */
494 static int
495 irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf)
496 {
497 	rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
498 	if (!rf->pcidev) {
499 		return -ENOMEM;
500 	}
501 	if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev)) {
		kfree(rf->pcidev);
502 		return -ENOMEM;
	}
503 
504 	return 0;
505 }
506 
507 /**
508  * irdma_dealloc_pcidev - deallocate memory for pcidev
509  * @rf: RDMA PCI function
510  */
511 static void
512 irdma_dealloc_pcidev(struct irdma_pci_f *rf)
513 {
514 	linux_pci_detach_device(rf->pcidev);
515 	kfree(rf->pcidev);
516 }
517 
518 /**
519  * irdma_fill_device_info - assign initial values to rf variables
520  * @iwdev: irdma device
521  * @peer: the peer interface structure
522  */
523 static void
524 irdma_fill_device_info(struct irdma_device *iwdev,
525 		       struct ice_rdma_peer *peer)
526 {
527 	struct irdma_pci_f *rf = iwdev->rf;
528 
529 	rf->peer_info = peer;
530 	rf->gen_ops.register_qset = irdma_register_qset;
531 	rf->gen_ops.unregister_qset = irdma_unregister_qset;
532 
533 	rf->rdma_ver = IRDMA_GEN_2;
534 	rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
535 	rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
536 	rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
537 	rf->check_fc = irdma_check_fc_for_qp;
538 	rf->gen_ops.request_reset = irdma_request_reset;
539 	irdma_set_rf_user_cfg_params(rf);
540 
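	/*
	 * Reuse the PCI memory resource already mapped by the ice driver:
	 * the bus space tag/handle, BAR size and virtual address all come
	 * straight from peer->pci_mem.
	 */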
541 	rf->default_vsi.vsi_idx = peer->pf_vsi_num;
542 	rf->dev_ctx.dev = peer->dev;
543 	rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem);
544 	rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem);
545 	rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem);
546 
547 	rf->hw.dev_context = &rf->dev_ctx;
548 	rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem);
549 	rf->msix_count = peer->msix.count;
550 	rf->msix_info.entry = peer->msix.base;
551 	rf->msix_info.vector = peer->msix.count;
552 	printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__,
553 	       rf->msix_count, rf->msix_info.entry, rf->msix_info.vector);
554 
555 	rf->iwdev = iwdev;
556 	iwdev->netdev = peer->ifp;
557 	iwdev->init_state = INITIAL_STATE;
558 	iwdev->vsi_num = peer->pf_vsi_num;
559 	iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
560 	iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
561 	iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
562 	iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
563 	iwdev->roce_rtomin = 5;
564 
565 	if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) {
566 		iwdev->roce_mode = true;
567 	}
568 }
569 
570 /**
571  * irdma_probe - Callback to probe a new RDMA peer device
572  * @peer: the new peer interface structure
573  *
574  * Callback implementing the RDMA_PROBE function. Called by the ice driver to
575  * notify the RDMA client driver that a new device has been created
576  */
577 static int
578 irdma_probe(struct ice_rdma_peer *peer)
579 {
580 	struct irdma_device *iwdev;
581 	struct irdma_pci_f *rf;
582 	struct irdma_handler *hdl;
583 	int err = 0;
584 
585 	irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p\n",
586 		      irdma_driver_version, peer, peer->pf_id, peer->ifp);
587 	irdma_pr_info("peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n",
588 		      if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag);
589 
590 	hdl = irdma_find_handler(peer);
591 	if (hdl)
592 		return -EBUSY;
593 
594 	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
595 	if (!hdl)
596 		return -ENOMEM;
597 
598 	iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev));
599 	if (!iwdev) {
600 		kfree(hdl);
601 		return -ENOMEM;
602 	}
603 
604 	iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
605 	if (!iwdev->rf) {
606 		ib_dealloc_device(&iwdev->ibdev);
607 		kfree(hdl);
608 		return -ENOMEM;
609 	}
610 	hdl->iwdev = iwdev;
611 	iwdev->hdl = hdl;
612 
613 	irdma_init_tunable(iwdev->rf, if_getdunit(peer->ifp));
614 	irdma_fill_device_info(iwdev, peer);
615 	rf = iwdev->rf;
616 
617 	if (irdma_alloc_pcidev(peer, rf)) {
		err = -ENOMEM;
618 		goto err_pcidev;
	}
619 
620 	irdma_add_handler(hdl);
621 
622 	if (irdma_ctrl_init_hw(rf)) {
623 		err = -EIO;
624 		goto err_ctrl_init;
625 	}
626 
627 	rf->dev_ctx.task_arg.peer = peer;
628 	rf->dev_ctx.task_arg.iwdev = iwdev;
630 
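	/*
	 * The time-consuming part of bring-up (irdma_rt_init_hw() and ib
	 * device registration in irdma_finalize_task()) is deferred to a
	 * dedicated taskqueue thread so that probe itself returns quickly.
	 */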
631 	TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg);
632 	hdl->deferred_tq = taskqueue_create_fast("irdma_defer",
633 						 M_NOWAIT, taskqueue_thread_enqueue,
634 						 &hdl->deferred_tq);
635 	taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t");
636 
637 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
638 
639 	return 0;
640 
641 err_ctrl_init:
642 	irdma_del_handler(hdl);
643 	irdma_dealloc_pcidev(rf);
644 err_pcidev:
645 	kfree(iwdev->rf);
646 	ib_dealloc_device(&iwdev->ibdev);
647 	kfree(hdl);
648 
649 	return err;
650 }
651 
652 /**
653  * irdma_remove - Callback to remove an RDMA peer device
654  * @peer: the peer interface structure being removed
655  *
656  * Callback implementing the RDMA_REMOVE function. Called by the ice driver to
657  * notify the RDMA client driver that the device will be deleted
658  */
659 static int
660 irdma_remove(struct ice_rdma_peer *peer)
661 {
662 	struct irdma_handler *hdl;
663 	struct irdma_device *iwdev;
664 
665 	irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT,
666 		    "removing %s irdma%d\n", __func__, if_getdunit(peer->ifp));
667 
668 	hdl = irdma_find_handler(peer);
669 	if (!hdl)
670 		return 0;
671 
672 	iwdev = hdl->iwdev;
673 
674 	if (iwdev->vsi.tc_change_pending) {
675 		iwdev->vsi.tc_change_pending = false;
676 		irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME);
677 	}
678 
679 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
680 
681 	taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task);
682 	taskqueue_free(hdl->deferred_tq);
683 	hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL;
684 	hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL;
685 
686 	sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx);
687 	hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL;
688 	hdl->iwdev->rf->tun_info.qos_sysctl_tree = NULL;
689 	hdl->iwdev->rf->tun_info.sws_sysctl_tree = NULL;
690 
691 	irdma_ctrl_deinit_hw(iwdev->rf);
692 
693 	irdma_dealloc_pcidev(iwdev->rf);
694 
695 	irdma_del_handler(iwdev->hdl);
696 	kfree(iwdev->hdl);
697 	kfree(iwdev->rf);
698 	ib_dealloc_device(&iwdev->ibdev);
699 	irdma_pr_info("IRDMA hardware deinitialization complete irdma%d\n",
700 		      if_getdunit(peer->ifp));
701 
702 	return 0;
703 }
704 
705 /**
706  * irdma_open - Callback for operation open for RDMA device
707  * @peer: the new peer interface structure
708  *
709  * Callback implementing the RDMA_OPEN function. Called by the ice driver to
710  * notify the RDMA client driver that a new device has been initialized.
711  */
712 static int
713 irdma_open(struct ice_rdma_peer *peer)
714 {
715 	struct irdma_device *iwdev;
716 	struct ice_rdma_event event = {0};
717 
718 	iwdev = peer_to_iwdev(peer);
719 	if (iwdev) {
720 		event.type = ICE_RDMA_EVENT_MTU_CHANGE;
721 		event.mtu = peer->mtu;
722 
723 		irdma_event_handler(peer, &event);
724 	} else {
725 		irdma_probe(peer);
726 	}
727 
728 	return 0;
729 }
730 
731 /**
732  * irdma_close - Callback to notify that a peer device is down
733  * @peer: the RDMA peer device being stopped
734  *
735  * Callback implementing the RDMA_CLOSE function. Called by the ice driver to
736  * notify the RDMA client driver that a peer device is being stopped.
737  */
738 static int
739 irdma_close(struct ice_rdma_peer *peer)
740 {
741 	/*
742 	 * This is called when ifconfig down or pf-reset is about to happen.
743 	 */
744 	struct irdma_device *iwdev;
745 
746 	iwdev = peer_to_iwdev(peer);
747 	if (iwdev && iwdev->rf->reset)
748 		irdma_remove(peer);
749 
750 	return 0;
751 }
752 
753 /**
754  * irdma_prep_for_unregister - ensure the driver is ready to unregister
755  */
756 static void
757 irdma_prep_for_unregister(void)
758 {
759 	struct irdma_handler *hdl;
760 	unsigned long flags;
761 	bool hdl_valid;
762 
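	/*
	 * Close and remove every peer that is still attached.  irdma_remove()
	 * drops the handler from irdma_handlers, so the scan restarts until
	 * no handler with a valid peer_info remains.
	 */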
763 	do {
764 		hdl_valid = false;
765 		spin_lock_irqsave(&irdma_handler_lock, flags);
766 		list_for_each_entry(hdl, &irdma_handlers, list) {
767 			if (!hdl->iwdev->rf->peer_info)
768 				continue;
769 			hdl_valid = true;
770 			break;
771 		}
772 		spin_unlock_irqrestore(&irdma_handler_lock, flags);
773 		if (!hdl || !hdl_valid)
774 			break;
775 		IRDMA_CLOSE(hdl->iwdev->rf->peer_info);
776 		IRDMA_REMOVE(hdl->iwdev->rf->peer_info);
777 	} while (1);
778 }
779 
780 static kobj_method_t irdma_methods[] = {
781 	KOBJMETHOD(irdma_probe, irdma_probe),
782 	KOBJMETHOD(irdma_open, irdma_open),
783 	KOBJMETHOD(irdma_close, irdma_close),
784 	KOBJMETHOD(irdma_remove, irdma_remove),
785 	KOBJMETHOD(irdma_link_change, irdma_link_change),
786 	KOBJMETHOD(irdma_event_handler, irdma_event_handler),
787 	KOBJMETHOD_END
788 };
789 
790 /* declare irdma_class which extends the ice_rdma_di class */
791 DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class);
792 
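/*
 * Registration data handed to the ice driver; ice_rdma_register() is
 * expected to reject the client if this interface version does not match
 * the one the LAN driver was built against.
 */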
793 static struct ice_rdma_info irdma_info = {
794 	.major_version = ICE_RDMA_MAJOR_VERSION,
795 	.minor_version = ICE_RDMA_MINOR_VERSION,
796 	.patch_version = ICE_RDMA_PATCH_VERSION,
797 	.rdma_class = &irdma_class,
798 };
799 
800 /**
801  * irdma_module_event_handler - Module event handler callback
802  * @mod: unused mod argument
803  * @what: the module event to handle
804  * @arg: unused module event argument
805  *
806  * Callback used by the FreeBSD module stack to notify the driver of module
807  * events. Used to implement custom handling for certain module events such as
808  * load and unload.
809  */
810 static int
811 irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg)
812 {
813 	switch (what) {
814 	case MOD_LOAD:
815 		printf("Loading irdma module\n");
816 		return ice_rdma_register(&irdma_info);
817 	case MOD_UNLOAD:
818 		printf("Unloading irdma module\n");
819 		irdma_prep_for_unregister();
820 		ice_rdma_unregister();
821 		return (0);
822 	default:
823 		return (EOPNOTSUPP);
824 	}
825 
826 	return (0);
827 }
828 
829 static moduledata_t irdma_moduledata = {
830 	"irdma",
831 	irdma_module_event_handler,
832 	NULL
833 };
834 
835 DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY);
836 MODULE_VERSION(irdma, 1);
837 MODULE_DEPEND(irdma, ice, 1, 1, 1);
838 MODULE_DEPEND(irdma, ibcore, 1, 1, 1);
839