xref: /freebsd/sys/dev/irdma/icrdma.c (revision 1d386b48)
1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2021 - 2022 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/module.h>
40 #include <sys/sysctl.h>
41 #include <machine/bus.h>
42 #include <linux/device.h>
43 #include <sys/rman.h>
44 
45 #include "ice_rdma.h"
46 #include "irdma_main.h"
47 #include "icrdma_hw.h"
48 
49 #include "irdma_if.h"
50 #include "irdma_di_if.h"
51 
/**
 *  Driver version
 *
 *  Version string of the irdma driver. It is printed by irdma_probe() and
 *  copied into the per-function "drv_ver" sysctl by irdma_init_tunable().
 */
char irdma_driver_version[] = "1.1.11-k";
56 
57 /**
58  * irdma_init_tunable - prepare tunables
59  * @rf: RDMA PCI function
60  * @pf_id: id of the pf
61  */
62 static void
63 irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id)
64 {
65 	struct sysctl_oid_list *irdma_sysctl_oid_list;
66 	char pf_name[16];
67 
68 	snprintf(pf_name, 15, "irdma%d", pf_id);
69 	sysctl_ctx_init(&rf->tun_info.irdma_sysctl_ctx);
70 
71 	rf->tun_info.irdma_sysctl_tree = SYSCTL_ADD_NODE(&rf->tun_info.irdma_sysctl_ctx,
72 							 SYSCTL_STATIC_CHILDREN(_dev),
73 							 OID_AUTO, pf_name, CTLFLAG_RD,
74 							 NULL, "");
75 
76 	irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree);
77 
78 	/*
79 	 * debug mask setting
80 	 */
81 	SYSCTL_ADD_S32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list,
82 		       OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask,
83 		       0, "irdma debug");
84 
85 	/*
86 	 * RoCEv2/iWARP setting RoCEv2 the default mode
87 	 */
88 	rf->tun_info.roce_ena = 1;
89 	SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO,
90 		      "roce_enable", CTLFLAG_RDTUN, &rf->tun_info.roce_ena, 0,
91 		      "RoCEv2 mode enable");
92 
93 	rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY;
94 	if (rf->tun_info.roce_ena == 1)
95 		rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
96 	else if (rf->tun_info.roce_ena != 0)
97 		printf("%s:%d wrong roce_enable value (%d), using iWARP\n",
98 		       __func__, __LINE__, rf->tun_info.roce_ena);
99 	printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__,
100 	       (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? "iWARP" : "RoCEv2",
101 	       rf->tun_info.roce_ena);
102 
103 	snprintf(rf->tun_info.drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version);
104 	SYSCTL_ADD_STRING(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list,
105 			  OID_AUTO, "drv_ver", CTLFLAG_RDTUN, rf->tun_info.drv_ver,
106 			  IRDMA_VER_LEN, "driver version");
107 
108 	irdma_dcqcn_tunables_init(rf);
109 }
110 
111 /**
112  * irdma_find_handler - obtain hdl object to identify pf
113  * @p_dev: the peer interface structure
114  */
115 static struct irdma_handler *
116 irdma_find_handler(struct ice_rdma_peer *p_dev)
117 {
118 	struct irdma_handler *hdl;
119 	unsigned long flags;
120 
121 	spin_lock_irqsave(&irdma_handler_lock, flags);
122 	list_for_each_entry(hdl, &irdma_handlers, list) {
123 		if (!hdl)
124 			continue;
125 		if (!hdl->iwdev->rf->peer_info)
126 			continue;
127 		if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) {
128 			spin_unlock_irqrestore(&irdma_handler_lock, flags);
129 			return hdl;
130 		}
131 	}
132 	spin_unlock_irqrestore(&irdma_handler_lock, flags);
133 
134 	return NULL;
135 }
136 
137 /**
138  * peer_to_iwdev - return iwdev based on peer
139  * @peer: the peer interface structure
140  */
141 static struct irdma_device *
142 peer_to_iwdev(struct ice_rdma_peer *peer)
143 {
144 	struct irdma_handler *hdl;
145 
146 	hdl = irdma_find_handler(peer);
147 	if (!hdl) {
148 		printf("%s:%d rdma handler not found\n", __func__, __LINE__);
149 		return NULL;
150 	}
151 
152 	return hdl->iwdev;
153 }
154 
155 /**
156  * irdma_get_qos_info - save qos info from parameters to internal struct
157  * @l2params: destination, qos, tc, mtu info structure
158  * @qos_info: source, DCB settings structure
159  */
160 static void
161 irdma_get_qos_info(struct irdma_l2params *l2params, struct ice_qos_params *qos_info)
162 {
163 	int i;
164 
165 	l2params->num_tc = qos_info->num_tc;
166 	l2params->num_apps = qos_info->num_apps;
167 	l2params->vsi_prio_type = qos_info->vsi_priority_type;
168 	l2params->vsi_rel_bw = qos_info->vsi_relative_bw;
169 	for (i = 0; i < l2params->num_tc; i++) {
170 		l2params->tc_info[i].egress_virt_up =
171 		    qos_info->tc_info[i].egress_virt_up;
172 		l2params->tc_info[i].ingress_virt_up =
173 		    qos_info->tc_info[i].ingress_virt_up;
174 		l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
175 		l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
176 		l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
177 	}
178 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
179 		l2params->up2tc[i] = qos_info->up2tc[i];
180 
181 	if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) {
182 		l2params->dscp_mode = true;
183 		memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
184 	}
185 	printf("%s:%d: l2params settings:\n num_tc %d,\n num_apps %d,\n",
186 	       __func__, __LINE__, l2params->num_tc, l2params->num_apps);
187 	printf(" vsi_prio_type %d,\n vsi_rel_bw %d,\n egress_virt_up:",
188 	       l2params->vsi_prio_type, l2params->vsi_rel_bw);
189 	for (i = 0; i < l2params->num_tc; i++)
190 		printf(" %d", l2params->tc_info[i].egress_virt_up);
191 	printf("\n ingress_virt_up:");
192 	for (i = 0; i < l2params->num_tc; i++)
193 		printf(" %d", l2params->tc_info[i].ingress_virt_up);
194 	printf("\n prio_type:");
195 	for (i = 0; i < l2params->num_tc; i++)
196 		printf(" %d", l2params->tc_info[i].prio_type);
197 	printf("\n rel_bw:");
198 	for (i = 0; i < l2params->num_tc; i++)
199 		printf(" %d", l2params->tc_info[i].rel_bw);
200 	printf("\n tc_ctx:");
201 	for (i = 0; i < l2params->num_tc; i++)
202 		printf(" %lu", l2params->tc_info[i].tc_ctx);
203 	printf("\n up2tc:");
204 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
205 		printf(" %d", l2params->up2tc[i]);
206 	printf(" dscp_mode: %d,\n", l2params->dscp_mode);
207 	for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++)
208 		printf(" %d", l2params->dscp_map[i]);
209 	printf("\n");
210 
211 	dump_struct(l2params, sizeof(*l2params), "l2params");
212 }
213 
214 /**
215  * irdma_log_invalid_mtu - check mtu setting validity
216  * @mtu: mtu value
217  * @dev: hardware control device structure
218  */
219 static void
220 irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
221 {
222 	if (mtu < IRDMA_MIN_MTU_IPV4)
223 		irdma_dev_warn(to_ibdev(dev),
224 			       "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n",
225 			       mtu);
226 	else if (mtu < IRDMA_MIN_MTU_IPV6)
227 		irdma_dev_warn(to_ibdev(dev),
228 			       "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\\n",
229 			       mtu);
230 }
231 
232 /**
233  * irdma_event_handler - handling events from lan driver
234  * @peer: the peer interface structure
235  * @event: event info structure
236  */
237 static void
238 irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event)
239 {
240 	struct irdma_device *iwdev;
241 	struct irdma_l2params l2params = {};
242 
243 	printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__,
244 	       (event->type == 1) ? "LINK CHANGE" :
245 	       (event->type == 2) ? "MTU CHANGE" :
246 	       (event->type == 3) ? "TC CHANGE" : "UNKNOWN",
247 	       event->type, peer->pf_id, if_getdunit(peer->ifp));
248 	iwdev = peer_to_iwdev(peer);
249 	if (!iwdev) {
250 		printf("%s:%d rdma device not found\n", __func__, __LINE__);
251 		return;
252 	}
253 
254 	switch (event->type) {
255 	case ICE_RDMA_EVENT_LINK_CHANGE:
256 		printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
257 		       peer->pf_id, if_getdunit(peer->ifp), event->linkstate,
258 		       event->baudrate);
259 		break;
260 	case ICE_RDMA_EVENT_MTU_CHANGE:
261 		if (iwdev->vsi.mtu != event->mtu) {
262 			l2params.mtu = event->mtu;
263 			l2params.mtu_changed = true;
264 			irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
265 			irdma_change_l2params(&iwdev->vsi, &l2params);
266 		}
267 		break;
268 	case ICE_RDMA_EVENT_TC_CHANGE:
269 		/*
270 		 * 1. check if it is pre or post 2. check if it is currently being done
271 		 */
272 		if (event->prep == iwdev->vsi.tc_change_pending) {
273 			printf("%s:%d can't process %s TC change if TC change is %spending\n",
274 			       __func__, __LINE__,
275 			       event->prep ? "pre" : "post",
276 			       event->prep ? " " : "not ");
277 			goto done;
278 		}
279 		if (event->prep) {
280 			iwdev->vsi.tc_change_pending = true;
281 			irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
282 			wait_event_timeout(iwdev->suspend_wq,
283 					   !atomic_read(&iwdev->vsi.qp_suspend_reqs),
284 					   IRDMA_EVENT_TIMEOUT_MS * 10);
285 			irdma_ws_reset(&iwdev->vsi);
286 			printf("%s:%d TC change preparation done\n", __func__, __LINE__);
287 		} else {
288 			l2params.tc_changed = true;
289 			irdma_get_qos_info(&l2params, &event->port_qos);
290 			if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
291 				iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
292 
293 			irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params);
294 			irdma_change_l2params(&iwdev->vsi, &l2params);
295 			printf("%s:%d TC change done\n", __func__, __LINE__);
296 		}
297 		break;
298 	case ICE_RDMA_EVENT_CRIT_ERR:
299 		printf("%s:%d event type received: %d\n", __func__, __LINE__, event->type);
300 		break;
301 	default:
302 		printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type);
303 	}
304 done:
305 	return;
306 }
307 
308 /**
309  * irdma_link_change - Callback for link state change
310  * @peer: the peer interface structure
311  * @linkstate: state of the link
312  * @baudrate: speed of the link
313  */
314 static void
315 irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate)
316 {
317 	printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__,
318 	       peer->pf_id, if_getdunit(peer->ifp), linkstate, baudrate);
319 }
320 
321 /**
322  * irdma_finalize_task - Finish open or close phase in a separate thread
323  * @context: instance holding peer and iwdev information
324  *
325  * Triggered from irdma_open or irdma_close to perform rt_init_hw or
326  * rt_deinit_hw respectively. Does registration and unregistration of
327  * the device.
328  */
329 static void
330 irdma_finalize_task(void *context, int pending)
331 {
332 	struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context;
333 	struct irdma_device *iwdev = task_arg->iwdev;
334 	struct irdma_pci_f *rf = iwdev->rf;
335 	struct ice_rdma_peer *peer = task_arg->peer;
336 	struct irdma_l2params l2params = {{{0}}};
337 	struct ice_rdma_request req = {0};
338 	int status = 0;
339 
340 	if (iwdev->iw_status) {
341 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
342 			    "Starting deferred closing %d (%d)\n",
343 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
344 		irdma_dereg_ipaddr_event_cb(rf);
345 		irdma_ib_unregister_device(iwdev);
346 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
347 		req.enable_filter = false;
348 		IRDMA_DI_REQ_HANDLER(peer, &req);
349 		irdma_cleanup_dead_qps(&iwdev->vsi);
350 		irdma_rt_deinit_hw(iwdev);
351 	} else {
352 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
353 			    "Starting deferred opening %d (%d)\n",
354 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
355 		irdma_get_qos_info(&l2params, &peer->initial_qos_info);
356 		if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
357 			iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
358 
359 		l2params.mtu = peer->mtu;
360 		status = irdma_rt_init_hw(iwdev, &l2params);
361 		if (status) {
362 			irdma_pr_err("RT init failed %d\n", status);
363 			ib_dealloc_device(&iwdev->ibdev);
364 			return;
365 		}
366 		status = irdma_ib_register_device(iwdev);
367 		if (status) {
368 			irdma_pr_err("Registration failed %d\n", status);
369 			irdma_rt_deinit_hw(iwdev);
370 			ib_dealloc_device(&iwdev->ibdev);
371 		}
372 		req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE;
373 		req.enable_filter = true;
374 		IRDMA_DI_REQ_HANDLER(peer, &req);
375 		irdma_reg_ipaddr_event_cb(rf);
376 		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT,
377 			    "Deferred opening finished %d (%d)\n",
378 			    rf->peer_info->pf_id, if_getdunit(peer->ifp));
379 	}
380 }
381 
382 /**
383  * irdma_open - Callback for operation open for RDMA device
384  * @peer: the new peer interface structure
385  *
386  * Callback implementing the RDMA_OPEN function. Called by the ice driver to
387  * notify the RDMA client driver that a new device has been initialized.
388  */
389 static int
390 irdma_open(struct ice_rdma_peer *peer)
391 {
392 	struct ice_rdma_event event = {0};
393 
394 	event.type = ICE_RDMA_EVENT_MTU_CHANGE;
395 	event.mtu = peer->mtu;
396 
397 	irdma_event_handler(peer, &event);
398 
399 	return 0;
400 }
401 
/**
 * irdma_close - Callback to notify that a peer device is down
 * @peer: the RDMA peer device being stopped
 *
 * Callback implementing the RDMA_CLOSE function. Called by the ice driver to
 * notify the RDMA client driver that a peer device is being stopped.
 * Nothing is done here; the function always returns 0.
 */
static int
irdma_close(struct ice_rdma_peer *peer)
{
	/*
	 * Called on ifconfig down. The callback is kept for compatibility
	 * with ice and might become useful in the future.
	 */
	return (0);
}
418 
419 /**
420  * irdma_alloc_pcidev - allocate memory for pcidev and populate data
421  * @peer: the new peer interface structure
422  * @rf: RDMA PCI function
423  */
424 static int
425 irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf)
426 {
427 	rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
428 	if (!rf->pcidev) {
429 		return -ENOMEM;
430 	}
431 	if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev))
432 		return -ENOMEM;
433 
434 	return 0;
435 }
436 
437 /**
438  * irdma_dealloc_pcidev - deallocate memory for pcidev
439  * @rf: RDMA PCI function
440  */
441 static void
442 irdma_dealloc_pcidev(struct irdma_pci_f *rf)
443 {
444 	linux_pci_detach_device(rf->pcidev);
445 	kfree(rf->pcidev);
446 }
447 
448 /**
449  * irdma_fill_device_info - assign initial values to rf variables
450  * @iwdev: irdma device
451  * @peer: the peer interface structure
452  */
453 static void
454 irdma_fill_device_info(struct irdma_device *iwdev,
455 		       struct ice_rdma_peer *peer)
456 {
457 	struct irdma_pci_f *rf = iwdev->rf;
458 
459 	rf->peer_info = peer;
460 	rf->gen_ops.register_qset = irdma_register_qset;
461 	rf->gen_ops.unregister_qset = irdma_unregister_qset;
462 
463 	rf->rdma_ver = IRDMA_GEN_2;
464 	rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
465 	rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
466 	rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
467 	rf->check_fc = irdma_check_fc_for_qp;
468 	rf->gen_ops.request_reset = irdma_request_reset;
469 	irdma_set_rf_user_cfg_params(rf);
470 
471 	rf->default_vsi.vsi_idx = peer->pf_vsi_num;
472 	rf->dev_ctx.dev = peer->dev;
473 	rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem);
474 	rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem);
475 	rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem);
476 
477 	rf->hw.dev_context = &rf->dev_ctx;
478 	rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem);
479 	rf->msix_count = peer->msix.count;
480 	rf->msix_info.entry = peer->msix.base;
481 	rf->msix_info.vector = peer->msix.count;
482 	printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__,
483 	       rf->msix_count, rf->msix_info.entry, rf->msix_info.vector);
484 
485 	rf->iwdev = iwdev;
486 	iwdev->netdev = peer->ifp;
487 	iwdev->init_state = INITIAL_STATE;
488 	iwdev->vsi_num = peer->pf_vsi_num;
489 	iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
490 	iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
491 	iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
492 	iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
493 	iwdev->roce_rtomin = 5;
494 
495 	if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) {
496 		iwdev->roce_mode = true;
497 	}
498 }
499 
500 /**
501  * irdma_probe - Callback to probe a new RDMA peer device
502  * @peer: the new peer interface structure
503  *
504  * Callback implementing the RDMA_PROBE function. Called by the ice driver to
505  * notify the RDMA client driver that a new device has been created
506  */
507 static int
508 irdma_probe(struct ice_rdma_peer *peer)
509 {
510 	struct irdma_device *iwdev;
511 	struct irdma_pci_f *rf;
512 	struct irdma_handler *hdl;
513 	int err = 0;
514 
515 	irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n",
516 		      irdma_driver_version, peer, peer->pf_id, peer->ifp,
517 		      if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag);
518 
519 	hdl = irdma_find_handler(peer);
520 	if (hdl)
521 		return -EBUSY;
522 
523 	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
524 	if (!hdl)
525 		return -ENOMEM;
526 
527 	iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev));
528 	if (!iwdev) {
529 		kfree(hdl);
530 		return -ENOMEM;
531 	}
532 
533 	iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
534 	if (!iwdev->rf) {
535 		ib_dealloc_device(&iwdev->ibdev);
536 		kfree(hdl);
537 		return -ENOMEM;
538 	}
539 	hdl->iwdev = iwdev;
540 	iwdev->hdl = hdl;
541 
542 	irdma_init_tunable(iwdev->rf, if_getdunit(peer->ifp));
543 	irdma_fill_device_info(iwdev, peer);
544 	rf = iwdev->rf;
545 
546 	if (irdma_alloc_pcidev(peer, rf))
547 		goto err_pcidev;
548 
549 	irdma_add_handler(hdl);
550 
551 	if (irdma_ctrl_init_hw(rf)) {
552 		err = -EIO;
553 		goto err_ctrl_init;
554 	}
555 
556 	rf->dev_ctx.task_arg.peer = peer;
557 	rf->dev_ctx.task_arg.iwdev = iwdev;
558 	rf->dev_ctx.task_arg.peer = peer;
559 
560 	TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg);
561 	hdl->deferred_tq = taskqueue_create_fast("irdma_defer",
562 						 M_NOWAIT, taskqueue_thread_enqueue,
563 						 &hdl->deferred_tq);
564 	taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t");
565 
566 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
567 
568 	return 0;
569 
570 err_ctrl_init:
571 	irdma_del_handler(hdl);
572 	irdma_dealloc_pcidev(rf);
573 err_pcidev:
574 	kfree(iwdev->rf);
575 	ib_dealloc_device(&iwdev->ibdev);
576 	kfree(hdl);
577 
578 	return err;
579 }
580 
581 /**
582  * irdma_remove - Callback to remove an RDMA peer device
583  * @peer: the new peer interface structure
584  *
585  * Callback implementing the RDMA_REMOVE function. Called by the ice driver to
586  * notify the RDMA client driver that the device wille be delated
587  */
588 static int
589 irdma_remove(struct ice_rdma_peer *peer)
590 {
591 	struct irdma_handler *hdl;
592 	struct irdma_device *iwdev;
593 
594 	irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT,
595 		    "removing %s irdma%d\n", __func__, if_getdunit(peer->ifp));
596 
597 	hdl = irdma_find_handler(peer);
598 	if (!hdl)
599 		return 0;
600 
601 	iwdev = hdl->iwdev;
602 
603 	if (iwdev->vsi.tc_change_pending) {
604 		iwdev->vsi.tc_change_pending = false;
605 		irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME);
606 	}
607 
608 	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
609 
610 	taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task);
611 	taskqueue_free(hdl->deferred_tq);
612 	hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL;
613 	hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL;
614 
615 	sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx);
616 	hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL;
617 
618 	irdma_ctrl_deinit_hw(iwdev->rf);
619 
620 	irdma_dealloc_pcidev(iwdev->rf);
621 
622 	irdma_del_handler(iwdev->hdl);
623 	kfree(iwdev->hdl);
624 	kfree(iwdev->rf);
625 	ib_dealloc_device(&iwdev->ibdev);
626 	irdma_pr_info("IRDMA hardware deinitialization complete irdma%d\n",
627 		      if_getdunit(peer->ifp));
628 
629 	return 0;
630 }
631 
632 /**
633  * irdma_prep_for_unregister - ensure the driver is ready to unregister
634  */
635 static void
636 irdma_prep_for_unregister(void)
637 {
638 	struct irdma_handler *hdl;
639 	unsigned long flags;
640 	bool hdl_valid;
641 
642 	do {
643 		hdl_valid = false;
644 		spin_lock_irqsave(&irdma_handler_lock, flags);
645 		list_for_each_entry(hdl, &irdma_handlers, list) {
646 			if (!hdl)
647 				continue;
648 			if (!hdl->iwdev->rf->peer_info)
649 				continue;
650 			hdl_valid = true;
651 			break;
652 		}
653 		spin_unlock_irqrestore(&irdma_handler_lock, flags);
654 		if (!hdl || !hdl_valid)
655 			break;
656 		IRDMA_CLOSE(hdl->iwdev->rf->peer_info);
657 		IRDMA_REMOVE(hdl->iwdev->rf->peer_info);
658 	} while (1);
659 }
660 
661 static kobj_method_t irdma_methods[] = {
662 	KOBJMETHOD(irdma_probe, irdma_probe),
663 	    KOBJMETHOD(irdma_open, irdma_open),
664 	    KOBJMETHOD(irdma_close, irdma_close),
665 	    KOBJMETHOD(irdma_remove, irdma_remove),
666 	    KOBJMETHOD(irdma_link_change, irdma_link_change),
667 	    KOBJMETHOD(irdma_event_handler, irdma_event_handler),
668 	    KOBJMETHOD_END
669 };
670 
671 /* declare irdma_class which extends the ice_rdma_di class */
672 DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class);
673 
674 static struct ice_rdma_info irdma_info = {
675 	.major_version = ICE_RDMA_MAJOR_VERSION,
676 	.minor_version = ICE_RDMA_MINOR_VERSION,
677 	.patch_version = ICE_RDMA_PATCH_VERSION,
678 	.rdma_class = &irdma_class,
679 };
680 
681 /**
682  * irdma_module_event_handler - Module event handler callback
683  * @mod: unused mod argument
684  * @what: the module event to handle
685  * @arg: unused module event argument
686  *
687  * Callback used by the FreeBSD module stack to notify the driver of module
688  * events. Used to implement custom handling for certain module events such as
689  * load and unload.
690  */
691 static int
692 irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg)
693 {
694 	switch (what) {
695 	case MOD_LOAD:
696 		printf("Loading irdma module\n");
697 		return ice_rdma_register(&irdma_info);
698 	case MOD_UNLOAD:
699 		printf("Unloading irdma module\n");
700 		irdma_prep_for_unregister();
701 		ice_rdma_unregister();
702 		return (0);
703 	default:
704 		return (EOPNOTSUPP);
705 	}
706 
707 	return (0);
708 }
709 
710 static moduledata_t irdma_moduledata = {
711 	"irdma",
712 	    irdma_module_event_handler,
713 	    NULL
714 };
715 
716 DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY);
717 MODULE_VERSION(irdma, 1);
718 MODULE_DEPEND(irdma, ice, 1, 1, 1);
719 MODULE_DEPEND(irdma, ibcore, 1, 1, 1);
720