xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 315ee00f)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t, enum iflib_restart_event);
87 
88 static int ice_msix_que(void *arg);
89 static int ice_msix_admin(void *arg);
90 
91 /*
92  * Helper function prototypes
93  */
94 static int ice_pci_mapping(struct ice_softc *sc);
95 static void ice_free_pci_mapping(struct ice_softc *sc);
96 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
97 static void ice_init_device_features(struct ice_softc *sc);
98 static void ice_init_tx_tracking(struct ice_vsi *vsi);
99 static void ice_handle_reset_event(struct ice_softc *sc);
100 static void ice_handle_pf_reset_request(struct ice_softc *sc);
101 static void ice_prepare_for_reset(struct ice_softc *sc);
102 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
103 static void ice_rebuild(struct ice_softc *sc);
104 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
105 static void ice_free_irqvs(struct ice_softc *sc);
106 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
107 static void ice_poll_for_media_avail(struct ice_softc *sc);
108 static void ice_setup_scctx(struct ice_softc *sc);
109 static int ice_allocate_msix(struct ice_softc *sc);
110 static void ice_admin_timer(void *arg);
111 static void ice_transition_recovery_mode(struct ice_softc *sc);
112 static void ice_transition_safe_mode(struct ice_softc *sc);
113 
114 /*
115  * Device Interface Declaration
116  */
117 
118 /**
119  * @var ice_methods
120  * @brief ice driver method entry points
121  *
122  * List of device methods implementing the generic device interface used by
123  * the device stack to interact with the ice driver. Since this is an iflib
124  * driver, most of the methods point to the generic iflib implementation.
125  */
126 static device_method_t ice_methods[] = {
127 	/* Device interface */
128 	DEVMETHOD(device_register, ice_register),
129 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
130 	DEVMETHOD(device_attach,   iflib_device_attach),
131 	DEVMETHOD(device_detach,   iflib_device_detach),
132 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
133 	DEVMETHOD(device_suspend,  iflib_device_suspend),
134 	DEVMETHOD(device_resume,   iflib_device_resume),
135 	DEVMETHOD_END
136 };
137 
138 /**
139  * @var ice_iflib_methods
140  * @brief iflib method entry points
141  *
142  * List of device methods used by the iflib stack to interact with this
143  * driver. These are the real main entry points used to interact with this
144  * driver.
145  */
146 static device_method_t ice_iflib_methods[] = {
147 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
148 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
149 	DEVMETHOD(ifdi_detach, ice_if_detach),
150 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
151 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
152 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
153 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
154 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
155 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
156 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
157 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
158 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
159 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
160 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
161 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
162 	DEVMETHOD(ifdi_init, ice_if_init),
163 	DEVMETHOD(ifdi_stop, ice_if_stop),
164 	DEVMETHOD(ifdi_timer, ice_if_timer),
165 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
166 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
167 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
168 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
169 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
170 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
171 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
172 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
173 	DEVMETHOD(ifdi_resume, ice_if_resume),
174 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
175 	DEVMETHOD_END
176 };
177 
178 /**
179  * @var ice_driver
180  * @brief driver structure for the generic device stack
181  *
182  * driver_t definition used to setup the generic device methods.
183  */
184 static driver_t ice_driver = {
185 	.name = "ice",
186 	.methods = ice_methods,
187 	.size = sizeof(struct ice_softc),
188 };
189 
190 /**
191  * @var ice_iflib_driver
192  * @brief driver structure for the iflib stack
193  *
194  * driver_t definition used to setup the iflib device methods.
195  */
196 static driver_t ice_iflib_driver = {
197 	.name = "ice",
198 	.methods = ice_iflib_methods,
199 	.size = sizeof(struct ice_softc),
200 };
201 
202 extern struct if_txrx ice_txrx;
203 extern struct if_txrx ice_recovery_txrx;
204 
205 /**
206  * @var ice_sctx
207  * @brief ice driver shared context
208  *
209  * Structure defining shared values (context) that is used by all instances of
210  * the device. Primarily used to setup details about how the iflib stack
211  * should treat this driver. Also defines the default, minimum, and maximum
212  * number of descriptors in each ring.
213  */
214 static struct if_shared_ctx ice_sctx = {
215 	.isc_magic = IFLIB_MAGIC,
216 	.isc_q_align = PAGE_SIZE,
217 
218 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
219 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
220 	 * that doesn't make sense since that would be larger than the maximum
221 	 * size of a single packet.
222 	 */
223 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
224 
225 	/* XXX: This is only used by iflib to ensure that
226 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
227 	 */
228 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
229 	/* XXX: This is used by iflib to set the number of segments in the TSO
230 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
231 	 * related ifnet parameter.
232 	 */
233 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
234 
235 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
236 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
237 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
238 
239 	.isc_nfl = 1,
240 	.isc_ntxqs = 1,
241 	.isc_nrxqs = 1,
242 
243 	.isc_admin_intrcnt = 1,
244 	.isc_vendor_info = ice_vendor_info_array,
245 	.isc_driver_version = __DECONST(char *, ice_driver_version),
246 	.isc_driver = &ice_iflib_driver,
247 
248 	/*
249 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
250 	 * for hardware checksum offload
251 	 *
252 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
253 	 * IP sum field, required by our hardware to calculate valid TSO
254 	 * checksums.
255 	 *
256 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
257 	 * even when the interface is down.
258 	 *
259 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
260 	 * vectors manually instead of relying on iflib code to do this.
261 	 */
262 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
263 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
264 
265 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
266 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
267 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
268 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
269 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
270 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
271 };
272 
273 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
274 
275 MODULE_VERSION(ice, 1);
276 MODULE_DEPEND(ice, pci, 1, 1, 1);
277 MODULE_DEPEND(ice, ether, 1, 1, 1);
278 MODULE_DEPEND(ice, iflib, 1, 1, 1);
279 
280 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
281 
282 /* Static driver-wide sysctls */
283 #include "ice_iflib_sysctls.h"
284 
285 /**
286  * ice_pci_mapping - Map PCI BAR memory
287  * @sc: device private softc
288  *
289  * Map PCI BAR 0 for device operation.
290  */
291 static int
292 ice_pci_mapping(struct ice_softc *sc)
293 {
294 	int rc;
295 
296 	/* Map BAR0 */
297 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
298 	if (rc)
299 		return rc;
300 
301 	return 0;
302 }
303 
304 /**
305  * ice_free_pci_mapping - Release PCI BAR memory
306  * @sc: device private softc
307  *
308  * Release PCI BARs which were previously mapped by ice_pci_mapping().
309  */
310 static void
311 ice_free_pci_mapping(struct ice_softc *sc)
312 {
313 	/* Free BAR0 */
314 	ice_free_bar(sc->dev, &sc->bar0);
315 }
316 
317 /*
318  * Device methods
319  */
320 
321 /**
322  * ice_register - register device method callback
323  * @dev: the device being registered
324  *
325  * Returns a pointer to the shared context structure, which is used by iflib.
326  */
327 static void *
328 ice_register(device_t dev __unused)
329 {
330 	return &ice_sctx;
331 } /* ice_register */
332 
333 /**
334  * ice_setup_scctx - Setup the iflib softc context structure
335  * @sc: the device private structure
336  *
337  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
338  * when loading.
339  */
340 static void
341 ice_setup_scctx(struct ice_softc *sc)
342 {
343 	if_softc_ctx_t scctx = sc->scctx;
344 	struct ice_hw *hw = &sc->hw;
345 	bool safe_mode, recovery_mode;
346 
347 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
348 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
349 
350 	/*
351 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
352 	 * a single queue pair.
353 	 */
354 	if (safe_mode || recovery_mode) {
355 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
356 		scctx->isc_ntxqsets_max = 1;
357 		scctx->isc_nrxqsets_max = 1;
358 	} else {
359 		/*
360 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
361 		 * the values of the override sysctls. Cache these initial
362 		 * values so that the driver can be aware of what the iflib
363 		 * sysctl value is when setting up MSI-X vectors.
364 		 */
365 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
366 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
367 
368 		if (scctx->isc_ntxqsets == 0)
369 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
370 		if (scctx->isc_nrxqsets == 0)
371 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
372 
373 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
374 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
375 
376 		/*
377 		 * Sanity check that the iflib sysctl values are within the
378 		 * maximum supported range.
379 		 */
380 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
381 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
382 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
383 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
384 	}
385 
386 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
387 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
388 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
389 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
390 
391 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
392 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
393 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
394 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
395 
396 	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
397 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
398 
399 	/*
400 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
401 	 */
402 	if (recovery_mode)
403 		scctx->isc_txrx = &ice_recovery_txrx;
404 	else
405 		scctx->isc_txrx = &ice_txrx;
406 
407 	/*
408 	 * If the driver loads in Safe mode or Recovery mode, disable
409 	 * advanced features including hardware offloads.
410 	 */
411 	if (safe_mode || recovery_mode) {
412 		scctx->isc_capenable = ICE_SAFE_CAPS;
413 		scctx->isc_tx_csum_flags = 0;
414 	} else {
415 		scctx->isc_capenable = ICE_FULL_CAPS;
416 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
417 	}
418 
419 	scctx->isc_capabilities = scctx->isc_capenable;
420 } /* ice_setup_scctx */
421 
422 /**
423  * ice_if_attach_pre - Early device attach logic
424  * @ctx: the iflib context structure
425  *
426  * Called by iflib during the attach process. Earliest main driver entry
427  * point which performs necessary hardware and driver initialization. Called
428  * before the Tx and Rx queues are allocated.
429  */
430 static int
431 ice_if_attach_pre(if_ctx_t ctx)
432 {
433 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
434 	enum ice_fw_modes fw_mode;
435 	enum ice_status status;
436 	if_softc_ctx_t scctx;
437 	struct ice_hw *hw;
438 	device_t dev;
439 	int err;
440 
441 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
442 
443 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
444 
445 	sc->ctx = ctx;
446 	sc->media = iflib_get_media(ctx);
447 	sc->sctx = iflib_get_sctx(ctx);
448 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
449 
450 	dev = sc->dev = iflib_get_dev(ctx);
451 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
452 
453 	hw = &sc->hw;
454 	hw->back = sc;
455 
456 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
457 		 "%s:admin", device_get_nameunit(dev));
458 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
459 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
460 
461 	ASSERT_CTX_LOCKED(sc);
462 
463 	if (ice_pci_mapping(sc)) {
464 		err = (ENXIO);
465 		goto destroy_admin_timer;
466 	}
467 
468 	/* Save off the PCI information */
469 	ice_save_pci_info(hw, dev);
470 
471 	/* create tunables as early as possible */
472 	ice_add_device_tunables(sc);
473 
474 	/* Setup ControlQ lengths */
475 	ice_set_ctrlq_len(hw);
476 
477 reinit_hw:
478 
479 	fw_mode = ice_get_fw_mode(hw);
480 	if (fw_mode == ICE_FW_MODE_REC) {
481 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
482 
483 		err = ice_attach_pre_recovery_mode(sc);
484 		if (err)
485 			goto free_pci_mapping;
486 
487 		return (0);
488 	}
489 
490 	/* Initialize the hw data structure */
491 	status = ice_init_hw(hw);
492 	if (status) {
493 		if (status == ICE_ERR_FW_API_VER) {
494 			/* Enter recovery mode, so that the driver remains
495 			 * loaded. This way, if the system administrator
496 			 * cannot update the driver, they may still attempt to
497 			 * downgrade the NVM.
498 			 */
499 			err = ice_attach_pre_recovery_mode(sc);
500 			if (err)
501 				goto free_pci_mapping;
502 
503 			return (0);
504 		} else {
505 			err = EIO;
506 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
507 				      ice_status_str(status),
508 				      ice_aq_str(hw->adminq.sq_last_status));
509 		}
510 		goto free_pci_mapping;
511 	}
512 
513 	ice_init_device_features(sc);
514 
515 	/* Notify firmware of the device driver version */
516 	err = ice_send_version(sc);
517 	if (err)
518 		goto deinit_hw;
519 
520 	/*
521 	 * Success indicates a change was made that requires a reinitialization
522 	 * of the hardware
523 	 */
524 	err = ice_load_pkg_file(sc);
525 	if (err == ICE_SUCCESS) {
526 		ice_deinit_hw(hw);
527 		goto reinit_hw;
528 	}
529 
530 	err = ice_init_link_events(sc);
531 	if (err) {
532 		device_printf(dev, "ice_init_link_events failed: %s\n",
533 			      ice_err_str(err));
534 		goto deinit_hw;
535 	}
536 
537 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
538 	 * and firmware, this will force them to use single VLAN mode.
539 	 */
540 	status = ice_set_vlan_mode(hw);
541 	if (status) {
542 		err = EIO;
543 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
544 			      ice_status_str(status),
545 			      ice_aq_str(hw->adminq.sq_last_status));
546 		goto deinit_hw;
547 	}
548 
549 	ice_print_nvm_version(sc);
550 
551 	/* Setup the MAC address */
552 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
553 
554 	/* Setup the iflib softc context structure */
555 	ice_setup_scctx(sc);
556 
557 	/* Initialize the Tx queue manager */
558 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
559 	if (err) {
560 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
561 			      ice_err_str(err));
562 		goto deinit_hw;
563 	}
564 
565 	/* Initialize the Rx queue manager */
566 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
567 	if (err) {
568 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
569 			      ice_err_str(err));
570 		goto free_tx_qmgr;
571 	}
572 
573 	/* Initialize the interrupt resource manager */
574 	err = ice_alloc_intr_tracking(sc);
575 	if (err)
576 		/* Errors are already printed */
577 		goto free_rx_qmgr;
578 
579 	/* Determine maximum number of VSIs we'll prepare for */
580 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
581 				    hw->func_caps.guar_num_vsi);
582 
583 	if (!sc->num_available_vsi) {
584 		err = EIO;
585 		device_printf(dev, "No VSIs allocated to host\n");
586 		goto free_intr_tracking;
587 	}
588 
589 	/* Allocate storage for the VSI pointers */
590 	sc->all_vsi = (struct ice_vsi **)
591 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
592 		       M_ICE, M_WAITOK | M_ZERO);
593 	if (!sc->all_vsi) {
594 		err = ENOMEM;
595 		device_printf(dev, "Unable to allocate VSI array\n");
596 		goto free_intr_tracking;
597 	}
598 
599 	/*
600 	 * Prepare the statically allocated primary PF VSI in the softc
601 	 * structure. Other VSIs will be dynamically allocated as needed.
602 	 */
603 	ice_setup_pf_vsi(sc);
604 
605 	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
606 	    scctx->isc_nrxqsets_max);
607 	if (err) {
608 		device_printf(dev, "Unable to allocate VSI Queue maps\n");
609 		goto free_main_vsi;
610 	}
611 
612 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
613 	err = ice_allocate_msix(sc);
614 	if (err)
615 		goto free_main_vsi;
616 
617 	return 0;
618 
619 free_main_vsi:
620 	/* ice_release_vsi will free the queue maps if they were allocated */
621 	ice_release_vsi(&sc->pf_vsi);
622 	free(sc->all_vsi, M_ICE);
623 	sc->all_vsi = NULL;
624 free_intr_tracking:
625 	ice_free_intr_tracking(sc);
626 free_rx_qmgr:
627 	ice_resmgr_destroy(&sc->rx_qmgr);
628 free_tx_qmgr:
629 	ice_resmgr_destroy(&sc->tx_qmgr);
630 deinit_hw:
631 	ice_deinit_hw(hw);
632 free_pci_mapping:
633 	ice_free_pci_mapping(sc);
634 destroy_admin_timer:
635 	mtx_lock(&sc->admin_mtx);
636 	callout_stop(&sc->admin_timer);
637 	mtx_unlock(&sc->admin_mtx);
638 	mtx_destroy(&sc->admin_mtx);
639 	return err;
640 } /* ice_if_attach_pre */
641 
642 /**
643  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
644  * @sc: the device private softc
645  *
646  * Loads the device driver in limited Firmware Recovery mode, intended to
647  * allow users to update the firmware to attempt to recover the device.
648  *
649  * @remark We may enter recovery mode in case either (a) the firmware is
650  * detected to be in an invalid state and must be re-programmed, or (b) the
651  * driver detects that the loaded firmware has a non-compatible API version
652  * that the driver cannot operate with.
653  */
654 static int
655 ice_attach_pre_recovery_mode(struct ice_softc *sc)
656 {
657 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
658 
659 	/* Setup the iflib softc context */
660 	ice_setup_scctx(sc);
661 
662 	/* Setup the PF VSI back pointer */
663 	sc->pf_vsi.sc = sc;
664 
665 	/*
666 	 * We still need to allocate MSI-X vectors since we need one vector to
667 	 * run the administrative admin interrupt
668 	 */
669 	return ice_allocate_msix(sc);
670 }
671 
672 /**
673  * ice_update_link_status - notify OS of link state change
674  * @sc: device private softc structure
675  * @update_media: true if we should update media even if link didn't change
676  *
677  * Called to notify iflib core of link status changes. Should be called once
678  * during attach_post, and whenever link status changes during runtime.
679  *
680  * This call only updates the currently supported media types if the link
681  * status changed, or if update_media is set to true.
682  */
683 static void
684 ice_update_link_status(struct ice_softc *sc, bool update_media)
685 {
686 	struct ice_hw *hw = &sc->hw;
687 	enum ice_status status;
688 
689 	/* Never report link up when in recovery mode */
690 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
691 		return;
692 
693 	/* Report link status to iflib only once each time it changes */
694 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
695 		if (sc->link_up) { /* link is up */
696 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
697 
698 			ice_set_default_local_lldp_mib(sc);
699 
700 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
701 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
702 
703 			ice_link_up_msg(sc);
704 		} else { /* link is down */
705 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
706 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
707 		}
708 		update_media = true;
709 	}
710 
711 	/* Update the supported media types */
712 	if (update_media) {
713 		status = ice_add_media_types(sc, sc->media);
714 		if (status)
715 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
716 				      ice_status_str(status),
717 				      ice_aq_str(hw->adminq.sq_last_status));
718 	}
719 }
720 
721 /**
722  * ice_if_attach_post - Late device attach logic
723  * @ctx: the iflib context structure
724  *
725  * Called by iflib to finish up attaching the device. Performs any attach
726  * logic which must wait until after the Tx and Rx queues have been
727  * allocated.
728  */
729 static int
730 ice_if_attach_post(if_ctx_t ctx)
731 {
732 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
733 	if_t ifp = iflib_get_ifp(ctx);
734 	int err;
735 
736 	ASSERT_CTX_LOCKED(sc);
737 
738 	/* We don't yet support loading if MSI-X is not supported */
739 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
740 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
741 		return (ENOTSUP);
742 	}
743 
744 	/* The ifnet structure hasn't yet been initialized when the attach_pre
745 	 * handler is called, so wait until attach_post to setup the
746 	 * isc_max_frame_size.
747 	 */
748 
749 	sc->ifp = ifp;
750 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
751 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
752 
753 	/*
754 	 * If we are in recovery mode, only perform a limited subset of
755 	 * initialization to support NVM recovery.
756 	 */
757 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
758 		ice_attach_post_recovery_mode(sc);
759 		return (0);
760 	}
761 
762 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
763 
764 	err = ice_initialize_vsi(&sc->pf_vsi);
765 	if (err) {
766 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
767 			      ice_err_str(err));
768 		return err;
769 	}
770 
771 	/* Enable FW health event reporting */
772 	ice_init_health_events(sc);
773 
774 	/* Configure the main PF VSI for RSS */
775 	err = ice_config_rss(&sc->pf_vsi);
776 	if (err) {
777 		device_printf(sc->dev,
778 			      "Unable to configure RSS for the main VSI, err %s\n",
779 			      ice_err_str(err));
780 		return err;
781 	}
782 
783 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
784 	err = ice_cfg_pf_ethertype_filters(sc);
785 	if (err)
786 		return err;
787 
788 	ice_get_and_print_bus_info(sc);
789 
790 	ice_set_link_management_mode(sc);
791 
792 	ice_init_saved_phy_cfg(sc);
793 
794 	ice_cfg_pba_num(sc);
795 
796 	ice_add_device_sysctls(sc);
797 
798 	/* Get DCBX/LLDP state and start DCBX agent */
799 	ice_init_dcb_setup(sc);
800 
801 	/* Setup link configuration parameters */
802 	ice_init_link_configuration(sc);
803 	ice_update_link_status(sc, true);
804 
805 	/* Configure interrupt causes for the administrative interrupt */
806 	ice_configure_misc_interrupts(sc);
807 
808 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
809 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
810 
811 	err = ice_rdma_pf_attach(sc);
812 	if (err)
813 		return (err);
814 
815 	/* Start the admin timer */
816 	mtx_lock(&sc->admin_mtx);
817 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
818 	mtx_unlock(&sc->admin_mtx);
819 
820 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
821 
822 	return 0;
823 } /* ice_if_attach_post */
824 
825 /**
826  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
827  * @sc: the device private softc
828  *
829  * Performs minimal work to prepare the driver to recover an NVM in case the
830  * firmware is in recovery mode.
831  */
832 static void
833 ice_attach_post_recovery_mode(struct ice_softc *sc)
834 {
835 	/* Configure interrupt causes for the administrative interrupt */
836 	ice_configure_misc_interrupts(sc);
837 
838 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
839 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
840 
841 	/* Start the admin timer */
842 	mtx_lock(&sc->admin_mtx);
843 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
844 	mtx_unlock(&sc->admin_mtx);
845 
846 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
847 }
848 
849 /**
850  * ice_free_irqvs - Free IRQ vector memory
851  * @sc: the device private softc structure
852  *
853  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
854  */
855 static void
856 ice_free_irqvs(struct ice_softc *sc)
857 {
858 	struct ice_vsi *vsi = &sc->pf_vsi;
859 	if_ctx_t ctx = sc->ctx;
860 	int i;
861 
862 	/* If the irqvs array is NULL, then there are no vectors to free */
863 	if (sc->irqvs == NULL)
864 		return;
865 
866 	/* Free the IRQ vectors */
867 	for (i = 0; i < sc->num_irq_vectors; i++)
868 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
869 
870 	/* Clear the irqv pointers */
871 	for (i = 0; i < vsi->num_rx_queues; i++)
872 		vsi->rx_queues[i].irqv = NULL;
873 
874 	for (i = 0; i < vsi->num_tx_queues; i++)
875 		vsi->tx_queues[i].irqv = NULL;
876 
877 	/* Release the vector array memory */
878 	free(sc->irqvs, M_ICE);
879 	sc->irqvs = NULL;
880 	sc->num_irq_vectors = 0;
881 }
882 
883 /**
884  * ice_if_detach - Device driver detach logic
885  * @ctx: iflib context structure
886  *
887  * Perform device shutdown logic to detach the device driver.
888  *
889  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
890  * ice_if_detach(). It is possible for the functions to be called in either
891  * order, and they must not assume to have a strict ordering.
892  */
893 static int
894 ice_if_detach(if_ctx_t ctx)
895 {
896 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
897 	struct ice_vsi *vsi = &sc->pf_vsi;
898 	int i;
899 
900 	ASSERT_CTX_LOCKED(sc);
901 
902 	/* Indicate that we're detaching */
903 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
904 
905 	/* Stop the admin timer */
906 	mtx_lock(&sc->admin_mtx);
907 	callout_stop(&sc->admin_timer);
908 	mtx_unlock(&sc->admin_mtx);
909 	mtx_destroy(&sc->admin_mtx);
910 
911 	ice_rdma_pf_detach(sc);
912 
913 	/* Free allocated media types */
914 	ifmedia_removeall(sc->media);
915 
916 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
917 	 * pointers. Note, the calls here and those in ice_if_queues_free()
918 	 * are *BOTH* necessary, as we cannot guarantee which path will be
919 	 * run first
920 	 */
921 	ice_vsi_del_txqs_ctx(vsi);
922 	ice_vsi_del_rxqs_ctx(vsi);
923 
924 	/* Release MSI-X resources */
925 	ice_free_irqvs(sc);
926 
927 	for (i = 0; i < sc->num_available_vsi; i++) {
928 		if (sc->all_vsi[i])
929 			ice_release_vsi(sc->all_vsi[i]);
930 	}
931 
932 	if (sc->all_vsi) {
933 		free(sc->all_vsi, M_ICE);
934 		sc->all_vsi = NULL;
935 	}
936 
937 	/* Release MSI-X memory */
938 	pci_release_msi(sc->dev);
939 
940 	if (sc->msix_table != NULL) {
941 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
942 				     rman_get_rid(sc->msix_table),
943 				     sc->msix_table);
944 		sc->msix_table = NULL;
945 	}
946 
947 	ice_free_intr_tracking(sc);
948 
949 	/* Destroy the queue managers */
950 	ice_resmgr_destroy(&sc->tx_qmgr);
951 	ice_resmgr_destroy(&sc->rx_qmgr);
952 
953 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
954 		ice_deinit_hw(&sc->hw);
955 
956 	ice_free_pci_mapping(sc);
957 
958 	return 0;
959 } /* ice_if_detach */
960 
961 /**
962  * ice_if_tx_queues_alloc - Allocate Tx queue memory
963  * @ctx: iflib context structure
964  * @vaddrs: virtual addresses for the queue memory
965  * @paddrs: physical addresses for the queue memory
966  * @ntxqs: the number of Tx queues per set (should always be 1)
967  * @ntxqsets: the number of Tx queue sets to allocate
968  *
969  * Called by iflib to allocate Tx queues for the device. Allocates driver
970  * memory to track each queue, the status arrays used for descriptor
971  * status reporting, and Tx queue sysctls.
972  */
973 static int
974 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
975 		       int __invariant_only ntxqs, int ntxqsets)
976 {
977 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
978 	struct ice_vsi *vsi = &sc->pf_vsi;
979 	struct ice_tx_queue *txq;
980 	int err, i, j;
981 
982 	MPASS(ntxqs == 1);
983 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
984 	ASSERT_CTX_LOCKED(sc);
985 
986 	/* Do not bother allocating queues if we're in recovery mode */
987 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
988 		return (0);
989 
990 	/* Allocate queue structure memory */
991 	if (!(vsi->tx_queues =
992 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
993 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
994 		return (ENOMEM);
995 	}
996 
997 	/* Allocate report status arrays */
998 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
999 		if (!(txq->tx_rsq =
1000 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1001 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1002 			err = ENOMEM;
1003 			goto free_tx_queues;
1004 		}
1005 		/* Initialize report status array */
1006 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1007 			txq->tx_rsq[j] = QIDX_INVALID;
1008 	}
1009 
1010 	/* Assign queues from PF space to the main VSI */
1011 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1012 	if (err) {
1013 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1014 			      ice_err_str(err));
1015 		goto free_tx_queues;
1016 	}
1017 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1018 
1019 	/* Add Tx queue sysctls context */
1020 	ice_vsi_add_txqs_ctx(vsi);
1021 
1022 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1023 		/* q_handle == me when only one TC */
1024 		txq->me = txq->q_handle = i;
1025 		txq->vsi = vsi;
1026 
1027 		/* store the queue size for easier access */
1028 		txq->desc_count = sc->scctx->isc_ntxd[0];
1029 
1030 		/* get the virtual and physical address of the hardware queues */
1031 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1032 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1033 		txq->tx_paddr = paddrs[i];
1034 
1035 		ice_add_txq_sysctls(txq);
1036 	}
1037 
1038 	vsi->num_tx_queues = ntxqsets;
1039 
1040 	return (0);
1041 
1042 free_tx_queues:
1043 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1044 		if (txq->tx_rsq != NULL) {
1045 			free(txq->tx_rsq, M_ICE);
1046 			txq->tx_rsq = NULL;
1047 		}
1048 	}
1049 	free(vsi->tx_queues, M_ICE);
1050 	vsi->tx_queues = NULL;
1051 	return err;
1052 }
1053 
1054 /**
1055  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1056  * @ctx: iflib context structure
1057  * @vaddrs: virtual addresses for the queue memory
1058  * @paddrs: physical addresses for the queue memory
1059  * @nrxqs: number of Rx queues per set (should always be 1)
1060  * @nrxqsets: number of Rx queue sets to allocate
1061  *
1062  * Called by iflib to allocate Rx queues for the device. Allocates driver
1063  * memory to track each queue, as well as sets up the Rx queue sysctls.
1064  */
1065 static int
1066 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1067 		       int __invariant_only nrxqs, int nrxqsets)
1068 {
1069 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1070 	struct ice_vsi *vsi = &sc->pf_vsi;
1071 	struct ice_rx_queue *rxq;
1072 	int err, i;
1073 
1074 	MPASS(nrxqs == 1);
1075 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1076 	ASSERT_CTX_LOCKED(sc);
1077 
1078 	/* Do not bother allocating queues if we're in recovery mode */
1079 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1080 		return (0);
1081 
1082 	/* Allocate queue structure memory */
1083 	if (!(vsi->rx_queues =
1084 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1085 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1086 		return (ENOMEM);
1087 	}
1088 
1089 	/* Assign queues from PF space to the main VSI */
1090 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1091 	if (err) {
1092 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1093 			      ice_err_str(err));
1094 		goto free_rx_queues;
1095 	}
1096 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1097 
1098 	/* Add Rx queue sysctls context */
1099 	ice_vsi_add_rxqs_ctx(vsi);
1100 
1101 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1102 		rxq->me = i;
1103 		rxq->vsi = vsi;
1104 
1105 		/* store the queue size for easier access */
1106 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1107 
1108 		/* get the virtual and physical address of the hardware queues */
1109 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1110 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1111 		rxq->rx_paddr = paddrs[i];
1112 
1113 		ice_add_rxq_sysctls(rxq);
1114 	}
1115 
1116 	vsi->num_rx_queues = nrxqsets;
1117 
1118 	return (0);
1119 
1120 free_rx_queues:
1121 	free(vsi->rx_queues, M_ICE);
1122 	vsi->rx_queues = NULL;
1123 	return err;
1124 }
1125 
1126 /**
1127  * ice_if_queues_free - Free queue memory
1128  * @ctx: the iflib context structure
1129  *
1130  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1131  * ice_if_rx_queues_alloc().
1132  *
1133  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1134  * called in the same order. It's possible for ice_if_queues_free() to be
1135  * called prior to ice_if_detach(), and vice versa.
1136  *
1137  * For this reason, the main VSI is a static member of the ice_softc, which is
1138  * not free'd until after iflib finishes calling both of these functions.
1139  *
1140  * Thus, care must be taken in how we manage the memory being freed by this
1141  * function, and in what tasks it can and must perform.
1142  */
1143 static void
1144 ice_if_queues_free(if_ctx_t ctx)
1145 {
1146 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1147 	struct ice_vsi *vsi = &sc->pf_vsi;
1148 	struct ice_tx_queue *txq;
1149 	int i;
1150 
1151 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1152 	 * pointers. Note, the calls here and those in ice_if_detach()
1153 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1154 	 * run first
1155 	 */
1156 	ice_vsi_del_txqs_ctx(vsi);
1157 	ice_vsi_del_rxqs_ctx(vsi);
1158 
1159 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1160 	ice_free_irqvs(sc);
1161 
1162 	if (vsi->tx_queues != NULL) {
1163 		/* free the tx_rsq arrays */
1164 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1165 			if (txq->tx_rsq != NULL) {
1166 				free(txq->tx_rsq, M_ICE);
1167 				txq->tx_rsq = NULL;
1168 			}
1169 		}
1170 		free(vsi->tx_queues, M_ICE);
1171 		vsi->tx_queues = NULL;
1172 		vsi->num_tx_queues = 0;
1173 	}
1174 	if (vsi->rx_queues != NULL) {
1175 		free(vsi->rx_queues, M_ICE);
1176 		vsi->rx_queues = NULL;
1177 		vsi->num_rx_queues = 0;
1178 	}
1179 }
1180 
1181 /**
1182  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1183  * @arg: The Rx queue memory
1184  *
1185  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1186  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1187  * iflib to schedule the main Rx thread.
1188  */
1189 static int
1190 ice_msix_que(void *arg)
1191 {
1192 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1193 
1194 	/* TODO: dynamic ITR algorithm?? */
1195 
1196 	return (FILTER_SCHEDULE_THREAD);
1197 }
1198 
1199 /**
1200  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1201  * @arg: pointer to device softc memory
1202  *
1203  * Called by iflib when an administrative interrupt occurs. Should perform any
1204  * fast logic for handling the interrupt cause, and then indicate whether the
1205  * admin task needs to be queued.
1206  */
1207 static int
1208 ice_msix_admin(void *arg)
1209 {
1210 	struct ice_softc *sc = (struct ice_softc *)arg;
1211 	struct ice_hw *hw = &sc->hw;
1212 	device_t dev = sc->dev;
1213 	u32 oicr;
1214 
1215 	/* There is no safe way to modify the enabled miscellaneous causes of
1216 	 * the OICR vector at runtime, as doing so would be prone to race
1217 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1218 	 * causes and allow future interrupts to occur. The admin interrupt
1219 	 * vector will not be re-enabled until after we exit this function,
1220 	 * but any delayed tasks must be resilient against possible "late
1221 	 * arrival" interrupts that occur while we're already handling the
1222 	 * task. This is done by using state bits and serializing these
1223 	 * delayed tasks via the admin status task function.
1224 	 */
1225 	oicr = rd32(hw, PFINT_OICR);
1226 
1227 	/* Processing multiple controlq interrupts on a single vector does not
1228 	 * provide an indication of which controlq triggered the interrupt.
1229 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1230 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1231 	 * it gets automatically cleared when the hardware acknowledges the
1232 	 * interrupt.
1233 	 *
1234 	 * This means we don't really have a good indication of whether or
1235 	 * which controlq triggered this interrupt. We'll just notify the
1236 	 * admin task that it should check all the controlqs.
1237 	 */
1238 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1239 
1240 	if (oicr & PFINT_OICR_VFLR_M) {
1241 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1242 	}
1243 
1244 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1245 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1246 	}
1247 
1248 	if (oicr & PFINT_OICR_GRST_M) {
1249 		u32 reset;
1250 
1251 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1252 			GLGEN_RSTAT_RESET_TYPE_S;
1253 
1254 		if (reset == ICE_RESET_CORER)
1255 			sc->soft_stats.corer_count++;
1256 		else if (reset == ICE_RESET_GLOBR)
1257 			sc->soft_stats.globr_count++;
1258 		else
1259 			sc->soft_stats.empr_count++;
1260 
1261 		/* There are a couple of bits at play for handling resets.
1262 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1263 		 * indicate that the driver has received an OICR with a reset
1264 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1265 		 * happen. Second, we set hw->reset_ongoing to indicate that
1266 		 * the hardware is in reset. We will set this back to false as
1267 		 * soon as the driver has determined that the hardware is out
1268 		 * of reset.
1269 		 *
1270 		 * If the driver wishes to trigger a request, it can set one of
1271 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1272 		 * correct type of reset.
1273 		 */
1274 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
1275 			hw->reset_ongoing = true;
1276 	}
1277 
1278 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1279 		device_printf(dev, "ECC Error detected!\n");
1280 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1281 	}
1282 
1283 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1284 		if (oicr & PFINT_OICR_HMC_ERR_M)
1285 			/* Log the HMC errors */
1286 			ice_log_hmc_error(hw, dev);
1287 		ice_rdma_notify_pe_intr(sc, oicr);
1288 	}
1289 
1290 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1291 		device_printf(dev, "PCI Exception detected!\n");
1292 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1293 	}
1294 
1295 	return (FILTER_SCHEDULE_THREAD);
1296 }
1297 
1298 /**
1299  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1300  * @sc: the device private softc
1301  *
1302  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1303  *
1304  * First, determine a suitable total number of vectors based on the number
1305  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1306  * RDMA.
1307  *
1308  * Request the desired amount of vectors, and see how many we obtain. If we
1309  * don't obtain as many as desired, reduce the demands by lowering the number
1310  * of requested queues or reducing the demand from other features such as
1311  * RDMA.
1312  *
1313  * @remark This function is required because the driver sets the
1314  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1315  * manually.
1316  *
1317  * @remark This driver will only use MSI-X vectors. If this is not possible,
1318  * neither MSI or legacy interrupts will be tried.
1319  *
1320  * @post on success this function must set the following scctx parameters:
1321  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1322  *
1323  * @returns zero on success or an error code on failure.
1324  */
1325 static int
1326 ice_allocate_msix(struct ice_softc *sc)
1327 {
1328 	bool iflib_override_queue_count = false;
1329 	if_softc_ctx_t scctx = sc->scctx;
1330 	device_t dev = sc->dev;
1331 	cpuset_t cpus;
1332 	int bar, queues, vectors, requested;
1333 	int err = 0;
1334 	int rdma;
1335 
1336 	/* Allocate the MSI-X bar */
1337 	bar = scctx->isc_msix_bar;
1338 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1339 	if (!sc->msix_table) {
1340 		device_printf(dev, "Unable to map MSI-X table\n");
1341 		return (ENOMEM);
1342 	}
1343 
1344 	/* Check if the iflib queue count sysctls have been set */
1345 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1346 		iflib_override_queue_count = true;
1347 
1348 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1349 	if (err) {
1350 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1351 			      __func__, ice_err_str(err));
1352 		CPU_COPY(&all_cpus, &cpus);
1353 	}
1354 
1355 	/* Attempt to mimic behavior of iflib_msix_init */
1356 	if (iflib_override_queue_count) {
1357 		/*
1358 		 * If the override sysctls have been set, limit the queues to
1359 		 * the number of logical CPUs.
1360 		 */
1361 		queues = mp_ncpus;
1362 	} else {
1363 		/*
1364 		 * Otherwise, limit the queue count to the CPUs associated
1365 		 * with the NUMA node the device is associated with.
1366 		 */
1367 		queues = CPU_COUNT(&cpus);
1368 	}
1369 
1370 	/* Clamp to the number of RSS buckets */
1371 	queues = imin(queues, rss_getnumbuckets());
1372 
1373 	/*
1374 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1375 	 * and Rx queues.
1376 	 */
1377 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1378 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1379 
1380 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1381 		/*
1382 		 * Choose a number of RDMA vectors based on the number of CPUs
1383 		 * up to a maximum
1384 		 */
1385 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1386 
1387 		/* Further limit by the user configurable tunable */
1388 		rdma = min(rdma, ice_rdma_max_msix);
1389 	} else {
1390 		rdma = 0;
1391 	}
1392 
1393 	/*
1394 	 * Determine the number of vectors to request. Note that we also need
1395 	 * to allocate one vector for administrative tasks.
1396 	 */
1397 	requested = rdma + queues + 1;
1398 
1399 	vectors = requested;
1400 
1401 	err = pci_alloc_msix(dev, &vectors);
1402 	if (err) {
1403 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1404 			      vectors, ice_err_str(err));
1405 		goto err_free_msix_table;
1406 	}
1407 
1408 	/* If we don't receive enough vectors, reduce demands */
1409 	if (vectors < requested) {
1410 		int diff = requested - vectors;
1411 
1412 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1413 			      requested, vectors);
1414 
1415 		/*
1416 		 * The OS didn't grant us the requested number of vectors.
1417 		 * Check to see if we can reduce demands by limiting the
1418 		 * number of vectors allocated to certain features.
1419 		 */
1420 
1421 		if (rdma >= diff) {
1422 			/* Reduce the number of RDMA vectors we reserve */
1423 			rdma -= diff;
1424 			diff = 0;
1425 		} else {
1426 			/* Disable RDMA and reduce the difference */
1427 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1428 			diff -= rdma;
1429 			rdma = 0;
1430 		}
1431 
1432 		/*
1433 		 * If we still have a difference, we need to reduce the number
1434 		 * of queue pairs.
1435 		 *
1436 		 * However, we still need at least one vector for the admin
1437 		 * interrupt and one queue pair.
1438 		 */
1439 		if (queues <= diff) {
1440 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1441 			err = (ERANGE);
1442 			goto err_pci_release_msi;
1443 		}
1444 
1445 		queues -= diff;
1446 	}
1447 
1448 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1449 	if (rdma)
1450 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1451 			      rdma);
1452 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1453 		      vectors);
1454 
1455 	scctx->isc_vectors = vectors;
1456 	scctx->isc_nrxqsets = queues;
1457 	scctx->isc_ntxqsets = queues;
1458 	scctx->isc_intr = IFLIB_INTR_MSIX;
1459 
1460 	sc->irdma_vectors = rdma;
1461 
1462 	/* Interrupt allocation tracking isn't required in recovery mode,
1463 	 * since neither RDMA nor VFs are enabled.
1464 	 */
1465 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1466 		return (0);
1467 
1468 	/* Keep track of which interrupt indices are being used for what */
1469 	sc->lan_vectors = vectors - rdma;
1470 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
1471 	if (err) {
1472 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1473 			      ice_err_str(err));
1474 		goto err_pci_release_msi;
1475 	}
1476 	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
1477 	if (err) {
1478 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1479 			      ice_err_str(err));
1480 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
1481 					    sc->lan_vectors);
1482 		goto err_pci_release_msi;
1483 	}
1484 
1485 	return (0);
1486 
1487 err_pci_release_msi:
1488 	pci_release_msi(dev);
1489 err_free_msix_table:
1490 	if (sc->msix_table != NULL) {
1491 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1492 				rman_get_rid(sc->msix_table),
1493 				sc->msix_table);
1494 		sc->msix_table = NULL;
1495 	}
1496 
1497 	return (err);
1498 }
1499 
1500 /**
1501  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1502  * @ctx: the iflib context structure
1503  * @msix: the number of vectors we were assigned
1504  *
1505  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1506  * we get at least the same number of vectors as we have queues, and that we
1507  * always have the same number of Tx and Rx queues.
1508  *
1509  * Tx queues use a softirq instead of using their own hardware interrupt.
1510  */
1511 static int
1512 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1513 {
1514 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1515 	struct ice_vsi *vsi = &sc->pf_vsi;
1516 	int err, i, vector;
1517 
1518 	ASSERT_CTX_LOCKED(sc);
1519 
1520 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1521 		device_printf(sc->dev,
1522 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1523 			      vsi->num_tx_queues, vsi->num_rx_queues);
1524 		return (EOPNOTSUPP);
1525 	}
1526 
1527 	if (msix < (vsi->num_rx_queues + 1)) {
1528 		device_printf(sc->dev,
1529 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1530 		return (EOPNOTSUPP);
1531 	}
1532 
1533 	/* Save the number of vectors for future use */
1534 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1535 
1536 	/* Allocate space to store the IRQ vector data */
1537 	if (!(sc->irqvs =
1538 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1539 					       M_ICE, M_NOWAIT))) {
1540 		device_printf(sc->dev,
1541 			      "Unable to allocate irqv memory\n");
1542 		return (ENOMEM);
1543 	}
1544 
1545 	/* Administrative interrupt events will use vector 0 */
1546 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1547 				      ice_msix_admin, sc, 0, "admin");
1548 	if (err) {
1549 		device_printf(sc->dev,
1550 			      "Failed to register Admin queue handler: %s\n",
1551 			      ice_err_str(err));
1552 		goto free_irqvs;
1553 	}
1554 	sc->irqvs[0].me = 0;
1555 
1556 	/* Do not allocate queue interrupts when in recovery mode */
1557 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1558 		return (0);
1559 
1560 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1561 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1562 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1563 		int rid = vector + 1;
1564 		char irq_name[16];
1565 
1566 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1567 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1568 					      IFLIB_INTR_RXTX, ice_msix_que,
1569 					      rxq, rxq->me, irq_name);
1570 		if (err) {
1571 			device_printf(sc->dev,
1572 				      "Failed to allocate q int %d err: %s\n",
1573 				      i, ice_err_str(err));
1574 			vector--;
1575 			i--;
1576 			goto fail;
1577 		}
1578 		sc->irqvs[vector].me = vector;
1579 		rxq->irqv = &sc->irqvs[vector];
1580 
1581 		bzero(irq_name, sizeof(irq_name));
1582 
1583 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1584 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1585 					    IFLIB_INTR_TX, txq,
1586 					    txq->me, irq_name);
1587 		txq->irqv = &sc->irqvs[vector];
1588 	}
1589 
1590 	return (0);
1591 fail:
1592 	for (; i >= 0; i--, vector--)
1593 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1594 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1595 free_irqvs:
1596 	free(sc->irqvs, M_ICE);
1597 	sc->irqvs = NULL;
1598 	return err;
1599 }
1600 
1601 /**
1602  * ice_if_mtu_set - Set the device MTU
1603  * @ctx: iflib context structure
1604  * @mtu: the MTU requested
1605  *
1606  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1607  *
1608  * @pre assumes the caller holds the iflib CTX lock
1609  */
1610 static int
1611 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1612 {
1613 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1614 
1615 	ASSERT_CTX_LOCKED(sc);
1616 
1617 	/* Do not support configuration when in recovery mode */
1618 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1619 		return (ENOSYS);
1620 
1621 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1622 		return (EINVAL);
1623 
1624 	sc->scctx->isc_max_frame_size = mtu +
1625 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1626 
1627 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1628 
1629 	return (0);
1630 }
1631 
1632 /**
1633  * ice_if_intr_enable - Enable device interrupts
1634  * @ctx: iflib context structure
1635  *
1636  * Called by iflib to request enabling device interrupts.
1637  */
1638 static void
1639 ice_if_intr_enable(if_ctx_t ctx)
1640 {
1641 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1642 	struct ice_vsi *vsi = &sc->pf_vsi;
1643 	struct ice_hw *hw = &sc->hw;
1644 
1645 	ASSERT_CTX_LOCKED(sc);
1646 
1647 	/* Enable ITR 0 */
1648 	ice_enable_intr(hw, sc->irqvs[0].me);
1649 
1650 	/* Do not enable queue interrupts in recovery mode */
1651 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1652 		return;
1653 
1654 	/* Enable all queue interrupts */
1655 	for (int i = 0; i < vsi->num_rx_queues; i++)
1656 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1657 }
1658 
1659 /**
1660  * ice_if_intr_disable - Disable device interrupts
1661  * @ctx: iflib context structure
1662  *
1663  * Called by iflib to request disabling device interrupts.
1664  */
1665 static void
1666 ice_if_intr_disable(if_ctx_t ctx)
1667 {
1668 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1669 	struct ice_hw *hw = &sc->hw;
1670 	unsigned int i;
1671 
1672 	ASSERT_CTX_LOCKED(sc);
1673 
1674 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1675 	 * assigned to queues. Instead of assuming that the interrupt
1676 	 * assignment in the rx_queues structure is valid, just disable all
1677 	 * possible interrupts
1678 	 *
1679 	 * Note that we choose not to disable ITR 0 because this handles the
1680 	 * AdminQ interrupts, and we want to keep processing these even when
1681 	 * the interface is offline.
1682 	 */
1683 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1684 		ice_disable_intr(hw, i);
1685 }
1686 
1687 /**
1688  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1689  * @ctx: iflib context structure
1690  * @rxqid: the Rx queue to enable
1691  *
1692  * Enable a specific Rx queue interrupt.
1693  *
1694  * This function is not protected by the iflib CTX lock.
1695  */
1696 static int
1697 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1698 {
1699 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1700 	struct ice_vsi *vsi = &sc->pf_vsi;
1701 	struct ice_hw *hw = &sc->hw;
1702 
1703 	/* Do not enable queue interrupts in recovery mode */
1704 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1705 		return (ENOSYS);
1706 
1707 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1708 	return (0);
1709 }
1710 
1711 /**
1712  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1713  * @ctx: iflib context structure
1714  * @txqid: the Tx queue to enable
1715  *
1716  * Enable a specific Tx queue interrupt.
1717  *
1718  * This function is not protected by the iflib CTX lock.
1719  */
1720 static int
1721 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1722 {
1723 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1724 	struct ice_vsi *vsi = &sc->pf_vsi;
1725 	struct ice_hw *hw = &sc->hw;
1726 
1727 	/* Do not enable queue interrupts in recovery mode */
1728 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1729 		return (ENOSYS);
1730 
1731 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1732 	return (0);
1733 }
1734 
1735 /**
1736  * ice_if_promisc_set - Set device promiscuous mode
1737  * @ctx: iflib context structure
1738  * @flags: promiscuous flags to configure
1739  *
1740  * Called by iflib to configure device promiscuous mode.
1741  *
1742  * @remark Calls to this function will always overwrite the previous setting
1743  */
1744 static int
1745 ice_if_promisc_set(if_ctx_t ctx, int flags)
1746 {
1747 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1748 	struct ice_hw *hw = &sc->hw;
1749 	device_t dev = sc->dev;
1750 	enum ice_status status;
1751 	bool promisc_enable = flags & IFF_PROMISC;
1752 	bool multi_enable = flags & IFF_ALLMULTI;
1753 
1754 	/* Do not support configuration when in recovery mode */
1755 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1756 		return (ENOSYS);
1757 
1758 	if (multi_enable)
1759 		return (EOPNOTSUPP);
1760 
1761 	if (promisc_enable) {
1762 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1763 					     ICE_VSI_PROMISC_MASK, 0);
1764 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1765 			device_printf(dev,
1766 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1767 				      ice_status_str(status),
1768 				      ice_aq_str(hw->adminq.sq_last_status));
1769 			return (EIO);
1770 		}
1771 	} else {
1772 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1773 					       ICE_VSI_PROMISC_MASK, 0);
1774 		if (status) {
1775 			device_printf(dev,
1776 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1777 				      ice_status_str(status),
1778 				      ice_aq_str(hw->adminq.sq_last_status));
1779 			return (EIO);
1780 		}
1781 	}
1782 
1783 	return (0);
1784 }
1785 
1786 /**
1787  * ice_if_media_change - Change device media
1788  * @ctx: device ctx structure
1789  *
1790  * Called by iflib when a media change is requested. This operation is not
1791  * supported by the hardware, so we just return an error code.
1792  */
1793 static int
1794 ice_if_media_change(if_ctx_t ctx)
1795 {
1796 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1797 
1798 	device_printf(sc->dev, "Media change is not supported.\n");
1799 	return (ENODEV);
1800 }
1801 
1802 /**
1803  * ice_if_media_status - Report current device media
1804  * @ctx: iflib context structure
1805  * @ifmr: ifmedia request structure to update
1806  *
1807  * Updates the provided ifmr with current device media status, including link
1808  * status and media type.
1809  */
1810 static void
1811 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1812 {
1813 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1814 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1815 
1816 	ifmr->ifm_status = IFM_AVALID;
1817 	ifmr->ifm_active = IFM_ETHER;
1818 
1819 	/* Never report link up or media types when in recovery mode */
1820 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1821 		return;
1822 
1823 	if (!sc->link_up)
1824 		return;
1825 
1826 	ifmr->ifm_status |= IFM_ACTIVE;
1827 	ifmr->ifm_active |= IFM_FDX;
1828 
1829 	if (li->phy_type_low)
1830 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1831 	else if (li->phy_type_high)
1832 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1833 	else
1834 		ifmr->ifm_active |= IFM_UNKNOWN;
1835 
1836 	/* Report flow control status as well */
1837 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1838 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1839 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1840 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1841 }
1842 
1843 /**
1844  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1845  * @vsi: the VSI to initialize
1846  *
1847  * Initialize Tx queue software tracking values, including the Report Status
1848  * queue, and related software tracking values.
1849  */
1850 static void
1851 ice_init_tx_tracking(struct ice_vsi *vsi)
1852 {
1853 	struct ice_tx_queue *txq;
1854 	size_t j;
1855 	int i;
1856 
1857 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1858 
1859 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1860 
1861 		/* Initialize the last processed descriptor to be the end of
1862 		 * the ring, rather than the start, so that we avoid an
1863 		 * off-by-one error in ice_ift_txd_credits_update for the
1864 		 * first packet.
1865 		 */
1866 		txq->tx_cidx_processed = txq->desc_count - 1;
1867 
1868 		for (j = 0; j < txq->desc_count; j++)
1869 			txq->tx_rsq[j] = QIDX_INVALID;
1870 	}
1871 }
1872 
1873 /**
1874  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1875  * @sc: the device softc
1876  *
1877  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1878  * buffer sizes when programming hardware.
1879  */
1880 static void
1881 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1882 {
1883 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1884 	struct ice_vsi *vsi = &sc->pf_vsi;
1885 
1886 	MPASS(mbuf_sz <= UINT16_MAX);
1887 	vsi->mbuf_sz = mbuf_sz;
1888 }
1889 
1890 /**
1891  * ice_if_init - Initialize the device
1892  * @ctx: iflib ctx structure
1893  *
1894  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
1895  * device filters and prepares the Tx and Rx engines.
1896  *
1897  * @pre assumes the caller holds the iflib CTX lock
1898  */
1899 static void
1900 ice_if_init(if_ctx_t ctx)
1901 {
1902 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1903 	device_t dev = sc->dev;
1904 	int err;
1905 
1906 	ASSERT_CTX_LOCKED(sc);
1907 
1908 	/*
1909 	 * We've seen an issue with 11.3/12.1 where sideband routines are
1910 	 * called after detach is called.  This would call routines after
1911 	 * if_stop, causing issues with the teardown process.  This has
1912 	 * seemingly been fixed in STABLE snapshots, but it seems like a
1913 	 * good idea to have this guard here regardless.
1914 	 */
1915 	if (ice_driver_is_detaching(sc))
1916 		return;
1917 
1918 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1919 		return;
1920 
1921 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
1922 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
1923 		return;
1924 	}
1925 
1926 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
1927 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
1928 		return;
1929 	}
1930 
1931 	ice_update_rx_mbuf_sz(sc);
1932 
1933 	/* Update the MAC address... User might use a LAA */
1934 	err = ice_update_laa_mac(sc);
1935 	if (err) {
1936 		device_printf(dev,
1937 			      "LAA address change failed, err %s\n",
1938 			      ice_err_str(err));
1939 		return;
1940 	}
1941 
1942 	/* Initialize software Tx tracking values */
1943 	ice_init_tx_tracking(&sc->pf_vsi);
1944 
1945 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
1946 	if (err) {
1947 		device_printf(dev,
1948 			      "Unable to configure the main VSI for Tx: %s\n",
1949 			      ice_err_str(err));
1950 		return;
1951 	}
1952 
1953 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
1954 	if (err) {
1955 		device_printf(dev,
1956 			      "Unable to configure the main VSI for Rx: %s\n",
1957 			      ice_err_str(err));
1958 		goto err_cleanup_tx;
1959 	}
1960 
1961 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
1962 	if (err) {
1963 		device_printf(dev,
1964 			      "Unable to enable Rx rings for transmit: %s\n",
1965 			      ice_err_str(err));
1966 		goto err_cleanup_tx;
1967 	}
1968 
1969 	err = ice_cfg_pf_default_mac_filters(sc);
1970 	if (err) {
1971 		device_printf(dev,
1972 			      "Unable to configure default MAC filters: %s\n",
1973 			      ice_err_str(err));
1974 		goto err_stop_rx;
1975 	}
1976 
1977 	/* We use software interrupts for Tx, so we only program the hardware
1978 	 * interrupts for Rx.
1979 	 */
1980 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
1981 	ice_configure_rx_itr(&sc->pf_vsi);
1982 
1983 	/* Configure promiscuous mode */
1984 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
1985 
1986 	ice_rdma_pf_init(sc);
1987 
1988 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
1989 	return;
1990 
1991 err_stop_rx:
1992 	ice_control_all_rx_queues(&sc->pf_vsi, false);
1993 err_cleanup_tx:
1994 	ice_vsi_disable_tx(&sc->pf_vsi);
1995 }
1996 
1997 /**
1998  * ice_poll_for_media_avail - Re-enable link if media is detected
1999  * @sc: device private structure
2000  *
2001  * Intended to be called from the driver's timer function, this function
2002  * sends the Get Link Status AQ command and re-enables HW link if the
2003  * command says that media is available.
2004  *
2005  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2006  * since media removal events are supposed to be sent to the driver through
2007  * a link status event.
2008  */
2009 static void
2010 ice_poll_for_media_avail(struct ice_softc *sc)
2011 {
2012 	struct ice_hw *hw = &sc->hw;
2013 	struct ice_port_info *pi = hw->port_info;
2014 
2015 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2016 		pi->phy.get_link_info = true;
2017 		ice_get_link_status(pi, &sc->link_up);
2018 
2019 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2020 			enum ice_status status;
2021 
2022 			/* Re-enable link and re-apply user link settings */
2023 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2024 
2025 			/* Update the OS about changes in media capability */
2026 			status = ice_add_media_types(sc, sc->media);
2027 			if (status)
2028 				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
2029 					      ice_status_str(status),
2030 					      ice_aq_str(hw->adminq.sq_last_status));
2031 
2032 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2033 		}
2034 	}
2035 }
2036 
2037 /**
2038  * ice_if_timer - called by iflib periodically
2039  * @ctx: iflib ctx structure
2040  * @qid: the queue this timer was called for
2041  *
2042  * This callback is triggered by iflib periodically. We use it to update the
2043  * hw statistics.
2044  *
2045  * @remark this function is not protected by the iflib CTX lock.
2046  */
2047 static void
2048 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2049 {
2050 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2051 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2052 
2053 	if (qid != 0)
2054 		return;
2055 
2056 	/* Do not attempt to update stats when in recovery mode */
2057 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2058 		return;
2059 
2060 	/* Update device statistics */
2061 	ice_update_pf_stats(sc);
2062 
2063 	/*
2064 	 * For proper watchdog management, the iflib stack needs to know if
2065 	 * we've been paused during the last interval. Check if the
2066 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2067 	 */
2068 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2069 		sc->scctx->isc_pause_frames = 1;
2070 
2071 	/* Update the primary VSI stats */
2072 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2073 }
2074 
2075 /**
2076  * ice_admin_timer - called periodically to trigger the admin task
2077  * @arg: callout(9) argument pointing to the device private softc structure
2078  *
2079  * Timer function used as part of a callout(9) timer that will periodically
2080  * trigger the admin task, even when the interface is down.
2081  *
2082  * @remark this function is not called by iflib and is not protected by the
2083  * iflib CTX lock.
2084  *
2085  * @remark because this is a callout function, it cannot sleep and should not
2086  * attempt taking the iflib CTX lock.
2087  */
2088 static void
2089 ice_admin_timer(void *arg)
2090 {
2091 	struct ice_softc *sc = (struct ice_softc *)arg;
2092 
2093 	/*
2094 	 * There is a point where callout routines are no longer
2095 	 * cancelable.  So there exists a window of time where the
2096 	 * driver enters detach() and tries to cancel the callout, but the
2097 	 * callout routine has passed the cancellation point.  The detach()
2098 	 * routine is unaware of this and tries to free resources that the
2099 	 * callout routine needs.  So we check for the detach state flag to
2100 	 * at least shrink the window of opportunity.
2101 	 */
2102 	if (ice_driver_is_detaching(sc))
2103 		return;
2104 
2105 	/* Fire off the admin task */
2106 	iflib_admin_intr_deferred(sc->ctx);
2107 
2108 	/* Reschedule the admin timer */
2109 	callout_schedule(&sc->admin_timer, hz/2);
2110 }
2111 
2112 /**
2113  * ice_transition_recovery_mode - Transition to recovery mode
2114  * @sc: the device private softc
2115  *
2116  * Called when the driver detects that the firmware has entered recovery mode
2117  * at run time.
2118  */
2119 static void
2120 ice_transition_recovery_mode(struct ice_softc *sc)
2121 {
2122 	struct ice_vsi *vsi = &sc->pf_vsi;
2123 	int i;
2124 
2125 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2126 
2127 	/* Tell the stack that the link has gone down */
2128 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2129 
2130 	/* Request that the device be re-initialized */
2131 	ice_request_stack_reinit(sc);
2132 
2133 	ice_rdma_pf_detach(sc);
2134 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2135 
2136 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2137 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2138 
2139 	ice_vsi_del_txqs_ctx(vsi);
2140 	ice_vsi_del_rxqs_ctx(vsi);
2141 
2142 	for (i = 0; i < sc->num_available_vsi; i++) {
2143 		if (sc->all_vsi[i])
2144 			ice_release_vsi(sc->all_vsi[i]);
2145 	}
2146 	sc->num_available_vsi = 0;
2147 
2148 	if (sc->all_vsi) {
2149 		free(sc->all_vsi, M_ICE);
2150 		sc->all_vsi = NULL;
2151 	}
2152 
2153 	/* Destroy the interrupt manager */
2154 	ice_resmgr_destroy(&sc->imgr);
2155 	/* Destroy the queue managers */
2156 	ice_resmgr_destroy(&sc->tx_qmgr);
2157 	ice_resmgr_destroy(&sc->rx_qmgr);
2158 
2159 	ice_deinit_hw(&sc->hw);
2160 }
2161 
2162 /**
2163  * ice_transition_safe_mode - Transition to safe mode
2164  * @sc: the device private softc
2165  *
2166  * Called when the driver attempts to reload the DDP package during a device
2167  * reset, and the new download fails. If so, we must transition to safe mode
2168  * at run time.
2169  *
2170  * @remark although safe mode normally allocates only a single queue, we can't
2171  * change the number of queues dynamically when using iflib. Due to this, we
2172  * do not attempt to reduce the number of queues.
2173  */
2174 static void
2175 ice_transition_safe_mode(struct ice_softc *sc)
2176 {
2177 	/* Indicate that we are in Safe mode */
2178 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2179 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2180 
2181 	ice_rdma_pf_detach(sc);
2182 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2183 
2184 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2185 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2186 
2187 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2188 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2189 }
2190 
2191 /**
2192  * ice_if_update_admin_status - update admin status
2193  * @ctx: iflib ctx structure
2194  *
2195  * Called by iflib to update the admin status. For our purposes, this means
2196  * check the adminq, and update the link status. It's ultimately triggered by
2197  * our admin interrupt, or by the ice_if_timer periodically.
2198  *
2199  * @pre assumes the caller holds the iflib CTX lock
2200  */
2201 static void
2202 ice_if_update_admin_status(if_ctx_t ctx)
2203 {
2204 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2205 	enum ice_fw_modes fw_mode;
2206 	bool reschedule = false;
2207 	u16 pending = 0;
2208 
2209 	ASSERT_CTX_LOCKED(sc);
2210 
2211 	/* Check if the firmware entered recovery mode at run time */
2212 	fw_mode = ice_get_fw_mode(&sc->hw);
2213 	if (fw_mode == ICE_FW_MODE_REC) {
2214 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2215 			/* If we just entered recovery mode, log a warning to
2216 			 * the system administrator and deinit driver state
2217 			 * that is no longer functional.
2218 			 */
2219 			ice_transition_recovery_mode(sc);
2220 		}
2221 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2222 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2223 			/* Rollback mode isn't fatal, but we don't want to
2224 			 * repeatedly post a message about it.
2225 			 */
2226 			ice_print_rollback_msg(&sc->hw);
2227 		}
2228 	}
2229 
2230 	/* Handle global reset events */
2231 	ice_handle_reset_event(sc);
2232 
2233 	/* Handle PF reset requests */
2234 	ice_handle_pf_reset_request(sc);
2235 
2236 	/* Handle MDD events */
2237 	ice_handle_mdd_event(sc);
2238 
2239 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2240 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2241 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2242 		/*
2243 		 * If we know the control queues are disabled, skip processing
2244 		 * the control queues entirely.
2245 		 */
2246 		;
2247 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2248 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2249 		if (pending > 0)
2250 			reschedule = true;
2251 
2252 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2253 		if (pending > 0)
2254 			reschedule = true;
2255 	}
2256 
2257 	/* Poll for link up */
2258 	ice_poll_for_media_avail(sc);
2259 
2260 	/* Check and update link status */
2261 	ice_update_link_status(sc, false);
2262 
2263 	/*
2264 	 * If there are still messages to process, we need to reschedule
2265 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2266 	 * woken up at the next interrupt or timer event.
2267 	 */
2268 	if (reschedule) {
2269 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2270 		iflib_admin_intr_deferred(ctx);
2271 	} else {
2272 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2273 	}
2274 }
2275 
2276 /**
2277  * ice_prepare_for_reset - Prepare device for an impending reset
2278  * @sc: The device private softc
2279  *
2280  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2281  * scheduler setup, and shutting down controlqs. Uses the
2282  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2283  * driver for reset or not.
2284  */
2285 static void
2286 ice_prepare_for_reset(struct ice_softc *sc)
2287 {
2288 	struct ice_hw *hw = &sc->hw;
2289 
2290 	/* If we're already prepared, there's nothing to do */
2291 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2292 		return;
2293 
2294 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2295 
2296 	/* In recovery mode, hardware is not initialized */
2297 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2298 		return;
2299 
2300 	/* inform the RDMA client */
2301 	ice_rdma_notify_reset(sc);
2302 	/* stop the RDMA client */
2303 	ice_rdma_pf_stop(sc);
2304 
2305 	/* Release the main PF VSI queue mappings */
2306 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2307 				    sc->pf_vsi.num_tx_queues);
2308 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2309 				    sc->pf_vsi.num_rx_queues);
2310 
2311 	ice_clear_hw_tbls(hw);
2312 
2313 	if (hw->port_info)
2314 		ice_sched_clear_port(hw->port_info);
2315 
2316 	ice_shutdown_all_ctrlq(hw, false);
2317 }
2318 
2319 /**
2320  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2321  * @sc: the device softc pointer
2322  *
2323  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2324  * mapping after a reset occurred.
2325  */
2326 static int
2327 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2328 {
2329 	struct ice_vsi *vsi = &sc->pf_vsi;
2330 	struct ice_tx_queue *txq;
2331 	struct ice_rx_queue *rxq;
2332 	int err, i;
2333 
2334 	/* Re-assign Tx queues from PF space to the main VSI */
2335 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2336 					    vsi->num_tx_queues);
2337 	if (err) {
2338 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2339 			      ice_err_str(err));
2340 		return (err);
2341 	}
2342 
2343 	/* Re-assign Rx queues from PF space to this VSI */
2344 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2345 					    vsi->num_rx_queues);
2346 	if (err) {
2347 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2348 			      ice_err_str(err));
2349 		goto err_release_tx_queues;
2350 	}
2351 
2352 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2353 
2354 	/* Re-assign Tx queue tail pointers */
2355 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2356 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2357 
2358 	/* Re-assign Rx queue tail pointers */
2359 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2360 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2361 
2362 	return (0);
2363 
2364 err_release_tx_queues:
2365 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2366 				   sc->pf_vsi.num_tx_queues);
2367 
2368 	return (err);
2369 }
2370 
/* Non-zero when the ifnet of the iflib context has IFF_DRV_RUNNING set */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2373 
2374 /**
2375  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2376  * @sc: The device private softc
2377  *
2378  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2379  * limited functionality supported while in recovery mode.
2380  */
2381 static void
2382 ice_rebuild_recovery_mode(struct ice_softc *sc)
2383 {
2384 	device_t dev = sc->dev;
2385 
2386 	/* enable PCIe bus master */
2387 	pci_enable_busmaster(dev);
2388 
2389 	/* Configure interrupt causes for the administrative interrupt */
2390 	ice_configure_misc_interrupts(sc);
2391 
2392 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2393 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2394 
2395 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2396 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2397 
2398 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2399 
2400 	/* In order to completely restore device functionality, the iflib core
2401 	 * needs to be reset. We need to request an iflib reset. Additionally,
2402 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2403 	 * the iflib core, we also want re-run the admin task so that iflib
2404 	 * resets immediately instead of waiting for the next interrupt.
2405 	 */
2406 	ice_request_stack_reinit(sc);
2407 
2408 	return;
2409 }
2410 
2411 /**
2412  * ice_rebuild - Rebuild driver state post reset
2413  * @sc: The device private softc
2414  *
2415  * Restore driver state after a reset occurred. Restart the controlqs, setup
2416  * the hardware port, and re-enable the VSIs.
2417  */
2418 static void
2419 ice_rebuild(struct ice_softc *sc)
2420 {
2421 	struct ice_hw *hw = &sc->hw;
2422 	device_t dev = sc->dev;
2423 	enum ice_ddp_state pkg_state;
2424 	enum ice_status status;
2425 	int err;
2426 
2427 	sc->rebuild_ticks = ticks;
2428 
2429 	/* If we're rebuilding, then a reset has succeeded. */
2430 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2431 
2432 	/*
2433 	 * If the firmware is in recovery mode, only restore the limited
2434 	 * functionality supported by recovery mode.
2435 	 */
2436 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2437 		ice_rebuild_recovery_mode(sc);
2438 		return;
2439 	}
2440 
2441 	/* enable PCIe bus master */
2442 	pci_enable_busmaster(dev);
2443 
2444 	status = ice_init_all_ctrlq(hw);
2445 	if (status) {
2446 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2447 			      ice_status_str(status));
2448 		goto err_shutdown_ctrlq;
2449 	}
2450 
2451 	/* Query the allocated resources for Tx scheduler */
2452 	status = ice_sched_query_res_alloc(hw);
2453 	if (status) {
2454 		device_printf(dev,
2455 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2456 			      ice_status_str(status),
2457 			      ice_aq_str(hw->adminq.sq_last_status));
2458 		goto err_shutdown_ctrlq;
2459 	}
2460 
2461 	/* Re-enable FW logging. Keep going even if this fails */
2462 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2463 	if (!status) {
2464 		/*
2465 		 * We should have the most updated cached copy of the
2466 		 * configuration, regardless of whether we're rebuilding
2467 		 * or not.  So we'll simply check to see if logging was
2468 		 * enabled pre-rebuild.
2469 		 */
2470 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2471 			status = ice_fwlog_register(hw);
2472 			if (status)
2473 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2474 				   ice_status_str(status),
2475 				   ice_aq_str(hw->adminq.sq_last_status));
2476 		}
2477 	} else
2478 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2479 		   ice_status_str(status),
2480 		   ice_aq_str(hw->adminq.sq_last_status));
2481 
2482 	err = ice_send_version(sc);
2483 	if (err)
2484 		goto err_shutdown_ctrlq;
2485 
2486 	err = ice_init_link_events(sc);
2487 	if (err) {
2488 		device_printf(dev, "ice_init_link_events failed: %s\n",
2489 			      ice_err_str(err));
2490 		goto err_shutdown_ctrlq;
2491 	}
2492 
2493 	status = ice_clear_pf_cfg(hw);
2494 	if (status) {
2495 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2496 			      ice_status_str(status));
2497 		goto err_shutdown_ctrlq;
2498 	}
2499 
2500 	ice_clear_pxe_mode(hw);
2501 
2502 	status = ice_get_caps(hw);
2503 	if (status) {
2504 		device_printf(dev, "failed to get capabilities, err %s\n",
2505 			      ice_status_str(status));
2506 		goto err_shutdown_ctrlq;
2507 	}
2508 
2509 	status = ice_sched_init_port(hw->port_info);
2510 	if (status) {
2511 		device_printf(dev, "failed to initialize port, err %s\n",
2512 			      ice_status_str(status));
2513 		goto err_sched_cleanup;
2514 	}
2515 
2516 	/* If we previously loaded the package, it needs to be reloaded now */
2517 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2518 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2519 		if (!ice_is_init_pkg_successful(pkg_state)) {
2520 			ice_log_pkg_init(sc, pkg_state);
2521 			ice_transition_safe_mode(sc);
2522 		}
2523 	}
2524 
2525 	ice_reset_pf_stats(sc);
2526 
2527 	err = ice_rebuild_pf_vsi_qmap(sc);
2528 	if (err) {
2529 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2530 			      ice_err_str(err));
2531 		goto err_sched_cleanup;
2532 	}
2533 	err = ice_initialize_vsi(&sc->pf_vsi);
2534 	if (err) {
2535 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2536 			      ice_err_str(err));
2537 		goto err_release_queue_allocations;
2538 	}
2539 
2540 	/* Replay all VSI configuration */
2541 	err = ice_replay_all_vsi_cfg(sc);
2542 	if (err)
2543 		goto err_deinit_pf_vsi;
2544 
2545 	/* Re-enable FW health event reporting */
2546 	ice_init_health_events(sc);
2547 
2548 	/* Reconfigure the main PF VSI for RSS */
2549 	err = ice_config_rss(&sc->pf_vsi);
2550 	if (err) {
2551 		device_printf(sc->dev,
2552 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2553 			      ice_err_str(err));
2554 		goto err_deinit_pf_vsi;
2555 	}
2556 
2557 	/* Refresh link status */
2558 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2559 	sc->hw.port_info->phy.get_link_info = true;
2560 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2561 	ice_update_link_status(sc, true);
2562 
2563 	/* RDMA interface will be restarted by the stack re-init */
2564 
2565 	/* Configure interrupt causes for the administrative interrupt */
2566 	ice_configure_misc_interrupts(sc);
2567 
2568 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2569 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2570 
2571 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2572 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2573 
2574 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2575 
2576 	/* In order to completely restore device functionality, the iflib core
2577 	 * needs to be reset. We need to request an iflib reset. Additionally,
2578 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2579 	 * the iflib core, we also want re-run the admin task so that iflib
2580 	 * resets immediately instead of waiting for the next interrupt.
2581 	 */
2582 	ice_request_stack_reinit(sc);
2583 
2584 	return;
2585 
2586 err_deinit_pf_vsi:
2587 	ice_deinit_vsi(&sc->pf_vsi);
2588 err_release_queue_allocations:
2589 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2590 				    sc->pf_vsi.num_tx_queues);
2591 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2592 				    sc->pf_vsi.num_rx_queues);
2593 err_sched_cleanup:
2594 	ice_sched_cleanup_all(hw);
2595 err_shutdown_ctrlq:
2596 	ice_shutdown_all_ctrlq(hw, false);
2597 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2598 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2599 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2600 }
2601 
2602 /**
2603  * ice_handle_reset_event - Handle reset events triggered by OICR
2604  * @sc: The device private softc
2605  *
2606  * Handle reset events triggered by an OICR notification. This includes CORER,
2607  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2608  * firmware.
2609  *
2610  * @pre assumes the iflib context lock is held, and will unlock it while
2611  * waiting for the hardware to finish reset.
2612  */
2613 static void
2614 ice_handle_reset_event(struct ice_softc *sc)
2615 {
2616 	struct ice_hw *hw = &sc->hw;
2617 	enum ice_status status;
2618 	device_t dev = sc->dev;
2619 
2620 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2621 	 * trigger an OICR interrupt. Our OICR handler will determine when
2622 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2623 	 * appropriate.
2624 	 */
2625 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2626 		return;
2627 
2628 	ice_prepare_for_reset(sc);
2629 
2630 	/*
2631 	 * Release the iflib context lock and wait for the device to finish
2632 	 * resetting.
2633 	 */
2634 	IFLIB_CTX_UNLOCK(sc);
2635 	status = ice_check_reset(hw);
2636 	IFLIB_CTX_LOCK(sc);
2637 	if (status) {
2638 		device_printf(dev, "Device never came out of reset, err %s\n",
2639 			      ice_status_str(status));
2640 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2641 		return;
2642 	}
2643 
2644 	/* We're done with the reset, so we can rebuild driver state */
2645 	sc->hw.reset_ongoing = false;
2646 	ice_rebuild(sc);
2647 
2648 	/* In the unlikely event that a PF reset request occurs at the same
2649 	 * time as a global reset, clear the request now. This avoids
2650 	 * resetting a second time right after we reset due to a global event.
2651 	 */
2652 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2653 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2654 }
2655 
2656 /**
2657  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2658  * @sc: The device private softc
2659  *
2660  * Initiate a PF reset requested by software. We handle this in the admin task
2661  * so that only one thread actually handles driver preparation and cleanup,
2662  * rather than having multiple threads possibly attempt to run this code
2663  * simultaneously.
2664  *
2665  * @pre assumes the iflib context lock is held and will unlock it while
2666  * waiting for the PF reset to complete.
2667  */
2668 static void
2669 ice_handle_pf_reset_request(struct ice_softc *sc)
2670 {
2671 	struct ice_hw *hw = &sc->hw;
2672 	enum ice_status status;
2673 
2674 	/* Check for PF reset requests */
2675 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2676 		return;
2677 
2678 	/* Make sure we're prepared for reset */
2679 	ice_prepare_for_reset(sc);
2680 
2681 	/*
2682 	 * Release the iflib context lock and wait for the device to finish
2683 	 * resetting.
2684 	 */
2685 	IFLIB_CTX_UNLOCK(sc);
2686 	status = ice_reset(hw, ICE_RESET_PFR);
2687 	IFLIB_CTX_LOCK(sc);
2688 	if (status) {
2689 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2690 			      ice_status_str(status));
2691 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2692 		return;
2693 	}
2694 
2695 	sc->soft_stats.pfr_count++;
2696 	ice_rebuild(sc);
2697 }
2698 
2699 /**
2700  * ice_init_device_features - Init device driver features
2701  * @sc: driver softc structure
2702  *
2703  * @pre assumes that the function capabilities bits have been set up by
2704  * ice_init_hw().
2705  */
2706 static void
2707 ice_init_device_features(struct ice_softc *sc)
2708 {
2709 	/* Set capabilities that all devices support */
2710 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2711 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2712 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2713 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2714 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2715 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2716 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2717 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2718 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2719 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2720 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2721 
2722 	/* Disable features due to hardware limitations... */
2723 	if (!sc->hw.func_caps.common_cap.rss_table_size)
2724 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2725 	if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma)
2726 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2727 	if (!sc->hw.func_caps.common_cap.dcb)
2728 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2729 	/* Disable features due to firmware limitations... */
2730 	if (!ice_is_fw_health_report_supported(&sc->hw))
2731 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2732 	if (!ice_fwlog_supported(&sc->hw))
2733 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2734 	if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2735 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2736 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2737 		else
2738 			ice_fwlog_unregister(&sc->hw);
2739 	}
2740 
2741 	/* Disable capabilities not supported by the OS */
2742 	ice_disable_unsupported_features(sc->feat_cap);
2743 
2744 	/* RSS is always enabled for iflib */
2745 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2746 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2747 
2748 	/* Disable features based on sysctl settings */
2749 	if (!ice_tx_balance_en)
2750 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2751 }
2752 
2753 /**
2754  * ice_if_multi_set - Callback to update Multicast filters in HW
2755  * @ctx: iflib ctx structure
2756  *
2757  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2758  * the if_multiaddrs list and determine which filters have been added or
2759  * removed from the list, and update HW programming to reflect the new list.
2760  *
2761  * @pre assumes the caller holds the iflib CTX lock
2762  */
2763 static void
2764 ice_if_multi_set(if_ctx_t ctx)
2765 {
2766 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2767 	int err;
2768 
2769 	ASSERT_CTX_LOCKED(sc);
2770 
2771 	/* Do not handle multicast configuration in recovery mode */
2772 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2773 		return;
2774 
2775 	err = ice_sync_multicast_filters(sc);
2776 	if (err) {
2777 		device_printf(sc->dev,
2778 			      "Failed to synchronize multicast filter list: %s\n",
2779 			      ice_err_str(err));
2780 		return;
2781 	}
2782 }
2783 
2784 /**
2785  * ice_if_vlan_register - Register a VLAN with the hardware
2786  * @ctx: iflib ctx pointer
2787  * @vtag: VLAN to add
2788  *
2789  * Programs the main PF VSI with a hardware filter for the given VLAN.
2790  *
2791  * @pre assumes the caller holds the iflib CTX lock
2792  */
2793 static void
2794 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2795 {
2796 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2797 	enum ice_status status;
2798 
2799 	ASSERT_CTX_LOCKED(sc);
2800 
2801 	/* Do not handle VLAN configuration in recovery mode */
2802 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2803 		return;
2804 
2805 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2806 	if (status) {
2807 		device_printf(sc->dev,
2808 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2809 			      vtag, ice_status_str(status),
2810 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2811 	}
2812 }
2813 
2814 /**
2815  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2816  * @ctx: iflib ctx pointer
2817  * @vtag: VLAN to add
2818  *
2819  * Removes the previously programmed VLAN filter from the main PF VSI.
2820  *
2821  * @pre assumes the caller holds the iflib CTX lock
2822  */
2823 static void
2824 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2825 {
2826 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2827 	enum ice_status status;
2828 
2829 	ASSERT_CTX_LOCKED(sc);
2830 
2831 	/* Do not handle VLAN configuration in recovery mode */
2832 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2833 		return;
2834 
2835 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2836 	if (status) {
2837 		device_printf(sc->dev,
2838 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
2839 			      vtag, ice_status_str(status),
2840 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2841 	}
2842 }
2843 
2844 /**
2845  * ice_if_stop - Stop the device
2846  * @ctx: iflib context structure
2847  *
2848  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
2849  * down)
2850  *
2851  * @pre assumes the caller holds the iflib CTX lock
2852  */
2853 static void
2854 ice_if_stop(if_ctx_t ctx)
2855 {
2856 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2857 
2858 	ASSERT_CTX_LOCKED(sc);
2859 
2860 	/*
2861 	 * The iflib core may call IFDI_STOP prior to the first call to
2862 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
2863 	 * don't have, and disable Tx queues which aren't yet configured.
2864 	 * Although it is likely these extra operations are harmless, they do
2865 	 * cause spurious warning messages to be displayed, which may confuse
2866 	 * users.
2867 	 *
2868 	 * To avoid these messages, we use a state bit indicating if we've
2869 	 * been initialized. It will be set when ice_if_init is called, and
2870 	 * cleared here in ice_if_stop.
2871 	 */
2872 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
2873 		return;
2874 
2875 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2876 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
2877 		return;
2878 	}
2879 
2880 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2881 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
2882 		return;
2883 	}
2884 
2885 	ice_rdma_pf_stop(sc);
2886 
2887 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
2888 	 * return of these functions because there's nothing we can really do
2889 	 * if they fail, and the functions already print error messages.
2890 	 * Just try to shut down as much as we can.
2891 	 */
2892 	ice_rm_pf_default_mac_filters(sc);
2893 
2894 	/* Dissociate the Tx and Rx queues from the interrupts */
2895 	ice_flush_txq_interrupts(&sc->pf_vsi);
2896 	ice_flush_rxq_interrupts(&sc->pf_vsi);
2897 
2898 	/* Disable the Tx and Rx queues */
2899 	ice_vsi_disable_tx(&sc->pf_vsi);
2900 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2901 }
2902 
2903 /**
2904  * ice_if_get_counter - Get current value of an ifnet statistic
2905  * @ctx: iflib context pointer
2906  * @counter: ifnet counter to read
2907  *
2908  * Reads the current value of an ifnet counter for the device.
2909  *
2910  * This function is not protected by the iflib CTX lock.
2911  */
2912 static uint64_t
2913 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
2914 {
2915 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2916 
2917 	/* Return the counter for the main PF VSI */
2918 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
2919 }
2920 
2921 /**
2922  * ice_request_stack_reinit - Request that iflib re-initialize
2923  * @sc: the device private softc
2924  *
2925  * Request that the device be brought down and up, to re-initialize. For
2926  * example, this may be called when a device reset occurs, or when Tx and Rx
2927  * queues need to be re-initialized.
2928  *
2929  * This is required because the iflib state is outside the driver, and must be
2930  * re-initialized if we need to resart Tx and Rx queues.
2931  */
2932 void
2933 ice_request_stack_reinit(struct ice_softc *sc)
2934 {
2935 	if (CTX_ACTIVE(sc->ctx)) {
2936 		iflib_request_reset(sc->ctx);
2937 		iflib_admin_intr_deferred(sc->ctx);
2938 	}
2939 }
2940 
2941 /**
2942  * ice_driver_is_detaching - Check if the driver is detaching/unloading
2943  * @sc: device private softc
2944  *
2945  * Returns true if the driver is detaching, false otherwise.
2946  *
2947  * @remark on newer kernels, take advantage of iflib_in_detach in order to
2948  * report detachment correctly as early as possible.
2949  *
2950  * @remark this function is used by various code paths that want to avoid
2951  * running if the driver is about to be removed. This includes sysctls and
2952  * other driver access points. Note that it does not fully resolve
2953  * detach-based race conditions as it is possible for a thread to race with
2954  * iflib_in_detach.
2955  */
2956 bool
2957 ice_driver_is_detaching(struct ice_softc *sc)
2958 {
2959 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
2960 		iflib_in_detach(sc->ctx));
2961 }
2962 
2963 /**
2964  * ice_if_priv_ioctl - Device private ioctl handler
2965  * @ctx: iflib context pointer
2966  * @command: The ioctl command issued
2967  * @data: ioctl specific data
2968  *
2969  * iflib callback for handling custom driver specific ioctls.
2970  *
2971  * @pre Assumes that the iflib context lock is held.
2972  */
2973 static int
2974 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
2975 {
2976 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2977 	struct ifdrv *ifd;
2978 	device_t dev = sc->dev;
2979 
2980 	if (data == NULL)
2981 		return (EINVAL);
2982 
2983 	ASSERT_CTX_LOCKED(sc);
2984 
2985 	/* Make sure the command type is valid */
2986 	switch (command) {
2987 	case SIOCSDRVSPEC:
2988 	case SIOCGDRVSPEC:
2989 		/* Accepted commands */
2990 		break;
2991 	case SIOCGPRIVATE_0:
2992 		/*
2993 		 * Although we do not support this ioctl command, it's
2994 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
2995 		 * handler. Do not print a message in this case
2996 		 */
2997 		return (ENOTSUP);
2998 	default:
2999 		/*
3000 		 * If we get a different command for this function, it's
3001 		 * definitely unexpected, so log a message indicating what
3002 		 * command we got for debugging purposes.
3003 		 */
3004 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3005 			      __func__, command);
3006 		return (EINVAL);
3007 	}
3008 
3009 	ifd = (struct ifdrv *)data;
3010 
3011 	switch (ifd->ifd_cmd) {
3012 	case ICE_NVM_ACCESS:
3013 		return ice_handle_nvm_access_ioctl(sc, ifd);
3014 	case ICE_DEBUG_DUMP:
3015 		return ice_handle_debug_dump_ioctl(sc, ifd);
3016 	default:
3017 		return EINVAL;
3018 	}
3019 }
3020 
3021 /**
3022  * ice_if_i2c_req - I2C request handler for iflib
3023  * @ctx: iflib context pointer
3024  * @req: The I2C parameters to use
3025  *
3026  * Read from the port's I2C eeprom using the parameters from the ioctl.
3027  *
3028  * @remark The iflib-only part is pretty simple.
3029  */
3030 static int
3031 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3032 {
3033 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3034 
3035 	return ice_handle_i2c_req(sc, req);
3036 }
3037 
3038 /**
3039  * ice_if_suspend - PCI device suspend handler for iflib
3040  * @ctx: iflib context pointer
3041  *
3042  * Deinitializes the driver and clears HW resources in preparation for
3043  * suspend or an FLR.
3044  *
3045  * @returns 0; this return value is ignored
3046  */
3047 static int
3048 ice_if_suspend(if_ctx_t ctx)
3049 {
3050 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3051 
3052 	/* At least a PFR is always going to happen after this;
3053 	 * either via FLR or during the D3->D0 transition.
3054 	 */
3055 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3056 
3057 	ice_prepare_for_reset(sc);
3058 
3059 	return (0);
3060 }
3061 
3062 /**
3063  * ice_if_resume - PCI device resume handler for iflib
3064  * @ctx: iflib context pointer
3065  *
3066  * Reinitializes the driver and the HW after PCI resume or after
3067  * an FLR. An init is performed by iflib after this function is finished.
3068  *
3069  * @returns 0; this return value is ignored
3070  */
3071 static int
3072 ice_if_resume(if_ctx_t ctx)
3073 {
3074 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3075 
3076 	ice_rebuild(sc);
3077 
3078 	return (0);
3079 }
3080 
3081 /* ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3082  * @ctx: iflib context
3083  * @event: event code to check
3084  *
3085  * Defaults to returning false for unknown events.
3086  *
3087  * @returns true if iflib needs to reinit the interface
3088  */
3089 static bool
3090 ice_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
3091 {
3092 	switch (event) {
3093 	case IFLIB_RESTART_VLAN_CONFIG:
3094 	default:
3095 		return (false);
3096 	}
3097 }
3098 
3099