xref: /freebsd/sys/dev/ice/ice_lib.c (revision 4b9d6057)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_lib.c
34  * @brief Generic device setup and sysctl functions
35  *
36  * Library of generic device functions not specific to the networking stack.
37  *
38  * This includes hardware initialization functions, as well as handlers for
39  * many of the device sysctls used to probe driver status or tune specific
40  * behaviors.
41  */
42 
43 #include "ice_lib.h"
44 #include "ice_iflib.h"
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 #include <machine/resource.h>
48 #include <net/if_dl.h>
49 #include <sys/firmware.h>
50 #include <sys/priv.h>
51 #include <sys/limits.h>
52 
53 /**
54  * @var M_ICE
55  * @brief main ice driver allocation type
56  *
57  * malloc(9) allocation type used by the majority of memory allocations in the
58  * ice driver.
59  */
60 MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
61 
62 /*
63  * Helper function prototypes
64  */
65 static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
66 static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
67 static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
68 static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
69 static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
70 			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
71 static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
72 static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
73 static void ice_free_fltr_list(struct ice_list_head *list);
74 static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
75 			       const u8 *addr, enum ice_sw_fwd_act_type action);
76 static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
77 				   struct ice_ctl_q_info *cq);
78 static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
79 static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
80 				    struct ice_rq_event_info *event);
81 static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
82 static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
83 static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
84 static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
85 static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
86 static void ice_add_debug_tunables(struct ice_softc *sc);
87 static void ice_add_debug_sysctls(struct ice_softc *sc);
88 static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
89 static void ice_get_default_rss_key(u8 *seed);
90 static int  ice_set_rss_key(struct ice_vsi *vsi);
91 static int  ice_set_rss_lut(struct ice_vsi *vsi);
92 static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
93 static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
94 static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
95 static const char *ice_requested_fec_mode(struct ice_port_info *pi);
96 static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
97 static const char *ice_autoneg_mode(struct ice_port_info *pi);
98 static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
99 static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
100 static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
101 static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
102 static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
103 static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
104 static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
105 static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
106 				     struct sysctl_ctx_list *ctx,
107 				     struct sysctl_oid *parent);
108 static void
109 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
110 				 struct sysctl_oid_list *parent_list,
111 				 u64* pfc_stat_location,
112 				 const char *node_name,
113 				 const char *descr);
114 static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
115 					  struct sysctl_oid *parent,
116 					  struct ice_hw_port_stats *stats);
117 static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
118 				 enum ice_vsi_type type, int idx,
119 				 bool dynamic);
120 static void ice_handle_mib_change_event(struct ice_softc *sc,
121 				 struct ice_rq_event_info *event);
122 static void
123 ice_handle_lan_overflow_event(struct ice_softc *sc,
124 			      struct ice_rq_event_info *event);
125 static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
126 				     struct ice_list_head *list,
127 				     u16 ethertype, u16 direction,
128 				     enum ice_sw_fwd_act_type action);
129 static void ice_del_rx_lldp_filter(struct ice_softc *sc);
130 static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
131 					   u64 phy_type_high);
132 struct ice_phy_data;
133 static int
134 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
135 				   struct ice_phy_data *phy_data);
136 static int
137 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
138 			       struct ice_aqc_set_phy_cfg_data *cfg);
139 static int
140 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
141 			       struct ice_aqc_set_phy_cfg_data *cfg);
142 static void
143 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
144 			      struct ice_aqc_set_phy_cfg_data *cfg);
145 static void
146 ice_print_ldo_tlv(struct ice_softc *sc,
147 		  struct ice_link_default_override_tlv *tlv);
148 static void
149 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
150 				  u64 *phy_type_high);
151 static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
152 static void
153 ice_handle_health_status_event(struct ice_softc *sc,
154 			       struct ice_rq_event_info *event);
155 static void
156 ice_print_health_status_string(device_t dev,
157 			       struct ice_aqc_health_status_elem *elem);
158 static void
159 ice_debug_print_mib_change_event(struct ice_softc *sc,
160 				 struct ice_rq_event_info *event);
161 static bool ice_check_ets_bw(u8 *table);
162 static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
163 static bool
164 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
165 		       struct ice_dcbx_cfg *new_cfg);
166 static void ice_dcb_recfg(struct ice_softc *sc);
167 static u8 ice_dcb_tc_contig(u8 tc_map);
168 static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
169 static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
170 static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
171 				   struct ice_dcb_ets_cfg *ets);
172 static void ice_stop_pf_vsi(struct ice_softc *sc);
173 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
174 static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
175 void
176 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
177 			    struct sysctl_ctx_list *ctx,
178 			    struct sysctl_oid_list *ctx_list);
179 static void ice_set_default_local_mib_settings(struct ice_softc *sc);
180 static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
181 static void ice_start_dcbx_agent(struct ice_softc *sc);
182 static void ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
183 					    struct sbuf *sbuf, u16 cluster_id);
184 
185 static int ice_module_init(void);
186 static int ice_module_exit(void);
187 
188 /*
189  * package version comparison functions
190  */
191 static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
192 static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
193 
194 /*
195  * dynamic sysctl handlers
196  */
197 static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
198 static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
199 static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
200 static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
201 static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
202 static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
203 static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
204 static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
205 static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
206 static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
207 static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
208 static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
209 static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
210 static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
211 static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
212 static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
213 static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
214 					 bool is_phy_type_high);
215 static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
216 static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
217 static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
218 static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
219 static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
220 static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
221 static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
222 static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
223 static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
224 static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
225 static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
226 static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
227 static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
228 static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
229 static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
230 static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
231 static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
232 static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
233 static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
234 static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
235 static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
236 static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
237 static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
238 static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
239 static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
240 static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
241 static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
242 static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS);
243 static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS);
244 static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS);
245 
246 /**
247  * ice_map_bar - Map PCIe BAR memory
248  * @dev: the PCIe device
249  * @bar: the BAR info structure
250  * @bar_num: PCIe BAR number
251  *
252  * Maps the specified PCIe BAR. Stores the mapping data in struct
253  * ice_bar_info.
254  */
255 int
256 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
257 {
258 	if (bar->res != NULL) {
259 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
260 		return (EDOOFUS);
261 	}
262 
263 	bar->rid = PCIR_BAR(bar_num);
264 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
265 					  RF_ACTIVE);
266 	if (!bar->res) {
267 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
268 		return (ENXIO);
269 	}
270 
271 	bar->tag = rman_get_bustag(bar->res);
272 	bar->handle = rman_get_bushandle(bar->res);
273 	bar->size = rman_get_size(bar->res);
274 
275 	return (0);
276 }
277 
278 /**
279  * ice_free_bar - Free PCIe BAR memory
280  * @dev: the PCIe device
281  * @bar: the BAR info structure
282  *
283  * Frees the specified PCIe BAR, releasing its resources.
284  */
285 void
286 ice_free_bar(device_t dev, struct ice_bar_info *bar)
287 {
288 	if (bar->res != NULL)
289 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
290 	bar->res = NULL;
291 }
292 
293 /**
294  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
295  * @hw: the device hardware structure
296  *
297  * Configures the control queues for the given device, setting up the
298  * specified lengths, prior to initializing hardware.
299  */
300 void
301 ice_set_ctrlq_len(struct ice_hw *hw)
302 {
303 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
304 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
305 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
306 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
307 
308 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
309 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
310 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
311 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
312 
313 }
314 
/**
 * ice_get_next_vsi - Find the first unused VSI slot
 * @all_vsi: the VSI list
 * @size: the number of entries in the VSI list
 *
 * Scans the list from index zero and returns the index of the first NULL
 * entry. If every entry is in use, returns @size (one past the last valid
 * index).
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	while (slot < size) {
		if (all_vsi[slot] == NULL)
			return (slot);
		slot++;
	}

	/* No free slot was found. */
	return (size);
}
335 
336 /**
337  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
338  * @sc: the device private softc structure
339  * @vsi: the VSI to setup
340  * @type: the VSI type of the new VSI
341  * @idx: the index in the all_vsi array to use
342  * @dynamic: whether this VSI memory was dynamically allocated
343  *
344  * Perform setup for a VSI that is common to both dynamically allocated VSIs
345  * and the static PF VSI which is embedded in the softc structure.
346  */
347 static void
348 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
349 		     enum ice_vsi_type type, int idx, bool dynamic)
350 {
351 	/* Store important values in VSI struct */
352 	vsi->type = type;
353 	vsi->sc = sc;
354 	vsi->idx = idx;
355 	sc->all_vsi[idx] = vsi;
356 	vsi->dynamic = dynamic;
357 
358 	/* Setup the VSI tunables now */
359 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
360 }
361 
362 /**
363  * ice_alloc_vsi - Allocate a dynamic VSI
364  * @sc: device softc structure
365  * @type: VSI type
366  *
367  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
368  */
369 struct ice_vsi *
370 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
371 {
372 	struct ice_vsi *vsi;
373 	int idx;
374 
375 	/* Find an open index for a new VSI to be allocated. If the returned
376 	 * index is >= the num_available_vsi then it means no slot is
377 	 * available.
378 	 */
379 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
380 	if (idx >= sc->num_available_vsi) {
381 		device_printf(sc->dev, "No available VSI slots\n");
382 		return NULL;
383 	}
384 
385 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_WAITOK|M_ZERO);
386 	if (!vsi) {
387 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
388 		return NULL;
389 	}
390 
391 	ice_setup_vsi_common(sc, vsi, type, idx, true);
392 
393 	return vsi;
394 }
395 
396 /**
397  * ice_setup_pf_vsi - Setup the PF VSI
398  * @sc: the device private softc
399  *
400  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
401  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
402  * the softc memory, instead of being dynamically allocated at creation.
403  */
404 void
405 ice_setup_pf_vsi(struct ice_softc *sc)
406 {
407 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
408 }
409 
410 /**
411  * ice_alloc_vsi_qmap
412  * @vsi: VSI structure
413  * @max_tx_queues: Number of transmit queues to identify
414  * @max_rx_queues: Number of receive queues to identify
415  *
416  * Allocates a max_[t|r]x_queues array of words for the VSI where each
417  * word contains the index of the queue it represents.  In here, all
418  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
419  * all queues for this VSI are not yet assigned an index and thus,
420  * not ready for use.
421  *
422  * Returns an error code on failure.
423  */
424 int
425 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
426 		   const int max_rx_queues)
427 {
428 	struct ice_softc *sc = vsi->sc;
429 	int i;
430 
431 	MPASS(max_tx_queues > 0);
432 	MPASS(max_rx_queues > 0);
433 
434 	/* Allocate Tx queue mapping memory */
435 	if (!(vsi->tx_qmap =
436 	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
437 		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
438 		return (ENOMEM);
439 	}
440 
441 	/* Allocate Rx queue mapping memory */
442 	if (!(vsi->rx_qmap =
443 	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
444 		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
445 		goto free_tx_qmap;
446 	}
447 
448 	/* Mark every queue map as invalid to start with */
449 	for (i = 0; i < max_tx_queues; i++) {
450 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
451 	}
452 	for (i = 0; i < max_rx_queues; i++) {
453 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
454 	}
455 
456 	return 0;
457 
458 free_tx_qmap:
459 	free(vsi->tx_qmap, M_ICE);
460 	vsi->tx_qmap = NULL;
461 
462 	return (ENOMEM);
463 }
464 
465 /**
466  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
467  * @vsi: the VSI private structure
468  *
469  * Frees the PF qmaps associated with the given VSI. Generally this will be
470  * called by ice_release_vsi, but may need to be called during attach cleanup,
471  * depending on when the qmaps were allocated.
472  */
473 void
474 ice_free_vsi_qmaps(struct ice_vsi *vsi)
475 {
476 	struct ice_softc *sc = vsi->sc;
477 
478 	if (vsi->tx_qmap) {
479 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
480 					   vsi->num_tx_queues);
481 		free(vsi->tx_qmap, M_ICE);
482 		vsi->tx_qmap = NULL;
483 	}
484 
485 	if (vsi->rx_qmap) {
486 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
487 					   vsi->num_rx_queues);
488 		free(vsi->rx_qmap, M_ICE);
489 		vsi->rx_qmap = NULL;
490 	}
491 }
492 
493 /**
494  * ice_set_default_vsi_ctx - Setup default VSI context parameters
495  * @ctx: the VSI context to initialize
496  *
497  * Initialize and prepare a default VSI context for configuring a new VSI.
498  */
499 static void
500 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
501 {
502 	u32 table = 0;
503 
504 	memset(&ctx->info, 0, sizeof(ctx->info));
505 	/* VSI will be allocated from shared pool */
506 	ctx->alloc_from_pool = true;
507 	/* Enable source pruning by default */
508 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
509 	/* Traffic from VSI can be sent to LAN */
510 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
511 	/* Allow all packets untagged/tagged */
512 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
513 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
514 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
515 	/* Show VLAN/UP from packets in Rx descriptors */
516 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
517 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
518 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
519 	/* Have 1:1 UP mapping for both ingress/egress tables */
520 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
521 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
522 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
523 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
524 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
525 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
526 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
527 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
528 	ctx->info.ingress_table = CPU_TO_LE32(table);
529 	ctx->info.egress_table = CPU_TO_LE32(table);
530 	/* Have 1:1 UP mapping for outer to inner UP table */
531 	ctx->info.outer_up_table = CPU_TO_LE32(table);
532 	/* No Outer tag support, so outer_vlan_flags remains zero */
533 }
534 
535 /**
536  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
537  * @ctx: the VSI context to configure
538  * @type: the VSI type
539  *
540  * Configures the VSI context for RSS, based on the VSI type.
541  */
542 static void
543 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
544 {
545 	u8 lut_type, hash_type;
546 
547 	switch (type) {
548 	case ICE_VSI_PF:
549 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
550 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
551 		break;
552 	case ICE_VSI_VF:
553 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
554 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
555 		break;
556 	default:
557 		/* Other VSI types do not support RSS */
558 		return;
559 	}
560 
561 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
562 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
563 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
564 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
565 }
566 
567 /**
568  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
569  * @vsi: the VSI to configure
570  * @ctx: the VSI context to configure
571  *
572  * Configures the context for the given VSI, setting up how the firmware
573  * should map the queues for this VSI.
574  */
575 static int
576 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
577 {
578 	int pow = 0;
579 	u16 qmap;
580 
581 	MPASS(vsi->rx_qmap != NULL);
582 
583 	switch (vsi->qmap_type) {
584 	case ICE_RESMGR_ALLOC_CONTIGUOUS:
585 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
586 
587 		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
588 		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
589 
590 		break;
591 	case ICE_RESMGR_ALLOC_SCATTERED:
592 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
593 
594 		for (int i = 0; i < vsi->num_rx_queues; i++)
595 			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
596 		break;
597 	default:
598 		return (EOPNOTSUPP);
599 	}
600 
601 	/* Calculate the next power-of-2 of number of queues */
602 	if (vsi->num_rx_queues)
603 		pow = flsl(vsi->num_rx_queues - 1);
604 
605 	/* Assign all the queues to traffic class zero */
606 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
607 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
608 
609 	/* Fill out default driver TC queue info for VSI */
610 	vsi->tc_info[0].qoffset = 0;
611 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
612 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
613 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
614 		vsi->tc_info[i].qoffset = 0;
615 		vsi->tc_info[i].qcount_rx = 1;
616 		vsi->tc_info[i].qcount_tx = 1;
617 	}
618 	vsi->tc_map = 0x1;
619 
620 	return 0;
621 }
622 
623 /**
624  * ice_initialize_vsi - Initialize a VSI for use
625  * @vsi: the vsi to initialize
626  *
627  * Initialize a VSI over the adminq and prepare it for operation.
628  */
629 int
630 ice_initialize_vsi(struct ice_vsi *vsi)
631 {
632 	struct ice_vsi_ctx ctx = { 0 };
633 	struct ice_hw *hw = &vsi->sc->hw;
634 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
635 	enum ice_status status;
636 	int err;
637 
638 	/* For now, we only have code supporting PF VSIs */
639 	switch (vsi->type) {
640 	case ICE_VSI_PF:
641 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
642 		break;
643 	default:
644 		return (ENODEV);
645 	}
646 
647 	ice_set_default_vsi_ctx(&ctx);
648 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
649 
650 	/* XXX: VSIs of other types may need different port info? */
651 	ctx.info.sw_id = hw->port_info->sw_id;
652 
653 	/* Set some RSS parameters based on the VSI type */
654 	ice_vsi_set_rss_params(vsi);
655 
656 	/* Initialize the Rx queue mapping for this VSI */
657 	err = ice_setup_vsi_qmap(vsi, &ctx);
658 	if (err) {
659 		return err;
660 	}
661 
662 	/* (Re-)add VSI to HW VSI handle list */
663 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
664 	if (status != 0) {
665 		device_printf(vsi->sc->dev,
666 		    "Add VSI AQ call failed, err %s aq_err %s\n",
667 		    ice_status_str(status),
668 		    ice_aq_str(hw->adminq.sq_last_status));
669 		return (EIO);
670 	}
671 	vsi->info = ctx.info;
672 
673 	/* Initialize VSI with just 1 TC to start */
674 	max_txqs[0] = vsi->num_tx_queues;
675 
676 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
677 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
678 	if (status) {
679 		device_printf(vsi->sc->dev,
680 		    "Failed VSI lan queue config, err %s aq_err %s\n",
681 		    ice_status_str(status),
682 		    ice_aq_str(hw->adminq.sq_last_status));
683 		ice_deinit_vsi(vsi);
684 		return (ENODEV);
685 	}
686 
687 	/* Reset VSI stats */
688 	ice_reset_vsi_stats(vsi);
689 
690 	return 0;
691 }
692 
693 /**
694  * ice_deinit_vsi - Tell firmware to release resources for a VSI
695  * @vsi: the VSI to release
696  *
697  * Helper function which requests the firmware to release the hardware
698  * resources associated with a given VSI.
699  */
700 void
701 ice_deinit_vsi(struct ice_vsi *vsi)
702 {
703 	struct ice_vsi_ctx ctx = { 0 };
704 	struct ice_softc *sc = vsi->sc;
705 	struct ice_hw *hw = &sc->hw;
706 	enum ice_status status;
707 
708 	/* Assert that the VSI pointer matches in the list */
709 	MPASS(vsi == sc->all_vsi[vsi->idx]);
710 
711 	ctx.info = vsi->info;
712 
713 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
714 	if (status) {
715 		/*
716 		 * This should only fail if the VSI handle is invalid, or if
717 		 * any of the nodes have leaf nodes which are still in use.
718 		 */
719 		device_printf(sc->dev,
720 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
721 			      vsi->idx, ice_status_str(status));
722 	}
723 
724 	/* Tell firmware to release the VSI resources */
725 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
726 	if (status != 0) {
727 		device_printf(sc->dev,
728 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
729 		    vsi->idx, ice_status_str(status),
730 		    ice_aq_str(hw->adminq.sq_last_status));
731 	}
732 }
733 
734 /**
735  * ice_release_vsi - Release resources associated with a VSI
736  * @vsi: the VSI to release
737  *
738  * Release software and firmware resources associated with a VSI. Release the
739  * queue managers associated with this VSI. Also free the VSI structure memory
740  * if the VSI was allocated dynamically using ice_alloc_vsi().
741  */
742 void
743 ice_release_vsi(struct ice_vsi *vsi)
744 {
745 	struct ice_softc *sc = vsi->sc;
746 	int idx = vsi->idx;
747 
748 	/* Assert that the VSI pointer matches in the list */
749 	MPASS(vsi == sc->all_vsi[idx]);
750 
751 	/* Cleanup RSS configuration */
752 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
753 		ice_clean_vsi_rss_cfg(vsi);
754 
755 	ice_del_vsi_sysctl_ctx(vsi);
756 
757 	/*
758 	 * If we unload the driver after a reset fails, we do not need to do
759 	 * this step.
760 	 */
761 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
762 		ice_deinit_vsi(vsi);
763 
764 	ice_free_vsi_qmaps(vsi);
765 
766 	if (vsi->dynamic) {
767 		free(sc->all_vsi[idx], M_ICE);
768 	}
769 
770 	sc->all_vsi[idx] = NULL;
771 }
772 
773 /**
774  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
775  * @pi: port info data
776  *
777  * Returns the baudrate value for the current link speed of a given port.
778  */
779 uint64_t
780 ice_aq_speed_to_rate(struct ice_port_info *pi)
781 {
782 	switch (pi->phy.link_info.link_speed) {
783 	case ICE_AQ_LINK_SPEED_100GB:
784 		return IF_Gbps(100);
785 	case ICE_AQ_LINK_SPEED_50GB:
786 		return IF_Gbps(50);
787 	case ICE_AQ_LINK_SPEED_40GB:
788 		return IF_Gbps(40);
789 	case ICE_AQ_LINK_SPEED_25GB:
790 		return IF_Gbps(25);
791 	case ICE_AQ_LINK_SPEED_10GB:
792 		return IF_Gbps(10);
793 	case ICE_AQ_LINK_SPEED_5GB:
794 		return IF_Gbps(5);
795 	case ICE_AQ_LINK_SPEED_2500MB:
796 		return IF_Mbps(2500);
797 	case ICE_AQ_LINK_SPEED_1000MB:
798 		return IF_Mbps(1000);
799 	case ICE_AQ_LINK_SPEED_100MB:
800 		return IF_Mbps(100);
801 	case ICE_AQ_LINK_SPEED_10MB:
802 		return IF_Mbps(10);
803 	case ICE_AQ_LINK_SPEED_UNKNOWN:
804 	default:
805 		/* return 0 if we don't know the link speed */
806 		return 0;
807 	}
808 }
809 
810 /**
811  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
812  * @pi: port info data
813  *
814  * Returns the string representation of the current link speed for a given
815  * port.
816  */
817 static const char *
818 ice_aq_speed_to_str(struct ice_port_info *pi)
819 {
820 	switch (pi->phy.link_info.link_speed) {
821 	case ICE_AQ_LINK_SPEED_100GB:
822 		return "100 Gbps";
823 	case ICE_AQ_LINK_SPEED_50GB:
824 		return "50 Gbps";
825 	case ICE_AQ_LINK_SPEED_40GB:
826 		return "40 Gbps";
827 	case ICE_AQ_LINK_SPEED_25GB:
828 		return "25 Gbps";
829 	case ICE_AQ_LINK_SPEED_20GB:
830 		return "20 Gbps";
831 	case ICE_AQ_LINK_SPEED_10GB:
832 		return "10 Gbps";
833 	case ICE_AQ_LINK_SPEED_5GB:
834 		return "5 Gbps";
835 	case ICE_AQ_LINK_SPEED_2500MB:
836 		return "2.5 Gbps";
837 	case ICE_AQ_LINK_SPEED_1000MB:
838 		return "1 Gbps";
839 	case ICE_AQ_LINK_SPEED_100MB:
840 		return "100 Mbps";
841 	case ICE_AQ_LINK_SPEED_10MB:
842 		return "10 Mbps";
843 	case ICE_AQ_LINK_SPEED_UNKNOWN:
844 	default:
845 		return "Unknown speed";
846 	}
847 }
848 
849 /**
850  * ice_get_phy_type_low - Get media associated with phy_type_low
851  * @phy_type_low: the low 64bits of phy_type from the AdminQ
852  *
853  * Given the lower 64bits of the phy_type from the hardware, return the
854  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
855  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
856  * be called. If phy_type_low is zero, call ice_phy_type_high.
857  */
858 int
859 ice_get_phy_type_low(uint64_t phy_type_low)
860 {
861 	switch (phy_type_low) {
862 	case ICE_PHY_TYPE_LOW_100BASE_TX:
863 		return IFM_100_TX;
864 	case ICE_PHY_TYPE_LOW_100M_SGMII:
865 		return IFM_100_SGMII;
866 	case ICE_PHY_TYPE_LOW_1000BASE_T:
867 		return IFM_1000_T;
868 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
869 		return IFM_1000_SX;
870 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
871 		return IFM_1000_LX;
872 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
873 		return IFM_1000_KX;
874 	case ICE_PHY_TYPE_LOW_1G_SGMII:
875 		return IFM_1000_SGMII;
876 	case ICE_PHY_TYPE_LOW_2500BASE_T:
877 		return IFM_2500_T;
878 	case ICE_PHY_TYPE_LOW_2500BASE_X:
879 		return IFM_2500_X;
880 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
881 		return IFM_2500_KX;
882 	case ICE_PHY_TYPE_LOW_5GBASE_T:
883 		return IFM_5000_T;
884 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
885 		return IFM_5000_KR;
886 	case ICE_PHY_TYPE_LOW_10GBASE_T:
887 		return IFM_10G_T;
888 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
889 		return IFM_10G_TWINAX;
890 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
891 		return IFM_10G_SR;
892 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
893 		return IFM_10G_LR;
894 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
895 		return IFM_10G_KR;
896 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
897 		return IFM_10G_AOC;
898 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
899 		return IFM_10G_SFI;
900 	case ICE_PHY_TYPE_LOW_25GBASE_T:
901 		return IFM_25G_T;
902 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
903 		return IFM_25G_CR;
904 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
905 		return IFM_25G_CR_S;
906 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
907 		return IFM_25G_CR1;
908 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
909 		return IFM_25G_SR;
910 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
911 		return IFM_25G_LR;
912 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
913 		return IFM_25G_KR;
914 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
915 		return IFM_25G_KR_S;
916 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
917 		return IFM_25G_KR1;
918 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
919 		return IFM_25G_AOC;
920 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
921 		return IFM_25G_AUI;
922 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
923 		return IFM_40G_CR4;
924 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
925 		return IFM_40G_SR4;
926 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
927 		return IFM_40G_LR4;
928 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
929 		return IFM_40G_KR4;
930 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
931 		return IFM_40G_XLAUI_AC;
932 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
933 		return IFM_40G_XLAUI;
934 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
935 		return IFM_50G_CR2;
936 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
937 		return IFM_50G_SR2;
938 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
939 		return IFM_50G_LR2;
940 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
941 		return IFM_50G_KR2;
942 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
943 		return IFM_50G_LAUI2_AC;
944 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
945 		return IFM_50G_LAUI2;
946 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
947 		return IFM_50G_AUI2_AC;
948 	case ICE_PHY_TYPE_LOW_50G_AUI2:
949 		return IFM_50G_AUI2;
950 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
951 		return IFM_50G_CP;
952 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
953 		return IFM_50G_SR;
954 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
955 		return IFM_50G_FR;
956 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
957 		return IFM_50G_LR;
958 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
959 		return IFM_50G_KR_PAM4;
960 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
961 		return IFM_50G_AUI1_AC;
962 	case ICE_PHY_TYPE_LOW_50G_AUI1:
963 		return IFM_50G_AUI1;
964 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
965 		return IFM_100G_CR4;
966 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
967 		return IFM_100G_SR4;
968 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
969 		return IFM_100G_LR4;
970 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
971 		return IFM_100G_KR4;
972 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
973 		return IFM_100G_CAUI4_AC;
974 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
975 		return IFM_100G_CAUI4;
976 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
977 		return IFM_100G_AUI4_AC;
978 	case ICE_PHY_TYPE_LOW_100G_AUI4:
979 		return IFM_100G_AUI4;
980 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
981 		return IFM_100G_CR_PAM4;
982 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
983 		return IFM_100G_KR_PAM4;
984 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
985 		return IFM_100G_CP2;
986 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
987 		return IFM_100G_SR2;
988 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
989 		return IFM_100G_DR;
990 	default:
991 		return IFM_UNKNOWN;
992 	}
993 }
994 
995 /**
996  * ice_get_phy_type_high - Get media associated with phy_type_high
997  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
998  *
999  * Given the upper 64bits of the phy_type from the hardware, return the
1000  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1001  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1002  * called. If phy_type_high is zero, call ice_get_phy_type_low.
1003  */
1004 int
1005 ice_get_phy_type_high(uint64_t phy_type_high)
1006 {
1007 	switch (phy_type_high) {
1008 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1009 		return IFM_100G_KR2_PAM4;
1010 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1011 		return IFM_100G_CAUI2_AC;
1012 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1013 		return IFM_100G_CAUI2;
1014 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1015 		return IFM_100G_AUI2_AC;
1016 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1017 		return IFM_100G_AUI2;
1018 	default:
1019 		return IFM_UNKNOWN;
1020 	}
1021 }
1022 
1023 /**
1024  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1025  * @pi: port info struct
1026  *
1027  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1028  * to have been called before this function for it to work.
1029  */
1030 static uint64_t
1031 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1032 {
1033 	uint64_t phy_low = pi->phy.phy_type_low;
1034 	uint64_t phy_high = pi->phy.phy_type_high;
1035 	uint64_t max_rate = 0;
1036 	int bit;
1037 
1038 	/*
1039 	 * These are based on the indices used in the BIT() macros for
1040 	 * ICE_PHY_TYPE_LOW_*
1041 	 */
1042 	static const uint64_t phy_rates[] = {
1043 	    IF_Mbps(100),
1044 	    IF_Mbps(100),
1045 	    IF_Gbps(1ULL),
1046 	    IF_Gbps(1ULL),
1047 	    IF_Gbps(1ULL),
1048 	    IF_Gbps(1ULL),
1049 	    IF_Gbps(1ULL),
1050 	    IF_Mbps(2500ULL),
1051 	    IF_Mbps(2500ULL),
1052 	    IF_Mbps(2500ULL),
1053 	    IF_Gbps(5ULL),
1054 	    IF_Gbps(5ULL),
1055 	    IF_Gbps(10ULL),
1056 	    IF_Gbps(10ULL),
1057 	    IF_Gbps(10ULL),
1058 	    IF_Gbps(10ULL),
1059 	    IF_Gbps(10ULL),
1060 	    IF_Gbps(10ULL),
1061 	    IF_Gbps(10ULL),
1062 	    IF_Gbps(25ULL),
1063 	    IF_Gbps(25ULL),
1064 	    IF_Gbps(25ULL),
1065 	    IF_Gbps(25ULL),
1066 	    IF_Gbps(25ULL),
1067 	    IF_Gbps(25ULL),
1068 	    IF_Gbps(25ULL),
1069 	    IF_Gbps(25ULL),
1070 	    IF_Gbps(25ULL),
1071 	    IF_Gbps(25ULL),
1072 	    IF_Gbps(25ULL),
1073 	    IF_Gbps(40ULL),
1074 	    IF_Gbps(40ULL),
1075 	    IF_Gbps(40ULL),
1076 	    IF_Gbps(40ULL),
1077 	    IF_Gbps(40ULL),
1078 	    IF_Gbps(40ULL),
1079 	    IF_Gbps(50ULL),
1080 	    IF_Gbps(50ULL),
1081 	    IF_Gbps(50ULL),
1082 	    IF_Gbps(50ULL),
1083 	    IF_Gbps(50ULL),
1084 	    IF_Gbps(50ULL),
1085 	    IF_Gbps(50ULL),
1086 	    IF_Gbps(50ULL),
1087 	    IF_Gbps(50ULL),
1088 	    IF_Gbps(50ULL),
1089 	    IF_Gbps(50ULL),
1090 	    IF_Gbps(50ULL),
1091 	    IF_Gbps(50ULL),
1092 	    IF_Gbps(50ULL),
1093 	    IF_Gbps(50ULL),
1094 	    IF_Gbps(100ULL),
1095 	    IF_Gbps(100ULL),
1096 	    IF_Gbps(100ULL),
1097 	    IF_Gbps(100ULL),
1098 	    IF_Gbps(100ULL),
1099 	    IF_Gbps(100ULL),
1100 	    IF_Gbps(100ULL),
1101 	    IF_Gbps(100ULL),
1102 	    IF_Gbps(100ULL),
1103 	    IF_Gbps(100ULL),
1104 	    IF_Gbps(100ULL),
1105 	    IF_Gbps(100ULL),
1106 	    IF_Gbps(100ULL),
1107 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1108 	    IF_Gbps(100ULL),
1109 	    IF_Gbps(100ULL),
1110 	    IF_Gbps(100ULL),
1111 	    IF_Gbps(100ULL),
1112 	    IF_Gbps(100ULL)
1113 	};
1114 
1115 	/* coverity[address_of] */
1116 	for_each_set_bit(bit, &phy_high, 64)
1117 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1118 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1119 
1120 	/* coverity[address_of] */
1121 	for_each_set_bit(bit, &phy_low, 64)
1122 		max_rate = uqmax(max_rate, phy_rates[bit]);
1123 
1124 	return (max_rate);
1125 }
1126 
/*
 * An if_media type is stored in two pieces: the original 5 bit media variant
 * field, plus extended type bits kept in the options section. IFM_IDX()
 * undoes the IFM_X calculation by shifting the extended bits back down and
 * merging them with the variant field, producing a single number that can
 * be used as a bitmap index.
 */
#define IFM_IDX(x) ((((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT) | \
		    ((x) & IFM_TMASK))
1134 
1135 /**
1136  * ice_add_media_types - Add supported media types to the media structure
1137  * @sc: ice private softc structure
1138  * @media: ifmedia structure to setup
1139  *
1140  * Looks up the supported phy types, and initializes the various media types
1141  * available.
1142  *
1143  * @pre this function must be protected from being called while another thread
1144  * is accessing the ifmedia types.
1145  */
1146 enum ice_status
1147 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1148 {
1149 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1150 	struct ice_port_info *pi = sc->hw.port_info;
1151 	enum ice_status status;
1152 	uint64_t phy_low, phy_high;
1153 	int bit;
1154 
1155 	ASSERT_CFG_LOCKED(sc);
1156 
1157 	/* the maximum possible media type index is 511. We probably don't
1158 	 * need most of this space, but this ensures future compatibility when
1159 	 * additional media types are used.
1160 	 */
1161 	ice_declare_bitmap(already_added, 511);
1162 
1163 	/* Remove all previous media types */
1164 	ifmedia_removeall(media);
1165 
1166 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1167 				     &pcaps, NULL);
1168 	if (status != ICE_SUCCESS) {
1169 		device_printf(sc->dev,
1170 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1171 		    __func__, ice_status_str(status),
1172 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1173 		return (status);
1174 	}
1175 	phy_low = le64toh(pcaps.phy_type_low);
1176 	phy_high = le64toh(pcaps.phy_type_high);
1177 
1178 	/* make sure the added bitmap is zero'd */
1179 	memset(already_added, 0, sizeof(already_added));
1180 
1181 	/* coverity[address_of] */
1182 	for_each_set_bit(bit, &phy_low, 64) {
1183 		uint64_t type = BIT_ULL(bit);
1184 		int ostype;
1185 
1186 		/* get the OS media type */
1187 		ostype = ice_get_phy_type_low(type);
1188 
1189 		/* don't bother adding the unknown type */
1190 		if (ostype == IFM_UNKNOWN)
1191 			continue;
1192 
1193 		/* only add each media type to the list once */
1194 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1195 			continue;
1196 
1197 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1198 		ice_set_bit(IFM_IDX(ostype), already_added);
1199 	}
1200 
1201 	/* coverity[address_of] */
1202 	for_each_set_bit(bit, &phy_high, 64) {
1203 		uint64_t type = BIT_ULL(bit);
1204 		int ostype;
1205 
1206 		/* get the OS media type */
1207 		ostype = ice_get_phy_type_high(type);
1208 
1209 		/* don't bother adding the unknown type */
1210 		if (ostype == IFM_UNKNOWN)
1211 			continue;
1212 
1213 		/* only add each media type to the list once */
1214 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1215 			continue;
1216 
1217 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1218 		ice_set_bit(IFM_IDX(ostype), already_added);
1219 	}
1220 
1221 	/* Use autoselect media by default */
1222 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1223 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1224 
1225 	return (ICE_SUCCESS);
1226 }
1227 
1228 /**
1229  * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1230  * @hw: ice hw structure
1231  * @rxqid: Rx queue index in PF space
1232  * @vector: MSI-X vector index in PF/VF space
1233  * @itr_idx: ITR index to use for interrupt
1234  *
1235  * @remark ice_flush() may need to be called after this
1236  */
1237 void
1238 ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1239 {
1240 	u32 val;
1241 
1242 	MPASS(itr_idx <= ICE_ITR_NONE);
1243 
1244 	val = (QINT_RQCTL_CAUSE_ENA_M |
1245 	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1246 	       (vector << QINT_RQCTL_MSIX_INDX_S));
1247 	wr32(hw, QINT_RQCTL(rxqid), val);
1248 }
1249 
1250 /**
1251  * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1252  * @vsi: the VSI to configure
1253  *
1254  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1255  */
1256 void
1257 ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1258 {
1259 	struct ice_hw *hw = &vsi->sc->hw;
1260 	int i;
1261 
1262 	for (i = 0; i < vsi->num_rx_queues; i++) {
1263 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1264 
1265 		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1266 					    rxq->irqv->me, ICE_RX_ITR);
1267 	}
1268 
1269 	ice_flush(hw);
1270 }
1271 
1272 /**
1273  * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1274  * @hw: ice hw structure
1275  * @txqid: Tx queue index in PF space
1276  * @vector: MSI-X vector index in PF/VF space
1277  * @itr_idx: ITR index to use for interrupt
1278  *
1279  * @remark ice_flush() may need to be called after this
1280  */
1281 void
1282 ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1283 {
1284 	u32 val;
1285 
1286 	MPASS(itr_idx <= ICE_ITR_NONE);
1287 
1288 	val = (QINT_TQCTL_CAUSE_ENA_M |
1289 	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1290 	       (vector << QINT_TQCTL_MSIX_INDX_S));
1291 	wr32(hw, QINT_TQCTL(txqid), val);
1292 }
1293 
1294 /**
1295  * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1296  * @vsi: the VSI to configure
1297  *
1298  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1299  */
1300 void
1301 ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1302 {
1303 	struct ice_hw *hw = &vsi->sc->hw;
1304 	int i;
1305 
1306 	for (i = 0; i < vsi->num_tx_queues; i++) {
1307 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1308 
1309 		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1310 					    txq->irqv->me, ICE_TX_ITR);
1311 	}
1312 
1313 	ice_flush(hw);
1314 }
1315 
1316 /**
1317  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1318  * @vsi: the VSI to configure
1319  *
1320  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1321  * a software interrupt on that cause. This is required as part of the Rx
1322  * queue disable logic to dissociate the Rx queue from the interrupt.
1323  *
1324  * Note: this function must be called prior to disabling Rx queues with
1325  * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1326  */
1327 void
1328 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1329 {
1330 	struct ice_hw *hw = &vsi->sc->hw;
1331 	int i;
1332 
1333 	for (i = 0; i < vsi->num_rx_queues; i++) {
1334 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1335 		u32 reg, val;
1336 
1337 		/* Clear the CAUSE_ENA flag */
1338 		reg = vsi->rx_qmap[rxq->me];
1339 		val = rd32(hw, QINT_RQCTL(reg));
1340 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1341 		wr32(hw, QINT_RQCTL(reg), val);
1342 
1343 		ice_flush(hw);
1344 
1345 		/* Trigger a software interrupt to complete interrupt
1346 		 * dissociation.
1347 		 */
1348 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1349 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1350 	}
1351 }
1352 
1353 /**
1354  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1355  * @vsi: the VSI to configure
1356  *
1357  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1358  * a software interrupt on that cause. This is required as part of the Tx
1359  * queue disable logic to dissociate the Tx queue from the interrupt.
1360  *
1361  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1362  * the Tx queue disable may not complete properly.
1363  */
1364 void
1365 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1366 {
1367 	struct ice_hw *hw = &vsi->sc->hw;
1368 	int i;
1369 
1370 	for (i = 0; i < vsi->num_tx_queues; i++) {
1371 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1372 		u32 reg, val;
1373 
1374 		/* Clear the CAUSE_ENA flag */
1375 		reg = vsi->tx_qmap[txq->me];
1376 		val = rd32(hw, QINT_TQCTL(reg));
1377 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1378 		wr32(hw, QINT_TQCTL(reg), val);
1379 
1380 		ice_flush(hw);
1381 
1382 		/* Trigger a software interrupt to complete interrupt
1383 		 * dissociation.
1384 		 */
1385 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1386 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1387 	}
1388 }
1389 
1390 /**
1391  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1392  * @vsi: the VSI to configure
1393  *
1394  * Program the hardware ITR registers with the settings for this VSI.
1395  */
1396 void
1397 ice_configure_rx_itr(struct ice_vsi *vsi)
1398 {
1399 	struct ice_hw *hw = &vsi->sc->hw;
1400 	int i;
1401 
1402 	/* TODO: Handle per-queue/per-vector ITR? */
1403 
1404 	for (i = 0; i < vsi->num_rx_queues; i++) {
1405 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1406 
1407 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1408 		     ice_itr_to_reg(hw, vsi->rx_itr));
1409 	}
1410 
1411 	ice_flush(hw);
1412 }
1413 
1414 /**
1415  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1416  * @vsi: the VSI to configure
1417  *
1418  * Program the hardware ITR registers with the settings for this VSI.
1419  */
1420 void
1421 ice_configure_tx_itr(struct ice_vsi *vsi)
1422 {
1423 	struct ice_hw *hw = &vsi->sc->hw;
1424 	int i;
1425 
1426 	/* TODO: Handle per-queue/per-vector ITR? */
1427 
1428 	for (i = 0; i < vsi->num_tx_queues; i++) {
1429 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1430 
1431 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1432 		     ice_itr_to_reg(hw, vsi->tx_itr));
1433 	}
1434 
1435 	ice_flush(hw);
1436 }
1437 
1438 /**
1439  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1440  * @txq: the Tx queue to configure
1441  * @tlan_ctx: the Tx LAN queue context structure to initialize
1442  * @pf_q: real queue number
1443  */
1444 static int
1445 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1446 {
1447 	struct ice_vsi *vsi = txq->vsi;
1448 	struct ice_softc *sc = vsi->sc;
1449 	struct ice_hw *hw = &sc->hw;
1450 
1451 	tlan_ctx->port_num = hw->port_info->lport;
1452 
1453 	/* number of descriptors in the queue */
1454 	tlan_ctx->qlen = txq->desc_count;
1455 
1456 	/* set the transmit queue base address, defined in 128 byte units */
1457 	tlan_ctx->base = txq->tx_paddr >> 7;
1458 
1459 	tlan_ctx->pf_num = hw->pf_id;
1460 
1461 	switch (vsi->type) {
1462 	case ICE_VSI_PF:
1463 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1464 		break;
1465 	default:
1466 		return (ENODEV);
1467 	}
1468 
1469 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1470 
1471 	/* Enable TSO */
1472 	tlan_ctx->tso_ena = 1;
1473 	tlan_ctx->internal_usage_flag = 1;
1474 
1475 	tlan_ctx->tso_qnum = pf_q;
1476 
1477 	/*
1478 	 * Stick with the older legacy Tx queue interface, instead of the new
1479 	 * advanced queue interface.
1480 	 */
1481 	tlan_ctx->legacy_int = 1;
1482 
1483 	/* Descriptor WB mode */
1484 	tlan_ctx->wb_mode = 0;
1485 
1486 	return (0);
1487 }
1488 
1489 /**
1490  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1491  * @vsi: the VSI to configure
1492  *
1493  * Configure the device Tx queues through firmware AdminQ commands. After
1494  * this, Tx queues will be ready for transmit.
1495  */
1496 int
1497 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1498 {
1499 	struct ice_aqc_add_tx_qgrp *qg;
1500 	struct ice_hw *hw = &vsi->sc->hw;
1501 	device_t dev = vsi->sc->dev;
1502 	enum ice_status status;
1503 	int i;
1504 	int err = 0;
1505 	u16 qg_size, pf_q;
1506 
1507 	qg_size = ice_struct_size(qg, txqs, 1);
1508 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1509 	if (!qg)
1510 		return (ENOMEM);
1511 
1512 	qg->num_txqs = 1;
1513 
1514 	for (i = 0; i < vsi->num_tx_queues; i++) {
1515 		struct ice_tlan_ctx tlan_ctx = { 0 };
1516 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1517 
1518 		pf_q = vsi->tx_qmap[txq->me];
1519 		qg->txqs[0].txq_id = htole16(pf_q);
1520 
1521 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1522 		if (err)
1523 			goto free_txqg;
1524 
1525 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1526 			    ice_tlan_ctx_info);
1527 
1528 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1529 					 txq->q_handle, 1, qg, qg_size, NULL);
1530 		if (status) {
1531 			device_printf(dev,
1532 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1533 				      i, txq->tc, txq->q_handle,
1534 				      ice_status_str(status),
1535 				      ice_aq_str(hw->adminq.sq_last_status));
1536 			err = ENODEV;
1537 			goto free_txqg;
1538 		}
1539 
1540 		/* Keep track of the Tx queue TEID */
1541 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1542 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1543 	}
1544 
1545 free_txqg:
1546 	free(qg, M_ICE);
1547 
1548 	return (err);
1549 }
1550 
1551 /**
1552  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1553  * @rxq: the receive queue to program
1554  *
1555  * Setup an Rx queue context structure and program it into the hardware
1556  * registers. This is a necessary step for enabling the Rx queue.
1557  *
1558  * @pre the VSI associated with this queue must have initialized mbuf_sz
1559  */
1560 static int
1561 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1562 {
1563 	struct ice_rlan_ctx rlan_ctx = {0};
1564 	struct ice_vsi *vsi = rxq->vsi;
1565 	struct ice_softc *sc = vsi->sc;
1566 	struct ice_hw *hw = &sc->hw;
1567 	enum ice_status status;
1568 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1569 	u32 regval;
1570 	u16 pf_q;
1571 
1572 	pf_q = vsi->rx_qmap[rxq->me];
1573 
1574 	/* set the receive queue base address, defined in 128 byte units */
1575 	rlan_ctx.base = rxq->rx_paddr >> 7;
1576 
1577 	rlan_ctx.qlen = rxq->desc_count;
1578 
1579 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1580 
1581 	/* use 32 byte descriptors */
1582 	rlan_ctx.dsize = 1;
1583 
1584 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1585 	 * host memory.
1586 	 */
1587 	rlan_ctx.crcstrip = 1;
1588 
1589 	rlan_ctx.l2tsel = 1;
1590 
1591 	/* don't do header splitting */
1592 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1593 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1594 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1595 
1596 	/* strip VLAN from inner headers */
1597 	rlan_ctx.showiv = 1;
1598 
1599 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1600 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1601 
1602 	rlan_ctx.lrxqthresh = 1;
1603 
1604 	if (vsi->type != ICE_VSI_VF) {
1605 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1606 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1607 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1608 			QRXFLXP_CNTXT_RXDID_IDX_M;
1609 
1610 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1611 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1612 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1613 
1614 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1615 	}
1616 
1617 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1618 	if (status) {
1619 		device_printf(sc->dev,
1620 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1621 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1622 		return (EIO);
1623 	}
1624 
1625 	wr32(hw, rxq->tail, 0);
1626 
1627 	return 0;
1628 }
1629 
1630 /**
1631  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1632  * @vsi: the VSI to configure
1633  *
1634  * Prepare an Rx context descriptor and configure the device to receive
1635  * traffic.
1636  *
1637  * @pre the VSI must have initialized mbuf_sz
1638  */
1639 int
1640 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1641 {
1642 	int i, err;
1643 
1644 	for (i = 0; i < vsi->num_rx_queues; i++) {
1645 		MPASS(vsi->mbuf_sz > 0);
1646 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1647 		if (err)
1648 			return err;
1649 	}
1650 
1651 	return (0);
1652 }
1653 
1654 /**
1655  * ice_is_rxq_ready - Check if an Rx queue is ready
1656  * @hw: ice hw structure
1657  * @pf_q: absolute PF queue index to check
1658  * @reg: on successful return, contains qrx_ctrl contents
1659  *
1660  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1661  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1662  * a request to change the queue, as well as to verify the request has
1663  * finished. The queue should change status within a few microseconds, so we
1664  * use a small delay while polling the register.
1665  *
1666  * Returns an error code if the queue does not update after a few retries.
1667  */
1668 static int
1669 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1670 {
1671 	u32 qrx_ctrl, qena_req, qena_stat;
1672 	int i;
1673 
1674 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1675 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1676 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1677 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1678 
1679 		/* if the request and status bits equal, then the queue is
1680 		 * fully disabled or enabled.
1681 		 */
1682 		if (qena_req == qena_stat) {
1683 			*reg = qrx_ctrl;
1684 			return (0);
1685 		}
1686 
1687 		/* wait a few microseconds before we check again */
1688 		DELAY(10);
1689 	}
1690 
1691 	return (ETIMEDOUT);
1692 }
1693 
1694 /**
1695  * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1696  * @vsi: VSI containing queue to enable/disable
1697  * @qidx: Queue index in VSI space
1698  * @enable: true to enable queue, false to disable
1699  *
1700  * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1701  * it. Wait for the appropriate time to ensure that the queue has actually
1702  * reached the expected state.
1703  */
1704 int
1705 ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1706 {
1707 	struct ice_hw *hw = &vsi->sc->hw;
1708 	device_t dev = vsi->sc->dev;
1709 	u32 qrx_ctrl = 0;
1710 	int err;
1711 
1712 	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1713 	int pf_q = vsi->rx_qmap[rxq->me];
1714 
1715 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1716 	if (err) {
1717 		device_printf(dev,
1718 			      "Rx queue %d is not ready\n",
1719 			      pf_q);
1720 		return err;
1721 	}
1722 
1723 	/* Skip if the queue is already in correct state */
1724 	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1725 		return (0);
1726 
1727 	if (enable)
1728 		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1729 	else
1730 		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1731 	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1732 
1733 	/* wait for the queue to finalize the request */
1734 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1735 	if (err) {
1736 		device_printf(dev,
1737 			      "Rx queue %d %sable timeout\n",
1738 			      pf_q, (enable ? "en" : "dis"));
1739 		return err;
1740 	}
1741 
1742 	/* this should never happen */
1743 	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1744 		device_printf(dev,
1745 			      "Rx queue %d invalid state\n",
1746 			      pf_q);
1747 		return (EDOOFUS);
1748 	}
1749 
1750 	return (0);
1751 }
1752 
1753 /**
1754  * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1755  * @vsi: VSI to enable/disable queues
1756  * @enable: true to enable queues, false to disable
1757  *
1758  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1759  * them. Wait for the appropriate time to ensure that the queues have actually
1760  * reached the expected state.
1761  */
1762 int
1763 ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1764 {
1765 	int i, err;
1766 
1767 	/* TODO: amortize waits by changing all queues up front and then
1768 	 * checking their status afterwards. This will become more necessary
1769 	 * when we have a large number of queues.
1770 	 */
1771 	for (i = 0; i < vsi->num_rx_queues; i++) {
1772 		err = ice_control_rx_queue(vsi, i, enable);
1773 		if (err)
1774 			break;
1775 	}
1776 
1777 	return (0);
1778 }
1779 
1780 /**
1781  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1782  * @vsi: the VSI to forward to
1783  * @list: list which contains MAC filter entries
1784  * @addr: the MAC address to be added
1785  * @action: filter action to perform on match
1786  *
1787  * Adds a MAC address filter to the list which will be forwarded to firmware
1788  * to add a series of MAC address filters.
1789  *
1790  * Returns 0 on success, and an error code on failure.
1791  *
1792  */
1793 static int
1794 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1795 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1796 {
1797 	struct ice_fltr_list_entry *entry;
1798 
1799 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1800 	if (!entry)
1801 		return (ENOMEM);
1802 
1803 	entry->fltr_info.flag = ICE_FLTR_TX;
1804 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1805 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1806 	entry->fltr_info.fltr_act = action;
1807 	entry->fltr_info.vsi_handle = vsi->idx;
1808 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1809 
1810 	LIST_ADD(&entry->list_entry, list);
1811 
1812 	return 0;
1813 }
1814 
1815 /**
1816  * ice_free_fltr_list - Free memory associated with a MAC address list
1817  * @list: the list to free
1818  *
1819  * Free the memory of each entry associated with the list.
1820  */
1821 static void
1822 ice_free_fltr_list(struct ice_list_head *list)
1823 {
1824 	struct ice_fltr_list_entry *e, *tmp;
1825 
1826 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1827 		LIST_DEL(&e->list_entry);
1828 		free(e, M_ICE);
1829 	}
1830 }
1831 
1832 /**
1833  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1834  * @vsi: the VSI to add the filter for
1835  * @addr: MAC address to add a filter for
1836  *
1837  * Add a MAC address filter for a given VSI. This is a wrapper around
1838  * ice_add_mac to simplify the interface. First, it only accepts a single
1839  * address, so we don't have to mess around with the list setup in other
1840  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1841  * callers don't need to worry about attempting to add the same filter twice.
1842  */
1843 int
1844 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1845 {
1846 	struct ice_list_head mac_addr_list;
1847 	struct ice_hw *hw = &vsi->sc->hw;
1848 	device_t dev = vsi->sc->dev;
1849 	enum ice_status status;
1850 	int err = 0;
1851 
1852 	INIT_LIST_HEAD(&mac_addr_list);
1853 
1854 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1855 	if (err)
1856 		goto free_mac_list;
1857 
1858 	status = ice_add_mac(hw, &mac_addr_list);
1859 	if (status == ICE_ERR_ALREADY_EXISTS) {
1860 		; /* Don't complain if we try to add a filter that already exists */
1861 	} else if (status) {
1862 		device_printf(dev,
1863 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1864 			      addr, ":",
1865 			      ice_status_str(status),
1866 			      ice_aq_str(hw->adminq.sq_last_status));
1867 		err = (EIO);
1868 	}
1869 
1870 free_mac_list:
1871 	ice_free_fltr_list(&mac_addr_list);
1872 	return err;
1873 }
1874 
1875 /**
1876  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1877  * @sc: device softc structure
1878  *
1879  * Program the default unicast and broadcast filters for the PF VSI.
1880  */
1881 int
1882 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1883 {
1884 	struct ice_vsi *vsi = &sc->pf_vsi;
1885 	struct ice_hw *hw = &sc->hw;
1886 	int err;
1887 
1888 	/* Add the LAN MAC address */
1889 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1890 	if (err)
1891 		return err;
1892 
1893 	/* Add the broadcast address */
1894 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
1895 	if (err)
1896 		return err;
1897 
1898 	return (0);
1899 }
1900 
1901 /**
1902  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
1903  * @vsi: the VSI to add the filter for
1904  * @addr: MAC address to remove a filter for
1905  *
1906  * Remove a MAC address filter from a given VSI. This is a wrapper around
1907  * ice_remove_mac to simplify the interface. First, it only accepts a single
1908  * address, so we don't have to mess around with the list setup in other
1909  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
1910  * callers don't need to worry about attempting to remove filters which
1911  * haven't yet been added.
1912  */
1913 int
1914 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1915 {
1916 	struct ice_list_head mac_addr_list;
1917 	struct ice_hw *hw = &vsi->sc->hw;
1918 	device_t dev = vsi->sc->dev;
1919 	enum ice_status status;
1920 	int err = 0;
1921 
1922 	INIT_LIST_HEAD(&mac_addr_list);
1923 
1924 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1925 	if (err)
1926 		goto free_mac_list;
1927 
1928 	status = ice_remove_mac(hw, &mac_addr_list);
1929 	if (status == ICE_ERR_DOES_NOT_EXIST) {
1930 		; /* Don't complain if we try to remove a filter that doesn't exist */
1931 	} else if (status) {
1932 		device_printf(dev,
1933 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
1934 			      addr, ":",
1935 			      ice_status_str(status),
1936 			      ice_aq_str(hw->adminq.sq_last_status));
1937 		err = (EIO);
1938 	}
1939 
1940 free_mac_list:
1941 	ice_free_fltr_list(&mac_addr_list);
1942 	return err;
1943 }
1944 
1945 /**
1946  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
1947  * @sc: device softc structure
1948  *
1949  * Remove the default unicast and broadcast filters from the PF VSI.
1950  */
1951 int
1952 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
1953 {
1954 	struct ice_vsi *vsi = &sc->pf_vsi;
1955 	struct ice_hw *hw = &sc->hw;
1956 	int err;
1957 
1958 	/* Remove the LAN MAC address */
1959 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1960 	if (err)
1961 		return err;
1962 
1963 	/* Remove the broadcast address */
1964 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
1965 	if (err)
1966 		return (EIO);
1967 
1968 	return (0);
1969 }
1970 
1971 /**
1972  * ice_check_ctrlq_errors - Check for and report controlq errors
1973  * @sc: device private structure
1974  * @qname: name of the controlq
1975  * @cq: the controlq to check
1976  *
1977  * Check and report controlq errors. Currently all we do is report them to the
1978  * kernel message log, but we might want to improve this in the future, such
1979  * as to keep track of statistics.
1980  */
1981 static void
1982 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
1983 		       struct ice_ctl_q_info *cq)
1984 {
1985 	struct ice_hw *hw = &sc->hw;
1986 	u32 val;
1987 
1988 	/* Check for error indications. Note that all the controlqs use the
1989 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
1990 	 */
1991 	val = rd32(hw, cq->rq.len);
1992 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1993 		   PF_FW_ARQLEN_ARQCRIT_M)) {
1994 		if (val & PF_FW_ARQLEN_ARQVFE_M)
1995 			device_printf(sc->dev,
1996 				"%s Receive Queue VF Error detected\n", qname);
1997 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
1998 			device_printf(sc->dev,
1999 				"%s Receive Queue Overflow Error detected\n",
2000 				qname);
2001 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2002 			device_printf(sc->dev,
2003 				"%s Receive Queue Critical Error detected\n",
2004 				qname);
2005 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2006 			 PF_FW_ARQLEN_ARQCRIT_M);
2007 		wr32(hw, cq->rq.len, val);
2008 	}
2009 
2010 	val = rd32(hw, cq->sq.len);
2011 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2012 		   PF_FW_ATQLEN_ATQCRIT_M)) {
2013 		if (val & PF_FW_ATQLEN_ATQVFE_M)
2014 			device_printf(sc->dev,
2015 				"%s Send Queue VF Error detected\n", qname);
2016 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2017 			device_printf(sc->dev,
2018 				"%s Send Queue Overflow Error detected\n",
2019 				qname);
2020 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2021 			device_printf(sc->dev,
2022 				"%s Send Queue Critical Error detected\n",
2023 				qname);
2024 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2025 			 PF_FW_ATQLEN_ATQCRIT_M);
2026 		wr32(hw, cq->sq.len, val);
2027 	}
2028 }
2029 
2030 /**
2031  * ice_process_link_event - Process a link event indication from firmware
2032  * @sc: device softc structure
2033  * @e: the received event data
2034  *
2035  * Gets the current link status from hardware, and may print a message if an
2036  * unqualified is detected.
2037  */
2038 static void
2039 ice_process_link_event(struct ice_softc *sc,
2040 		       struct ice_rq_event_info __invariant_only *e)
2041 {
2042 	struct ice_port_info *pi = sc->hw.port_info;
2043 	struct ice_hw *hw = &sc->hw;
2044 	device_t dev = sc->dev;
2045 	enum ice_status status;
2046 
2047 	/* Sanity check that the data length isn't too small */
2048 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2049 
2050 	/*
2051 	 * Even though the adapter gets link status information inside the
2052 	 * event, it needs to send a Get Link Status AQ command in order
2053 	 * to re-enable link events.
2054 	 */
2055 	pi->phy.get_link_info = true;
2056 	ice_get_link_status(pi, &sc->link_up);
2057 
2058 	if (pi->phy.link_info.topo_media_conflict &
2059 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2060 	    ICE_AQ_LINK_TOPO_CORRUPT))
2061 		device_printf(dev,
2062 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2063 
2064 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2065 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2066 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2067 			device_printf(dev,
2068 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2069 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2070 			device_printf(dev,
2071 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2072 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2073 			device_printf(dev,
2074 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2075 	}
2076 
2077 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2078 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2079 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2080 			if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
2081 				device_printf(dev,
2082 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2083 				    __func__, ice_status_str(status),
2084 				    ice_aq_str(hw->adminq.sq_last_status));
2085 		}
2086 	}
2087 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2088 
2089 	/* Indicate that link status must be reported again */
2090 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2091 
2092 	/* OS link info is updated elsewhere */
2093 }
2094 
2095 /**
2096  * ice_process_ctrlq_event - Respond to a controlq event
2097  * @sc: device private structure
2098  * @qname: the name for this controlq
2099  * @event: the event to process
2100  *
2101  * Perform actions in response to various controlq event notifications.
2102  */
2103 static void
2104 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2105 			struct ice_rq_event_info *event)
2106 {
2107 	u16 opcode;
2108 
2109 	opcode = le16toh(event->desc.opcode);
2110 
2111 	switch (opcode) {
2112 	case ice_aqc_opc_get_link_status:
2113 		ice_process_link_event(sc, event);
2114 		break;
2115 	case ice_aqc_opc_fw_logs_event:
2116 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2117 		break;
2118 	case ice_aqc_opc_lldp_set_mib_change:
2119 		ice_handle_mib_change_event(sc, event);
2120 		break;
2121 	case ice_aqc_opc_event_lan_overflow:
2122 		ice_handle_lan_overflow_event(sc, event);
2123 		break;
2124 	case ice_aqc_opc_get_health_status:
2125 		ice_handle_health_status_event(sc, event);
2126 		break;
2127 	default:
2128 		device_printf(sc->dev,
2129 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2130 			      qname, opcode);
2131 	}
2132 }
2133 
2134 /**
2135  * ice_process_ctrlq - helper function to process controlq rings
2136  * @sc: device private structure
2137  * @q_type: specific control queue type
2138  * @pending: return parameter to track remaining events
2139  *
2140  * Process controlq events for a given control queue type. Returns zero on
2141  * success, and an error code on failure. If successful, pending is the number
2142  * of remaining events left in the queue.
2143  */
2144 int
2145 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2146 {
2147 	struct ice_rq_event_info event = { { 0 } };
2148 	struct ice_hw *hw = &sc->hw;
2149 	struct ice_ctl_q_info *cq;
2150 	enum ice_status status;
2151 	const char *qname;
2152 	int loop = 0;
2153 
2154 	switch (q_type) {
2155 	case ICE_CTL_Q_ADMIN:
2156 		cq = &hw->adminq;
2157 		qname = "Admin";
2158 		break;
2159 	case ICE_CTL_Q_MAILBOX:
2160 		cq = &hw->mailboxq;
2161 		qname = "Mailbox";
2162 		break;
2163 	default:
2164 		device_printf(sc->dev,
2165 			      "Unknown control queue type 0x%x\n",
2166 			      q_type);
2167 		return 0;
2168 	}
2169 
2170 	ice_check_ctrlq_errors(sc, qname, cq);
2171 
2172 	/*
2173 	 * Control queue processing happens during the admin task which may be
2174 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2175 	 */
2176 	event.buf_len = cq->rq_buf_size;
2177 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2178 	if (!event.msg_buf) {
2179 		device_printf(sc->dev,
2180 			      "Unable to allocate memory for %s Receive Queue event\n",
2181 			      qname);
2182 		return (ENOMEM);
2183 	}
2184 
2185 	do {
2186 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2187 		if (status == ICE_ERR_AQ_NO_WORK)
2188 			break;
2189 		if (status) {
2190 			if (q_type == ICE_CTL_Q_ADMIN)
2191 				device_printf(sc->dev,
2192 					      "%s Receive Queue event error %s\n",
2193 					      qname, ice_status_str(status));
2194 			else
2195 				device_printf(sc->dev,
2196 					      "%s Receive Queue event error %s\n",
2197 					      qname, ice_status_str(status));
2198 			free(event.msg_buf, M_ICE);
2199 			return (EIO);
2200 		}
2201 		/* XXX should we separate this handler by controlq type? */
2202 		ice_process_ctrlq_event(sc, qname, &event);
2203 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2204 
2205 	free(event.msg_buf, M_ICE);
2206 
2207 	return 0;
2208 }
2209 
2210 /**
2211  * pkg_ver_empty - Check if a package version is empty
2212  * @pkg_ver: the package version to check
2213  * @pkg_name: the package name to check
2214  *
2215  * Checks if the package version structure is empty. We consider a package
2216  * version as empty if none of the versions are non-zero and the name string
2217  * is null as well.
2218  *
2219  * This is used to check if the package version was initialized by the driver,
2220  * as we do not expect an actual DDP package file to have a zero'd version and
2221  * name.
2222  *
2223  * @returns true if the package version is valid, or false otherwise.
2224  */
2225 static bool
2226 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2227 {
2228 	return (pkg_name[0] == '\0' &&
2229 		pkg_ver->major == 0 &&
2230 		pkg_ver->minor == 0 &&
2231 		pkg_ver->update == 0 &&
2232 		pkg_ver->draft == 0);
2233 }
2234 
2235 /**
2236  * pkg_ver_compatible - Check if the package version is compatible
2237  * @pkg_ver: the package version to check
2238  *
2239  * Compares the package version number to the driver's expected major/minor
2240  * version. Returns an integer indicating whether the version is older, newer,
2241  * or compatible with the driver.
2242  *
2243  * @returns 0 if the package version is compatible, -1 if the package version
2244  * is older, and 1 if the package version is newer than the driver version.
2245  */
2246 static int
2247 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2248 {
2249 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2250 		return (1); /* newer */
2251 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2252 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2253 		return (1); /* newer */
2254 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2255 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2256 		return (0); /* compatible */
2257 	else
2258 		return (-1); /* older */
2259 }
2260 
2261 /**
2262  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2263  * @hw: device hw structure
2264  * @buf: string buffer to store name/version string
2265  *
2266  * Formats the name and version of the OS DDP package as found in the ice_ddp
2267  * module into a string.
2268  *
2269  * @remark This will almost always be the same as the active package, but
2270  * could be different in some cases. Use ice_active_pkg_version_str to get the
2271  * version of the active DDP package.
2272  */
2273 static void
2274 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2275 {
2276 	char name_buf[ICE_PKG_NAME_SIZE];
2277 
2278 	/* If the OS DDP package info is empty, use "None" */
2279 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2280 		sbuf_printf(buf, "None");
2281 		return;
2282 	}
2283 
2284 	/*
2285 	 * This should already be null-terminated, but since this is a raw
2286 	 * value from an external source, strlcpy() into a new buffer to
2287 	 * make sure.
2288 	 */
2289 	bzero(name_buf, sizeof(name_buf));
2290 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2291 
2292 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2293 	    name_buf,
2294 	    hw->pkg_ver.major,
2295 	    hw->pkg_ver.minor,
2296 	    hw->pkg_ver.update,
2297 	    hw->pkg_ver.draft);
2298 }
2299 
2300 /**
2301  * ice_active_pkg_version_str - Format active package version info into a sbuf
2302  * @hw: device hw structure
2303  * @buf: string buffer to store name/version string
2304  *
2305  * Formats the name and version of the active DDP package info into a string
2306  * buffer for use.
2307  */
2308 static void
2309 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2310 {
2311 	char name_buf[ICE_PKG_NAME_SIZE];
2312 
2313 	/* If the active DDP package info is empty, use "None" */
2314 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2315 		sbuf_printf(buf, "None");
2316 		return;
2317 	}
2318 
2319 	/*
2320 	 * This should already be null-terminated, but since this is a raw
2321 	 * value from an external source, strlcpy() into a new buffer to
2322 	 * make sure.
2323 	 */
2324 	bzero(name_buf, sizeof(name_buf));
2325 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2326 
2327 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2328 	    name_buf,
2329 	    hw->active_pkg_ver.major,
2330 	    hw->active_pkg_ver.minor,
2331 	    hw->active_pkg_ver.update,
2332 	    hw->active_pkg_ver.draft);
2333 
2334 	if (hw->active_track_id != 0)
2335 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2336 }
2337 
2338 /**
2339  * ice_nvm_version_str - Format the NVM version information into a sbuf
2340  * @hw: device hw structure
2341  * @buf: string buffer to store version string
2342  *
2343  * Formats the NVM information including firmware version, API version, NVM
2344  * version, the EETRACK id, and OEM specific version information into a string
2345  * buffer.
2346  */
2347 static void
2348 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2349 {
2350 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2351 	struct ice_orom_info *orom = &hw->flash.orom;
2352 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2353 
2354 	/* Note that the netlist versions are stored in packed Binary Coded
2355 	 * Decimal format. The use of '%x' will correctly display these as
2356 	 * decimal numbers. This works because every 4 bits will be displayed
2357 	 * as a hexadecimal digit, and the BCD format will only use the values
2358 	 * 0-9.
2359 	 */
2360 	sbuf_printf(buf,
2361 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2362 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2363 		    hw->api_maj_ver, hw->api_min_ver,
2364 		    nvm->major, nvm->minor, nvm->eetrack,
2365 		    netlist->major, netlist->minor,
2366 		    netlist->type >> 16, netlist->type & 0xFFFF,
2367 		    netlist->rev, netlist->cust_ver, netlist->hash,
2368 		    orom->major, orom->build, orom->patch);
2369 }
2370 
2371 /**
2372  * ice_print_nvm_version - Print the NVM info to the kernel message log
2373  * @sc: the device softc structure
2374  *
2375  * Format and print an NVM version string using ice_nvm_version_str().
2376  */
2377 void
2378 ice_print_nvm_version(struct ice_softc *sc)
2379 {
2380 	struct ice_hw *hw = &sc->hw;
2381 	device_t dev = sc->dev;
2382 	struct sbuf *sbuf;
2383 
2384 	sbuf = sbuf_new_auto();
2385 	ice_nvm_version_str(hw, sbuf);
2386 	sbuf_finish(sbuf);
2387 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2388 	sbuf_delete(sbuf);
2389 }
2390 
2391 /**
2392  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2393  * @vsi: the VSI to be updated
2394  *
2395  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2396  * the updated values.
2397  */
2398 void
2399 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2400 {
2401 	struct ice_eth_stats *prev_es, *cur_es;
2402 	struct ice_hw *hw = &vsi->sc->hw;
2403 	u16 vsi_num;
2404 
2405 	if (!ice_is_vsi_valid(hw, vsi->idx))
2406 		return;
2407 
2408 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2409 	prev_es = &vsi->hw_stats.prev;
2410 	cur_es = &vsi->hw_stats.cur;
2411 
2412 #define ICE_VSI_STAT40(name, location) \
2413 	ice_stat_update40(hw, name ## L(vsi_num), \
2414 			  vsi->hw_stats.offsets_loaded, \
2415 			  &prev_es->location, &cur_es->location)
2416 
2417 #define ICE_VSI_STAT32(name, location) \
2418 	ice_stat_update32(hw, name(vsi_num), \
2419 			  vsi->hw_stats.offsets_loaded, \
2420 			  &prev_es->location, &cur_es->location)
2421 
2422 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2423 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2424 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2425 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2426 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2427 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2428 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2429 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2430 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2431 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2432 
2433 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2434 			     cur_es);
2435 
2436 #undef ICE_VSI_STAT40
2437 #undef ICE_VSI_STAT32
2438 
2439 	vsi->hw_stats.offsets_loaded = true;
2440 }
2441 
2442 /**
2443  * ice_reset_vsi_stats - Reset VSI statistics counters
2444  * @vsi: VSI structure
2445  *
2446  * Resets the software tracking counters for the VSI statistics, and indicate
2447  * that the offsets haven't been loaded. This is intended to be called
2448  * post-reset so that VSI statistics count from zero again.
2449  */
2450 void
2451 ice_reset_vsi_stats(struct ice_vsi *vsi)
2452 {
2453 	/* Reset HW stats */
2454 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2455 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2456 	vsi->hw_stats.offsets_loaded = false;
2457 }
2458 
2459 /**
2460  * ice_update_pf_stats - Update port stats counters
2461  * @sc: device private softc structure
2462  *
2463  * Reads hardware statistics registers and updates the software tracking
2464  * structure with new values.
2465  */
2466 void
2467 ice_update_pf_stats(struct ice_softc *sc)
2468 {
2469 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2470 	struct ice_hw *hw = &sc->hw;
2471 	u8 lport;
2472 
2473 	MPASS(hw->port_info);
2474 
2475 	prev_ps = &sc->stats.prev;
2476 	cur_ps = &sc->stats.cur;
2477 	lport = hw->port_info->lport;
2478 
2479 #define ICE_PF_STAT_PFC(name, location, index) \
2480 	ice_stat_update40(hw, name(lport, index), \
2481 			  sc->stats.offsets_loaded, \
2482 			  &prev_ps->location[index], &cur_ps->location[index])
2483 
2484 #define ICE_PF_STAT40(name, location) \
2485 	ice_stat_update40(hw, name ## L(lport), \
2486 			  sc->stats.offsets_loaded, \
2487 			  &prev_ps->location, &cur_ps->location)
2488 
2489 #define ICE_PF_STAT32(name, location) \
2490 	ice_stat_update32(hw, name(lport), \
2491 			  sc->stats.offsets_loaded, \
2492 			  &prev_ps->location, &cur_ps->location)
2493 
2494 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2495 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2496 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2497 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2498 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2499 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2500 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2501 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2502 	/* This stat register doesn't have an lport */
2503 	ice_stat_update32(hw, PRTRPB_RDPC,
2504 			  sc->stats.offsets_loaded,
2505 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2506 
2507 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2508 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2509 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2510 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2511 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2512 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2513 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2514 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2515 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2516 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2517 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2518 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2519 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2520 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2521 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2522 
2523 	/* Update Priority Flow Control Stats */
2524 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2525 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2526 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2527 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2528 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2529 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2530 	}
2531 
2532 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2533 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2534 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2535 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2536 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2537 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2538 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2539 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2540 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2541 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2542 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2543 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2544 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2545 
2546 #undef ICE_PF_STAT40
2547 #undef ICE_PF_STAT32
2548 #undef ICE_PF_STAT_PFC
2549 
2550 	sc->stats.offsets_loaded = true;
2551 }
2552 
2553 /**
2554  * ice_reset_pf_stats - Reset port stats counters
2555  * @sc: Device private softc structure
2556  *
2557  * Reset software tracking values for statistics to zero, and indicate that
2558  * offsets haven't been loaded. Intended to be called after a device reset so
2559  * that statistics count from zero again.
2560  */
2561 void
2562 ice_reset_pf_stats(struct ice_softc *sc)
2563 {
2564 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2565 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2566 	sc->stats.offsets_loaded = false;
2567 }
2568 
2569 /**
2570  * ice_sysctl_show_fw - sysctl callback to show firmware information
2571  * @oidp: sysctl oid structure
2572  * @arg1: pointer to private data structure
2573  * @arg2: unused
2574  * @req: sysctl request pointer
2575  *
2576  * Callback for the fw_version sysctl, to display the current firmware
2577  * information found at hardware init time.
2578  */
2579 static int
2580 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2581 {
2582 	struct ice_softc *sc = (struct ice_softc *)arg1;
2583 	struct ice_hw *hw = &sc->hw;
2584 	struct sbuf *sbuf;
2585 
2586 	UNREFERENCED_PARAMETER(oidp);
2587 	UNREFERENCED_PARAMETER(arg2);
2588 
2589 	if (ice_driver_is_detaching(sc))
2590 		return (ESHUTDOWN);
2591 
2592 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2593 	ice_nvm_version_str(hw, sbuf);
2594 	sbuf_finish(sbuf);
2595 	sbuf_delete(sbuf);
2596 
2597 	return (0);
2598 }
2599 
2600 /**
2601  * ice_sysctl_pba_number - sysctl callback to show PBA number
2602  * @oidp: sysctl oid structure
2603  * @arg1: pointer to private data structure
2604  * @arg2: unused
2605  * @req: sysctl request pointer
2606  *
2607  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2608  * number for this device.
2609  */
2610 static int
2611 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2612 {
2613 	struct ice_softc *sc = (struct ice_softc *)arg1;
2614 	struct ice_hw *hw = &sc->hw;
2615 	device_t dev = sc->dev;
2616 	u8 pba_string[32] = "";
2617 	enum ice_status status;
2618 
2619 	UNREFERENCED_PARAMETER(arg2);
2620 
2621 	if (ice_driver_is_detaching(sc))
2622 		return (ESHUTDOWN);
2623 
2624 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2625 	if (status) {
2626 		device_printf(dev,
2627 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2628 		    __func__, ice_status_str(status),
2629 		    ice_aq_str(hw->adminq.sq_last_status));
2630 		return (EIO);
2631 	}
2632 
2633 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2634 }
2635 
2636 /**
2637  * ice_sysctl_pkg_version - sysctl to show the active package version info
2638  * @oidp: sysctl oid structure
2639  * @arg1: pointer to private data structure
2640  * @arg2: unused
2641  * @req: sysctl request pointer
2642  *
2643  * Callback for the pkg_version sysctl, to display the active DDP package name
2644  * and version information.
2645  */
2646 static int
2647 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2648 {
2649 	struct ice_softc *sc = (struct ice_softc *)arg1;
2650 	struct ice_hw *hw = &sc->hw;
2651 	struct sbuf *sbuf;
2652 
2653 	UNREFERENCED_PARAMETER(oidp);
2654 	UNREFERENCED_PARAMETER(arg2);
2655 
2656 	if (ice_driver_is_detaching(sc))
2657 		return (ESHUTDOWN);
2658 
2659 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2660 	ice_active_pkg_version_str(hw, sbuf);
2661 	sbuf_finish(sbuf);
2662 	sbuf_delete(sbuf);
2663 
2664 	return (0);
2665 }
2666 
2667 /**
2668  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2669  * @oidp: sysctl oid structure
2670  * @arg1: pointer to private data structure
2671  * @arg2: unused
2672  * @req: sysctl request pointer
2673  *
2674  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2675  * version info found in the ice_ddp module.
2676  */
2677 static int
2678 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2679 {
2680 	struct ice_softc *sc = (struct ice_softc *)arg1;
2681 	struct ice_hw *hw = &sc->hw;
2682 	struct sbuf *sbuf;
2683 
2684 	UNREFERENCED_PARAMETER(oidp);
2685 	UNREFERENCED_PARAMETER(arg2);
2686 
2687 	if (ice_driver_is_detaching(sc))
2688 		return (ESHUTDOWN);
2689 
2690 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2691 	ice_os_pkg_version_str(hw, sbuf);
2692 	sbuf_finish(sbuf);
2693 	sbuf_delete(sbuf);
2694 
2695 	return (0);
2696 }
2697 
2698 /**
2699  * ice_sysctl_current_speed - sysctl callback to show current link speed
2700  * @oidp: sysctl oid structure
2701  * @arg1: pointer to private data structure
2702  * @arg2: unused
2703  * @req: sysctl request pointer
2704  *
2705  * Callback for the current_speed sysctl, to display the string representing
2706  * the current link speed.
2707  */
2708 static int
2709 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2710 {
2711 	struct ice_softc *sc = (struct ice_softc *)arg1;
2712 	struct ice_hw *hw = &sc->hw;
2713 	struct sbuf *sbuf;
2714 
2715 	UNREFERENCED_PARAMETER(oidp);
2716 	UNREFERENCED_PARAMETER(arg2);
2717 
2718 	if (ice_driver_is_detaching(sc))
2719 		return (ESHUTDOWN);
2720 
2721 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2722 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2723 	sbuf_finish(sbuf);
2724 	sbuf_delete(sbuf);
2725 
2726 	return (0);
2727 }
2728 
2729 /**
2730  * @var phy_link_speeds
2731  * @brief PHY link speed conversion array
2732  *
2733  * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
2734  * link speeds used by the link speed sysctls.
2735  *
2736  * @remark these are based on the indices used in the BIT() macros for the
2737  * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
2738  */
2739 static const uint16_t phy_link_speeds[] = {
2740     ICE_AQ_LINK_SPEED_100MB,
2741     ICE_AQ_LINK_SPEED_100MB,
2742     ICE_AQ_LINK_SPEED_1000MB,
2743     ICE_AQ_LINK_SPEED_1000MB,
2744     ICE_AQ_LINK_SPEED_1000MB,
2745     ICE_AQ_LINK_SPEED_1000MB,
2746     ICE_AQ_LINK_SPEED_1000MB,
2747     ICE_AQ_LINK_SPEED_2500MB,
2748     ICE_AQ_LINK_SPEED_2500MB,
2749     ICE_AQ_LINK_SPEED_2500MB,
2750     ICE_AQ_LINK_SPEED_5GB,
2751     ICE_AQ_LINK_SPEED_5GB,
2752     ICE_AQ_LINK_SPEED_10GB,
2753     ICE_AQ_LINK_SPEED_10GB,
2754     ICE_AQ_LINK_SPEED_10GB,
2755     ICE_AQ_LINK_SPEED_10GB,
2756     ICE_AQ_LINK_SPEED_10GB,
2757     ICE_AQ_LINK_SPEED_10GB,
2758     ICE_AQ_LINK_SPEED_10GB,
2759     ICE_AQ_LINK_SPEED_25GB,
2760     ICE_AQ_LINK_SPEED_25GB,
2761     ICE_AQ_LINK_SPEED_25GB,
2762     ICE_AQ_LINK_SPEED_25GB,
2763     ICE_AQ_LINK_SPEED_25GB,
2764     ICE_AQ_LINK_SPEED_25GB,
2765     ICE_AQ_LINK_SPEED_25GB,
2766     ICE_AQ_LINK_SPEED_25GB,
2767     ICE_AQ_LINK_SPEED_25GB,
2768     ICE_AQ_LINK_SPEED_25GB,
2769     ICE_AQ_LINK_SPEED_25GB,
2770     ICE_AQ_LINK_SPEED_40GB,
2771     ICE_AQ_LINK_SPEED_40GB,
2772     ICE_AQ_LINK_SPEED_40GB,
2773     ICE_AQ_LINK_SPEED_40GB,
2774     ICE_AQ_LINK_SPEED_40GB,
2775     ICE_AQ_LINK_SPEED_40GB,
2776     ICE_AQ_LINK_SPEED_50GB,
2777     ICE_AQ_LINK_SPEED_50GB,
2778     ICE_AQ_LINK_SPEED_50GB,
2779     ICE_AQ_LINK_SPEED_50GB,
2780     ICE_AQ_LINK_SPEED_50GB,
2781     ICE_AQ_LINK_SPEED_50GB,
2782     ICE_AQ_LINK_SPEED_50GB,
2783     ICE_AQ_LINK_SPEED_50GB,
2784     ICE_AQ_LINK_SPEED_50GB,
2785     ICE_AQ_LINK_SPEED_50GB,
2786     ICE_AQ_LINK_SPEED_50GB,
2787     ICE_AQ_LINK_SPEED_50GB,
2788     ICE_AQ_LINK_SPEED_50GB,
2789     ICE_AQ_LINK_SPEED_50GB,
2790     ICE_AQ_LINK_SPEED_50GB,
2791     ICE_AQ_LINK_SPEED_100GB,
2792     ICE_AQ_LINK_SPEED_100GB,
2793     ICE_AQ_LINK_SPEED_100GB,
2794     ICE_AQ_LINK_SPEED_100GB,
2795     ICE_AQ_LINK_SPEED_100GB,
2796     ICE_AQ_LINK_SPEED_100GB,
2797     ICE_AQ_LINK_SPEED_100GB,
2798     ICE_AQ_LINK_SPEED_100GB,
2799     ICE_AQ_LINK_SPEED_100GB,
2800     ICE_AQ_LINK_SPEED_100GB,
2801     ICE_AQ_LINK_SPEED_100GB,
2802     ICE_AQ_LINK_SPEED_100GB,
2803     ICE_AQ_LINK_SPEED_100GB,
2804     /* These rates are for ICE_PHY_TYPE_HIGH_* */
2805     ICE_AQ_LINK_SPEED_100GB,
2806     ICE_AQ_LINK_SPEED_100GB,
2807     ICE_AQ_LINK_SPEED_100GB,
2808     ICE_AQ_LINK_SPEED_100GB,
2809     ICE_AQ_LINK_SPEED_100GB
2810 };
2811 
/*
 * Help text describing the advertised link speed flags. The adjacent string
 * literals are concatenated by the compiler into one string; each literal
 * starts a new line of the resulting text.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED \
	"\nControl advertised link speed." \
	"\nFlags:" \
	"\n\t   0x0 - Auto" \
	"\n\t   0x1 - 10 Mb" \
	"\n\t   0x2 - 100 Mb" \
	"\n\t   0x4 - 1G" \
	"\n\t   0x8 - 2.5G" \
	"\n\t  0x10 - 5G" \
	"\n\t  0x20 - 10G" \
	"\n\t  0x40 - 20G" \
	"\n\t  0x80 - 25G" \
	"\n\t 0x100 - 40G" \
	"\n\t 0x200 - 50G" \
	"\n\t 0x400 - 100G" \
	"\n\t0x8000 - Unknown" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
2830 
/*
 * PHY type bitmasks grouped by link speed.
 *
 * ice_sysctl_speeds_to_aq_phy_types ORs one of these masks into its output
 * for every speed flag that is set. All masks are built from
 * ICE_PHY_TYPE_LOW_* bits except ICE_PHYS_100GB_HIGH, which is built from
 * ICE_PHY_TYPE_HIGH_* bits and therefore belongs in the high PHY type quad.
 */
#define ICE_PHYS_100MB ( \
	ICE_PHY_TYPE_LOW_100BASE_TX | \
	ICE_PHY_TYPE_LOW_100M_SGMII \
)

#define ICE_PHYS_1000MB ( \
	ICE_PHY_TYPE_LOW_1000BASE_T | \
	ICE_PHY_TYPE_LOW_1000BASE_SX | \
	ICE_PHY_TYPE_LOW_1000BASE_LX | \
	ICE_PHY_TYPE_LOW_1000BASE_KX | \
	ICE_PHY_TYPE_LOW_1G_SGMII \
)

#define ICE_PHYS_2500MB ( \
	ICE_PHY_TYPE_LOW_2500BASE_T | \
	ICE_PHY_TYPE_LOW_2500BASE_X | \
	ICE_PHY_TYPE_LOW_2500BASE_KX \
)

#define ICE_PHYS_5GB ( \
	ICE_PHY_TYPE_LOW_5GBASE_T | \
	ICE_PHY_TYPE_LOW_5GBASE_KR \
)

#define ICE_PHYS_10GB ( \
	ICE_PHY_TYPE_LOW_10GBASE_T | \
	ICE_PHY_TYPE_LOW_10G_SFI_DA | \
	ICE_PHY_TYPE_LOW_10GBASE_SR | \
	ICE_PHY_TYPE_LOW_10GBASE_LR | \
	ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
	ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
	ICE_PHY_TYPE_LOW_10G_SFI_C2C \
)

#define ICE_PHYS_25GB ( \
	ICE_PHY_TYPE_LOW_25GBASE_T | \
	ICE_PHY_TYPE_LOW_25GBASE_CR | \
	ICE_PHY_TYPE_LOW_25GBASE_CR_S | \
	ICE_PHY_TYPE_LOW_25GBASE_CR1 | \
	ICE_PHY_TYPE_LOW_25GBASE_SR | \
	ICE_PHY_TYPE_LOW_25GBASE_LR | \
	ICE_PHY_TYPE_LOW_25GBASE_KR | \
	ICE_PHY_TYPE_LOW_25GBASE_KR_S | \
	ICE_PHY_TYPE_LOW_25GBASE_KR1 | \
	ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | \
	ICE_PHY_TYPE_LOW_25G_AUI_C2C \
)

#define ICE_PHYS_40GB ( \
	ICE_PHY_TYPE_LOW_40GBASE_CR4 | \
	ICE_PHY_TYPE_LOW_40GBASE_SR4 | \
	ICE_PHY_TYPE_LOW_40GBASE_LR4 | \
	ICE_PHY_TYPE_LOW_40GBASE_KR4 | \
	ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
	ICE_PHY_TYPE_LOW_40G_XLAUI \
)

#define ICE_PHYS_50GB ( \
	ICE_PHY_TYPE_LOW_50GBASE_CR2 | \
	ICE_PHY_TYPE_LOW_50GBASE_SR2 | \
	ICE_PHY_TYPE_LOW_50GBASE_LR2 | \
	ICE_PHY_TYPE_LOW_50GBASE_KR2 | \
	ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
	ICE_PHY_TYPE_LOW_50G_LAUI2 | \
	ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
	ICE_PHY_TYPE_LOW_50G_AUI2 | \
	ICE_PHY_TYPE_LOW_50GBASE_CP | \
	ICE_PHY_TYPE_LOW_50GBASE_SR | \
	ICE_PHY_TYPE_LOW_50GBASE_FR | \
	ICE_PHY_TYPE_LOW_50GBASE_LR | \
	ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 | \
	ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
	ICE_PHY_TYPE_LOW_50G_AUI1 \
)

#define ICE_PHYS_100GB_LOW ( \
	ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
	ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
	ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
	ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
	ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
	ICE_PHY_TYPE_LOW_100G_CAUI4 | \
	ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
	ICE_PHY_TYPE_LOW_100G_AUI4 | \
	ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
	ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
	ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
	ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
	ICE_PHY_TYPE_LOW_100GBASE_DR \
)

#define ICE_PHYS_100GB_HIGH ( \
	ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
	ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
	ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
	ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
	ICE_PHY_TYPE_HIGH_100G_AUI2 \
)
2910 
2911 /**
2912  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
2913  * @phy_type_low: lower 64-bit PHY Type bitmask
2914  * @phy_type_high: upper 64-bit PHY Type bitmask
2915  *
2916  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
2917  * link speed flags. If phy_type_high has an unknown PHY type, then the return
2918  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
2919  */
2920 static u16
2921 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
2922 {
2923 	u16 sysctl_speeds = 0;
2924 	int bit;
2925 
2926 	/* coverity[address_of] */
2927 	for_each_set_bit(bit, &phy_type_low, 64)
2928 		sysctl_speeds |= phy_link_speeds[bit];
2929 
2930 	/* coverity[address_of] */
2931 	for_each_set_bit(bit, &phy_type_high, 64) {
2932 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
2933 			sysctl_speeds |= phy_link_speeds[bit + 64];
2934 		else
2935 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
2936 	}
2937 
2938 	return (sysctl_speeds);
2939 }
2940 
2941 /**
2942  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
2943  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
2944  * @phy_type_low: output parameter for lower AQ PHY flags
2945  * @phy_type_high: output parameter for higher AQ PHY flags
2946  *
2947  * Converts the given link speed flags into AQ PHY type flag sets appropriate
2948  * for use in a Set PHY Config command.
2949  */
2950 static void
2951 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
2952 				  u64 *phy_type_high)
2953 {
2954 	*phy_type_low = 0, *phy_type_high = 0;
2955 
2956 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
2957 		*phy_type_low |= ICE_PHYS_100MB;
2958 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
2959 		*phy_type_low |= ICE_PHYS_1000MB;
2960 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
2961 		*phy_type_low |= ICE_PHYS_2500MB;
2962 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
2963 		*phy_type_low |= ICE_PHYS_5GB;
2964 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
2965 		*phy_type_low |= ICE_PHYS_10GB;
2966 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
2967 		*phy_type_low |= ICE_PHYS_25GB;
2968 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
2969 		*phy_type_low |= ICE_PHYS_40GB;
2970 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
2971 		*phy_type_low |= ICE_PHYS_50GB;
2972 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
2973 		*phy_type_low |= ICE_PHYS_100GB_LOW;
2974 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
2975 	}
2976 }
2977 
2978 /**
2979  * @struct ice_phy_data
2980  * @brief PHY caps and link speeds
2981  *
2982  * Buffer providing report mode and user speeds;
2983  * returning intersection of PHY types and speeds.
2984  */
2985 struct ice_phy_data {
2986 	u64 phy_low_orig;     /* PHY low quad from report */
2987 	u64 phy_high_orig;    /* PHY high quad from report */
2988 	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
2989 	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
2990 	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
2991 	u16 user_speeds_intr; /* Intersect with report speeds */
2992 	u8 report_mode;       /* See ICE_AQC_REPORT_* */
2993 };
2994 
2995 /**
2996  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
2997  * @sc: device private structure
2998  * @phy_data: device PHY data
2999  *
3000  * On read: Displays the currently supported speeds
3001  * On write: Sets the device's supported speeds
3002  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3003  */
3004 static int
3005 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3006 				   struct ice_phy_data *phy_data)
3007 {
3008 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3009 	const char *report_types[5] = { "w/o MEDIA",
3010 					"w/MEDIA",
3011 					"ACTIVE",
3012 					"EDOOFUS", /* Not used */
3013 					"DFLT" };
3014 	struct ice_hw *hw = &sc->hw;
3015 	struct ice_port_info *pi = hw->port_info;
3016 	enum ice_status status;
3017 	u16 report_speeds, temp_speeds;
3018 	u8 report_type;
3019 	bool apply_speed_filter = false;
3020 
3021 	switch (phy_data->report_mode) {
3022 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3023 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3024 	case ICE_AQC_REPORT_ACTIVE_CFG:
3025 	case ICE_AQC_REPORT_DFLT_CFG:
3026 		report_type = phy_data->report_mode >> 1;
3027 		break;
3028 	default:
3029 		device_printf(sc->dev,
3030 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3031 		    __func__, phy_data->report_mode);
3032 		return (EINVAL);
3033 	}
3034 
3035 	/* 0 is treated as "Auto"; the driver will handle selecting the
3036 	 * correct speeds. Including, in some cases, applying an override
3037 	 * if provided.
3038 	 */
3039 	if (phy_data->user_speeds_orig == 0)
3040 		phy_data->user_speeds_orig = USHRT_MAX;
3041 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3042 		apply_speed_filter = true;
3043 
3044 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3045 	if (status != ICE_SUCCESS) {
3046 		device_printf(sc->dev,
3047 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3048 		    __func__, report_types[report_type],
3049 		    ice_status_str(status),
3050 		    ice_aq_str(sc->hw.adminq.sq_last_status));
3051 		return (EIO);
3052 	}
3053 
3054 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3055 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3056 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3057 	    phy_data->phy_high_orig);
3058 	if (apply_speed_filter) {
3059 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3060 		    pcaps.module_type[0]);
3061 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3062 			device_printf(sc->dev,
3063 			    "User-specified speeds (\"0x%04X\") not supported\n",
3064 			    phy_data->user_speeds_orig);
3065 			return (EINVAL);
3066 		}
3067 		report_speeds = temp_speeds;
3068 	}
3069 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3070 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3071 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3072 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3073 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3074 
3075 	return (0);
3076  }
3077 
3078 /**
3079  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3080  * @oidp: sysctl oid structure
3081  * @arg1: pointer to private data structure
3082  * @arg2: unused
3083  * @req: sysctl request pointer
3084  *
3085  * On read: Displays the currently supported speeds
3086  * On write: Sets the device's supported speeds
3087  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3088  */
3089 static int
3090 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3091 {
3092 	struct ice_softc *sc = (struct ice_softc *)arg1;
3093 	struct ice_port_info *pi = sc->hw.port_info;
3094 	struct ice_phy_data phy_data = { 0 };
3095 	device_t dev = sc->dev;
3096 	u16 sysctl_speeds;
3097 	int ret;
3098 
3099 	UNREFERENCED_PARAMETER(arg2);
3100 
3101 	if (ice_driver_is_detaching(sc))
3102 		return (ESHUTDOWN);
3103 
3104 	/* Get the current speeds from the adapter's "active" configuration. */
3105 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3106 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3107 	if (ret) {
3108 		/* Error message already printed within function */
3109 		return (ret);
3110 	}
3111 
3112 	sysctl_speeds = phy_data.user_speeds_intr;
3113 
3114 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3115 	if ((ret) || (req->newptr == NULL))
3116 		return (ret);
3117 
3118 	if (sysctl_speeds > 0x7FF) {
3119 		device_printf(dev,
3120 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3121 			      __func__, sysctl_speeds);
3122 		return (EINVAL);
3123 	}
3124 
3125 	pi->phy.curr_user_speed_req = sysctl_speeds;
3126 
3127 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3128 		return 0;
3129 
3130 	/* Apply settings requested by user */
3131 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3132 }
3133 
/**
 * @def ICE_SYSCTL_HELP_FEC_CONFIG
 * @brief Help text listing the FEC mode strings
 *
 * ice_sysctl_fec_config points at this text as the list of valid input
 * strings. Each row pairs a short keyword with an ICE_FEC_STRING_* literal;
 * all pieces concatenate into a single string.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG				\
	"\n"							\
	"Display or set the port's requested FEC mode.\n"	\
	"\tauto - " ICE_FEC_STRING_AUTO "\n"			\
	"\tfc - " ICE_FEC_STRING_BASER "\n"			\
	"\trs - " ICE_FEC_STRING_RS "\n"			\
	"\tnone - " ICE_FEC_STRING_NONE "\n"			\
	"Either of the left or right strings above can be used to set the requested mode."
3141 
3142 /**
3143  * ice_sysctl_fec_config - Display/change the configured FEC mode
3144  * @oidp: sysctl oid structure
3145  * @arg1: pointer to private data structure
3146  * @arg2: unused
3147  * @req: sysctl request pointer
3148  *
3149  * On read: Displays the configured FEC mode
3150  * On write: Sets the device's FEC mode to the input string, if it's valid.
3151  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3152  */
3153 static int
3154 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3155 {
3156 	struct ice_softc *sc = (struct ice_softc *)arg1;
3157 	struct ice_port_info *pi = sc->hw.port_info;
3158 	enum ice_fec_mode new_mode;
3159 	device_t dev = sc->dev;
3160 	char req_fec[32];
3161 	int ret;
3162 
3163 	UNREFERENCED_PARAMETER(arg2);
3164 
3165 	if (ice_driver_is_detaching(sc))
3166 		return (ESHUTDOWN);
3167 
3168 	bzero(req_fec, sizeof(req_fec));
3169 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3170 
3171 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3172 	if ((ret) || (req->newptr == NULL))
3173 		return (ret);
3174 
3175 	if (strcmp(req_fec, "auto") == 0 ||
3176 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3177 		if (sc->allow_no_fec_mod_in_auto)
3178 			new_mode = ICE_FEC_DIS_AUTO;
3179 		else
3180 			new_mode = ICE_FEC_AUTO;
3181 	} else if (strcmp(req_fec, "fc") == 0 ||
3182 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3183 		new_mode = ICE_FEC_BASER;
3184 	} else if (strcmp(req_fec, "rs") == 0 ||
3185 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3186 		new_mode = ICE_FEC_RS;
3187 	} else if (strcmp(req_fec, "none") == 0 ||
3188 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3189 		new_mode = ICE_FEC_NONE;
3190 	} else {
3191 		device_printf(dev,
3192 		    "%s: \"%s\" is not a valid FEC mode\n",
3193 		    __func__, req_fec);
3194 		return (EINVAL);
3195 	}
3196 
3197 	/* Cache user FEC mode for later link ups */
3198 	pi->phy.curr_user_fec_req = new_mode;
3199 
3200 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3201 		return 0;
3202 
3203 	/* Apply settings requested by user */
3204 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3205 }
3206 
3207 /**
3208  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3209  * @oidp: sysctl oid structure
3210  * @arg1: pointer to private data structure
3211  * @arg2: unused
3212  * @req: sysctl request pointer
3213  *
3214  * On read: Displays the negotiated FEC mode, in a string
3215  */
3216 static int
3217 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3218 {
3219 	struct ice_softc *sc = (struct ice_softc *)arg1;
3220 	struct ice_hw *hw = &sc->hw;
3221 	char neg_fec[32];
3222 	int ret;
3223 
3224 	UNREFERENCED_PARAMETER(arg2);
3225 
3226 	if (ice_driver_is_detaching(sc))
3227 		return (ESHUTDOWN);
3228 
3229 	/* Copy const string into a buffer to drop const qualifier */
3230 	bzero(neg_fec, sizeof(neg_fec));
3231 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3232 
3233 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3234 	if (req->newptr != NULL)
3235 		return (EPERM);
3236 
3237 	return (ret);
3238 }
3239 
/**
 * @def ICE_SYSCTL_HELP_FC_CONFIG
 * @brief Help text listing the flow control modes
 *
 * ice_sysctl_fc_config points at this text as the list of valid inputs.
 * Each row pairs a mode number with an ICE_FC_STRING_* literal; all pieces
 * concatenate into a single string.
 */
#define ICE_SYSCTL_HELP_FC_CONFIG					\
	"\n"								\
	"Display or set the port's advertised flow control mode.\n"	\
	"\t0 - " ICE_FC_STRING_NONE "\n"				\
	"\t1 - " ICE_FC_STRING_RX "\n"					\
	"\t2 - " ICE_FC_STRING_TX "\n"					\
	"\t3 - " ICE_FC_STRING_FULL "\n"				\
	"Either the numbers or the strings above can be used to set the advertised mode."
3247 
3248 /**
3249  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3250  * @oidp: sysctl oid structure
3251  * @arg1: pointer to private data structure
3252  * @arg2: unused
3253  * @req: sysctl request pointer
3254  *
3255  * On read: Displays the configured flow control mode
3256  * On write: Sets the device's flow control mode to the input, if it's valid.
3257  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3258  */
3259 static int
3260 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3261 {
3262 	struct ice_softc *sc = (struct ice_softc *)arg1;
3263 	struct ice_port_info *pi = sc->hw.port_info;
3264 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3265 	enum ice_fc_mode old_mode, new_mode;
3266 	struct ice_hw *hw = &sc->hw;
3267 	device_t dev = sc->dev;
3268 	enum ice_status status;
3269 	int ret, fc_num;
3270 	bool mode_set = false;
3271 	struct sbuf buf;
3272 	char *fc_str_end;
3273 	char fc_str[32];
3274 
3275 	UNREFERENCED_PARAMETER(arg2);
3276 
3277 	if (ice_driver_is_detaching(sc))
3278 		return (ESHUTDOWN);
3279 
3280 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3281 				     &pcaps, NULL);
3282 	if (status != ICE_SUCCESS) {
3283 		device_printf(dev,
3284 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3285 		    __func__, ice_status_str(status),
3286 		    ice_aq_str(hw->adminq.sq_last_status));
3287 		return (EIO);
3288 	}
3289 
3290 	/* Convert HW response format to SW enum value */
3291 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3292 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3293 		old_mode = ICE_FC_FULL;
3294 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3295 		old_mode = ICE_FC_TX_PAUSE;
3296 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3297 		old_mode = ICE_FC_RX_PAUSE;
3298 	else
3299 		old_mode = ICE_FC_NONE;
3300 
3301 	/* Create "old" string for output */
3302 	bzero(fc_str, sizeof(fc_str));
3303 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3304 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3305 	sbuf_finish(&buf);
3306 	sbuf_delete(&buf);
3307 
3308 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3309 	if ((ret) || (req->newptr == NULL))
3310 		return (ret);
3311 
3312 	/* Try to parse input as a string, first */
3313 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3314 		new_mode = ICE_FC_FULL;
3315 		mode_set = true;
3316 	}
3317 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3318 		new_mode = ICE_FC_TX_PAUSE;
3319 		mode_set = true;
3320 	}
3321 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3322 		new_mode = ICE_FC_RX_PAUSE;
3323 		mode_set = true;
3324 	}
3325 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3326 		new_mode = ICE_FC_NONE;
3327 		mode_set = true;
3328 	}
3329 
3330 	/*
3331 	 * Then check if it's an integer, for compatibility with the method
3332 	 * used in older drivers.
3333 	 */
3334 	if (!mode_set) {
3335 		fc_num = strtol(fc_str, &fc_str_end, 0);
3336 		if (fc_str_end == fc_str)
3337 			fc_num = -1;
3338 		switch (fc_num) {
3339 		case 3:
3340 			new_mode = ICE_FC_FULL;
3341 			break;
3342 		case 2:
3343 			new_mode = ICE_FC_TX_PAUSE;
3344 			break;
3345 		case 1:
3346 			new_mode = ICE_FC_RX_PAUSE;
3347 			break;
3348 		case 0:
3349 			new_mode = ICE_FC_NONE;
3350 			break;
3351 		default:
3352 			device_printf(dev,
3353 			    "%s: \"%s\" is not a valid flow control mode\n",
3354 			    __func__, fc_str);
3355 			return (EINVAL);
3356 		}
3357 	}
3358 
3359 	/* Save flow control mode from user */
3360 	pi->phy.curr_user_fc_req = new_mode;
3361 
3362 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3363 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3364 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3365 	    (new_mode != ICE_FC_NONE)) {
3366 		ret = ice_config_pfc(sc, 0x0);
3367 		if (ret)
3368 			return (ret);
3369 	}
3370 
3371 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3372 		return 0;
3373 
3374 	/* Apply settings requested by user */
3375 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3376 }
3377 
3378 /**
3379  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3380  * @oidp: sysctl oid structure
3381  * @arg1: pointer to private data structure
3382  * @arg2: unused
3383  * @req: sysctl request pointer
3384  *
3385  * On read: Displays the currently negotiated flow control settings.
3386  *
3387  * If link is not established, this will report ICE_FC_NONE, as no flow
3388  * control is negotiated while link is down.
3389  */
3390 static int
3391 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3392 {
3393 	struct ice_softc *sc = (struct ice_softc *)arg1;
3394 	struct ice_port_info *pi = sc->hw.port_info;
3395 	const char *negotiated_fc;
3396 
3397 	UNREFERENCED_PARAMETER(arg2);
3398 
3399 	if (ice_driver_is_detaching(sc))
3400 		return (ESHUTDOWN);
3401 
3402 	negotiated_fc = ice_flowcontrol_mode(pi);
3403 
3404 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3405 }
3406 
3407 /**
3408  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3409  * @oidp: sysctl oid structure
3410  * @arg1: pointer to private data structure
3411  * @arg2: unused
3412  * @req: sysctl request pointer
3413  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3414  *
3415  * Private handler for phy_type_high and phy_type_low sysctls.
3416  */
3417 static int
3418 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3419 {
3420 	struct ice_softc *sc = (struct ice_softc *)arg1;
3421 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3422 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3423 	struct ice_hw *hw = &sc->hw;
3424 	device_t dev = sc->dev;
3425 	enum ice_status status;
3426 	uint64_t types;
3427 	int ret;
3428 
3429 	UNREFERENCED_PARAMETER(arg2);
3430 
3431 	if (ice_driver_is_detaching(sc))
3432 		return (ESHUTDOWN);
3433 
3434 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3435 				     &pcaps, NULL);
3436 	if (status != ICE_SUCCESS) {
3437 		device_printf(dev,
3438 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3439 		    __func__, ice_status_str(status),
3440 		    ice_aq_str(hw->adminq.sq_last_status));
3441 		return (EIO);
3442 	}
3443 
3444 	if (is_phy_type_high)
3445 		types = pcaps.phy_type_high;
3446 	else
3447 		types = pcaps.phy_type_low;
3448 
3449 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3450 	if ((ret) || (req->newptr == NULL))
3451 		return (ret);
3452 
3453 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3454 
3455 	if (is_phy_type_high)
3456 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3457 	else
3458 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3459 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3460 
3461 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3462 	if (status != ICE_SUCCESS) {
3463 		device_printf(dev,
3464 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3465 		    __func__, ice_status_str(status),
3466 		    ice_aq_str(hw->adminq.sq_last_status));
3467 		return (EIO);
3468 	}
3469 
3470 	return (0);
3471 
3472 }
3473 
3474 /**
3475  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3476  * @oidp: sysctl oid structure
3477  * @arg1: pointer to private data structure
3478  * @arg2: unused
3479  * @req: sysctl request pointer
3480  *
3481  * On read: Displays the currently supported lower PHY types
3482  * On write: Sets the device's supported low PHY types
3483  */
3484 static int
3485 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3486 {
3487 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3488 }
3489 
3490 /**
3491  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3492  * @oidp: sysctl oid structure
3493  * @arg1: pointer to private data structure
3494  * @arg2: unused
3495  * @req: sysctl request pointer
3496  *
3497  * On read: Displays the currently supported higher PHY types
3498  * On write: Sets the device's supported high PHY types
3499  */
3500 static int
3501 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3502 {
3503 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3504 }
3505 
3506 /**
3507  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3508  * @oidp: sysctl oid structure
3509  * @arg1: pointer to private data structure
3510  * @arg2: unused
3511  * @req: sysctl request pointer
3512  * @report_mode: the mode to report
3513  *
3514  * On read: Display the response from Get PHY abillities with the given report
3515  * mode.
3516  */
3517 static int
3518 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3519 {
3520 	struct ice_softc *sc = (struct ice_softc *)arg1;
3521 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3522 	struct ice_hw *hw = &sc->hw;
3523 	struct ice_port_info *pi = hw->port_info;
3524 	device_t dev = sc->dev;
3525 	enum ice_status status;
3526 	int ret;
3527 
3528 	UNREFERENCED_PARAMETER(arg2);
3529 
3530 	ret = priv_check(curthread, PRIV_DRIVER);
3531 	if (ret)
3532 		return (ret);
3533 
3534 	if (ice_driver_is_detaching(sc))
3535 		return (ESHUTDOWN);
3536 
3537 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3538 	if (status != ICE_SUCCESS) {
3539 		device_printf(dev,
3540 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3541 		    __func__, ice_status_str(status),
3542 		    ice_aq_str(hw->adminq.sq_last_status));
3543 		return (EIO);
3544 	}
3545 
3546 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3547 	if (req->newptr != NULL)
3548 		return (EPERM);
3549 
3550 	return (ret);
3551 }
3552 
3553 /**
3554  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3555  * @oidp: sysctl oid structure
3556  * @arg1: pointer to private data structure
3557  * @arg2: unused
3558  * @req: sysctl request pointer
3559  *
3560  * On read: Display the response from Get PHY abillities reporting the last
3561  * software configuration.
3562  */
3563 static int
3564 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3565 {
3566 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3567 				   ICE_AQC_REPORT_ACTIVE_CFG);
3568 }
3569 
3570 /**
3571  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3572  * @oidp: sysctl oid structure
3573  * @arg1: pointer to private data structure
3574  * @arg2: unused
3575  * @req: sysctl request pointer
3576  *
3577  * On read: Display the response from Get PHY abillities reporting the NVM
3578  * configuration.
3579  */
3580 static int
3581 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3582 {
3583 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3584 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3585 }
3586 
3587 /**
3588  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3589  * @oidp: sysctl oid structure
3590  * @arg1: pointer to private data structure
3591  * @arg2: unused
3592  * @req: sysctl request pointer
3593  *
3594  * On read: Display the response from Get PHY abillities reporting the
3595  * topology configuration.
3596  */
3597 static int
3598 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3599 {
3600 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3601 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3602 }
3603 
3604 /**
3605  * ice_sysctl_phy_link_status - Display response from Get Link Status
3606  * @oidp: sysctl oid structure
3607  * @arg1: pointer to private data structure
3608  * @arg2: unused
3609  * @req: sysctl request pointer
3610  *
3611  * On read: Display the response from firmware for the Get Link Status
3612  * request.
3613  */
3614 static int
3615 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3616 {
3617 	struct ice_aqc_get_link_status_data link_data = { 0 };
3618 	struct ice_softc *sc = (struct ice_softc *)arg1;
3619 	struct ice_hw *hw = &sc->hw;
3620 	struct ice_port_info *pi = hw->port_info;
3621 	struct ice_aqc_get_link_status *resp;
3622 	struct ice_aq_desc desc;
3623 	device_t dev = sc->dev;
3624 	enum ice_status status;
3625 	int ret;
3626 
3627 	UNREFERENCED_PARAMETER(arg2);
3628 
3629 	/*
3630 	 * Ensure that only contexts with driver privilege are allowed to
3631 	 * access this information
3632 	 */
3633 	ret = priv_check(curthread, PRIV_DRIVER);
3634 	if (ret)
3635 		return (ret);
3636 
3637 	if (ice_driver_is_detaching(sc))
3638 		return (ESHUTDOWN);
3639 
3640 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3641 	resp = &desc.params.get_link_status;
3642 	resp->lport_num = pi->lport;
3643 
3644 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3645 	if (status != ICE_SUCCESS) {
3646 		device_printf(dev,
3647 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3648 		    __func__, ice_status_str(status),
3649 		    ice_aq_str(hw->adminq.sq_last_status));
3650 		return (EIO);
3651 	}
3652 
3653 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3654 	if (req->newptr != NULL)
3655 		return (EPERM);
3656 
3657 	return (ret);
3658 }
3659 
3660 /**
3661  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3662  * @oidp: sysctl oid structure
3663  * @arg1: pointer to private softc structure
3664  * @arg2: unused
3665  * @req: sysctl request pointer
3666  *
3667  * On read: Displays current persistent LLDP status.
3668  */
3669 static int
3670 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3671 {
3672 	struct ice_softc *sc = (struct ice_softc *)arg1;
3673 	struct ice_hw *hw = &sc->hw;
3674 	device_t dev = sc->dev;
3675 	enum ice_status status;
3676 	struct sbuf *sbuf;
3677 	u32 lldp_state;
3678 
3679 	UNREFERENCED_PARAMETER(arg2);
3680 	UNREFERENCED_PARAMETER(oidp);
3681 
3682 	if (ice_driver_is_detaching(sc))
3683 		return (ESHUTDOWN);
3684 
3685 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3686 	if (status) {
3687 		device_printf(dev,
3688 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3689 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3690 		return (EIO);
3691 	}
3692 
3693 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3694 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3695 	sbuf_finish(sbuf);
3696 	sbuf_delete(sbuf);
3697 
3698 	return (0);
3699 }
3700 
3701 /**
3702  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3703  * @oidp: sysctl oid structure
3704  * @arg1: pointer to private softc structure
3705  * @arg2: unused
3706  * @req: sysctl request pointer
3707  *
3708  * On read: Displays default persistent LLDP status.
3709  */
3710 static int
3711 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3712 {
3713 	struct ice_softc *sc = (struct ice_softc *)arg1;
3714 	struct ice_hw *hw = &sc->hw;
3715 	device_t dev = sc->dev;
3716 	enum ice_status status;
3717 	struct sbuf *sbuf;
3718 	u32 lldp_state;
3719 
3720 	UNREFERENCED_PARAMETER(arg2);
3721 	UNREFERENCED_PARAMETER(oidp);
3722 
3723 	if (ice_driver_is_detaching(sc))
3724 		return (ESHUTDOWN);
3725 
3726 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3727 	if (status) {
3728 		device_printf(dev,
3729 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3730 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3731 		return (EIO);
3732 	}
3733 
3734 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3735 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3736 	sbuf_finish(sbuf);
3737 	sbuf_delete(sbuf);
3738 
3739 	return (0);
3740 }
3741 
3742 /**
3743  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3744  * @dcbcfg: Configuration struct to check for mappings in
3745  *
3746  * @return true if there exists a non-zero DSCP to TC mapping
3747  * inside the input DCB configuration struct.
3748  */
3749 static bool
3750 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3751 {
3752 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3753 		if (dcbcfg->dscp_map[i] != 0)
3754 			return (true);
3755 
3756 	return (false);
3757 }
3758 
/**
 * @def ICE_SYSCTL_HELP_FW_LLDP_AGENT
 * @brief Help text listing the FW LLDP agent state values
 *
 * The literals below concatenate into a single string.
 */
#define ICE_SYSCTL_HELP_FW_LLDP_AGENT			\
	"\n"						\
	"Display or change FW LLDP agent state:\n"	\
	"\t0 - disabled\n"				\
	"\t1 - enabled"
3763 
3764 /**
3765  * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3766  * @oidp: sysctl oid structure
3767  * @arg1: pointer to private softc structure
3768  * @arg2: unused
3769  * @req: sysctl request pointer
3770  *
3771  * On read: Displays whether the FW LLDP agent is running
3772  * On write: Persistently enables or disables the FW LLDP agent
3773  */
3774 static int
3775 ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3776 {
3777 	struct ice_softc *sc = (struct ice_softc *)arg1;
3778 	struct ice_dcbx_cfg *local_dcbx_cfg;
3779 	struct ice_hw *hw = &sc->hw;
3780 	device_t dev = sc->dev;
3781 	enum ice_status status;
3782 	int ret;
3783 	u32 old_state;
3784 	u8 fw_lldp_enabled;
3785 	bool retried_start_lldp = false;
3786 
3787 	UNREFERENCED_PARAMETER(arg2);
3788 
3789 	if (ice_driver_is_detaching(sc))
3790 		return (ESHUTDOWN);
3791 
3792 	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3793 	if (status) {
3794 		device_printf(dev,
3795 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3796 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3797 		return (EIO);
3798 	}
3799 
3800 	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3801 		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3802 		if (status) {
3803 			device_printf(dev,
3804 			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3805 			    ice_status_str(status),
3806 			    ice_aq_str(hw->adminq.sq_last_status));
3807 			return (EIO);
3808 		}
3809 	}
3810 	if (old_state == 0)
3811 		fw_lldp_enabled = false;
3812 	else
3813 		fw_lldp_enabled = true;
3814 
3815 	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
3816 	if ((ret) || (req->newptr == NULL))
3817 		return (ret);
3818 
3819 	if (old_state == 0 && fw_lldp_enabled == false)
3820 		return (0);
3821 
3822 	if (old_state != 0 && fw_lldp_enabled == true)
3823 		return (0);
3824 
3825 	/* Block transition to FW LLDP if DSCP mode is enabled */
3826 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
3827 	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) ||
3828 	    ice_dscp_is_mapped(local_dcbx_cfg)) {
3829 		device_printf(dev,
3830 			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
3831 		return (EOPNOTSUPP);
3832 	}
3833 
3834 	if (fw_lldp_enabled == false) {
3835 		status = ice_aq_stop_lldp(hw, true, true, NULL);
3836 		/* EPERM is returned if the LLDP agent is already shutdown */
3837 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
3838 			device_printf(dev,
3839 			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
3840 			    __func__, ice_status_str(status),
3841 			    ice_aq_str(hw->adminq.sq_last_status));
3842 			return (EIO);
3843 		}
3844 		ice_aq_set_dcb_parameters(hw, true, NULL);
3845 		hw->port_info->qos_cfg.is_sw_lldp = true;
3846 		ice_add_rx_lldp_filter(sc);
3847 	} else {
3848 		ice_del_rx_lldp_filter(sc);
3849 retry_start_lldp:
3850 		status = ice_aq_start_lldp(hw, true, NULL);
3851 		if (status) {
3852 			switch (hw->adminq.sq_last_status) {
3853 			/* EEXIST is returned if the LLDP agent is already started */
3854 			case ICE_AQ_RC_EEXIST:
3855 				break;
3856 			case ICE_AQ_RC_EAGAIN:
3857 				/* Retry command after a 2 second wait */
3858 				if (retried_start_lldp == false) {
3859 					retried_start_lldp = true;
3860 					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
3861 					goto retry_start_lldp;
3862 				}
3863 				/* Fallthrough */
3864 			default:
3865 				device_printf(dev,
3866 				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
3867 				    __func__, ice_status_str(status),
3868 				    ice_aq_str(hw->adminq.sq_last_status));
3869 				return (EIO);
3870 			}
3871 		}
3872 		ice_start_dcbx_agent(sc);
3873 
3874 		/* Init DCB needs to be done during enabling LLDP to properly
3875 		 * propagate the configuration.
3876 		 */
3877 		status = ice_init_dcb(hw, true);
3878 		if (status) {
3879 			device_printf(dev,
3880 			    "%s: ice_init_dcb failed; status %s, aq_err %s\n",
3881 			    __func__, ice_status_str(status),
3882 			    ice_aq_str(hw->adminq.sq_last_status));
3883 			hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
3884 		}
3885 	}
3886 
3887 	return (ret);
3888 }
3889 
/* Description string registered with the ets_min_rate sysctl */
#define ICE_SYSCTL_HELP_ETS_MIN_RATE \
	"\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
	"\nIn SW DCB mode, displays and allows setting the table." \
	"\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
	"\nWhere the bandwidth total must add up to 100"
3895 
3896 /**
3897  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
3898  * @oidp: sysctl oid structure
3899  * @arg1: pointer to private data structure
3900  * @arg2: unused
3901  * @req: sysctl request pointer
3902  *
3903  * Returns the current ETS TC bandwidth table
3904  * cached by the driver.
3905  *
3906  * In SW DCB mode this sysctl also accepts a value that will
3907  * be sent to the firmware for configuration.
3908  */
3909 static int
3910 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
3911 {
3912 	struct ice_softc *sc = (struct ice_softc *)arg1;
3913 	struct ice_dcbx_cfg *local_dcbx_cfg;
3914 	struct ice_port_info *pi;
3915 	struct ice_hw *hw = &sc->hw;
3916 	device_t dev = sc->dev;
3917 	enum ice_status status;
3918 	struct sbuf *sbuf;
3919 	int ret;
3920 
3921 	/* Store input rates from user */
3922 	char ets_user_buf[128] = "";
3923 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
3924 
3925 	UNREFERENCED_PARAMETER(arg2);
3926 
3927 	if (ice_driver_is_detaching(sc))
3928 		return (ESHUTDOWN);
3929 
3930 	if (req->oldptr == NULL && req->newptr == NULL) {
3931 		ret = SYSCTL_OUT(req, 0, 128);
3932 		return (ret);
3933 	}
3934 
3935 	pi = hw->port_info;
3936 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
3937 
3938 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3939 
3940 	/* Format ETS BW data for output */
3941 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3942 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
3943 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
3944 			sbuf_printf(sbuf, ",");
3945 	}
3946 
3947 	sbuf_finish(sbuf);
3948 	sbuf_delete(sbuf);
3949 
3950 	/* Read in the new ETS values */
3951 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
3952 	if ((ret) || (req->newptr == NULL))
3953 		return (ret);
3954 
3955 	/* Don't allow setting changes in FW DCB mode */
3956 	if (!hw->port_info->qos_cfg.is_sw_lldp)
3957 		return (EPERM);
3958 
3959 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
3960 	if (ret) {
3961 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
3962 		    __func__, ets_user_buf);
3963 		return (ret);
3964 	}
3965 
3966 	if (!ice_check_ets_bw(new_ets_table)) {
3967 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
3968 		    __func__, ets_user_buf);
3969 		return (EINVAL);
3970 	}
3971 
3972 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
3973 	    sizeof(new_ets_table));
3974 
3975 	/* If BW > 0, then set TSA entry to 2 */
3976 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3977 		if (new_ets_table[i] > 0)
3978 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
3979 		else
3980 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
3981 	}
3982 	local_dcbx_cfg->etscfg.willing = 0;
3983 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
3984 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
3985 
3986 	status = ice_set_dcb_cfg(pi);
3987 	if (status) {
3988 		device_printf(dev,
3989 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
3990 		    __func__, ice_status_str(status),
3991 		    ice_aq_str(hw->adminq.sq_last_status));
3992 		return (EIO);
3993 	}
3994 
3995 	ice_do_dcb_reconfig(sc, false);
3996 
3997 	return (0);
3998 }
3999 
/* Description string registered with the up2tc_map sysctl */
#define ICE_SYSCTL_HELP_UP2TC_MAP \
	"\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
	"\nIn SW DCB mode, displays and allows setting the table." \
	"\nInput must be in this format: 0,1,2,3,4,5,6,7" \
	"\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
4005 
4006 /**
4007  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
4008  * @oidp: sysctl oid structure
4009  * @arg1: pointer to private data structure
4010  * @arg2: unused
4011  * @req: sysctl request pointer
4012  *
4013  * In FW DCB mode, returns the current ETS prio table /
4014  * UP2TC mapping from the local MIB.
4015  *
4016  * In SW DCB mode this sysctl also accepts a value that will
4017  * be sent to the firmware for configuration.
4018  */
4019 static int
4020 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4021 {
4022 	struct ice_softc *sc = (struct ice_softc *)arg1;
4023 	struct ice_dcbx_cfg *local_dcbx_cfg;
4024 	struct ice_port_info *pi;
4025 	struct ice_hw *hw = &sc->hw;
4026 	device_t dev = sc->dev;
4027 	enum ice_status status;
4028 	struct sbuf *sbuf;
4029 	int ret;
4030 
4031 	/* Store input rates from user */
4032 	char up2tc_user_buf[128] = "";
4033 	/* This array is indexed by UP, not TC */
4034 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4035 
4036 	UNREFERENCED_PARAMETER(arg2);
4037 
4038 	if (ice_driver_is_detaching(sc))
4039 		return (ESHUTDOWN);
4040 
4041 	if (req->oldptr == NULL && req->newptr == NULL) {
4042 		ret = SYSCTL_OUT(req, 0, 128);
4043 		return (ret);
4044 	}
4045 
4046 	pi = hw->port_info;
4047 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4048 
4049 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4050 
4051 	/* Format ETS Priority Mapping Table for output */
4052 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4053 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4054 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4055 			sbuf_printf(sbuf, ",");
4056 	}
4057 
4058 	sbuf_finish(sbuf);
4059 	sbuf_delete(sbuf);
4060 
4061 	/* Read in the new ETS priority mapping */
4062 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4063 	if ((ret) || (req->newptr == NULL))
4064 		return (ret);
4065 
4066 	/* Don't allow setting changes in FW DCB mode */
4067 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4068 		return (EPERM);
4069 
4070 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
4071 	if (ret) {
4072 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4073 		    __func__, up2tc_user_buf);
4074 		return (ret);
4075 	}
4076 
4077 	/* Prepare updated ETS CFG/REC TLVs */
4078 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4079 	    sizeof(new_up2tc));
4080 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4081 	    sizeof(new_up2tc));
4082 
4083 	status = ice_set_dcb_cfg(pi);
4084 	if (status) {
4085 		device_printf(dev,
4086 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4087 		    __func__, ice_status_str(status),
4088 		    ice_aq_str(hw->adminq.sq_last_status));
4089 		return (EIO);
4090 	}
4091 
4092 	ice_do_dcb_reconfig(sc, false);
4093 
4094 	return (0);
4095 }
4096 
4097 /**
4098  * ice_config_pfc - helper function to set PFC config in FW
4099  * @sc: device private structure
4100  * @new_mode: bit flags indicating PFC status for TCs
4101  *
4102  * @pre must be in SW DCB mode
4103  *
4104  * Configures the driver's local PFC TLV and sends it to the
4105  * FW for configuration, then reconfigures the driver/VSI
4106  * for DCB if needed.
4107  */
4108 static int
4109 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4110 {
4111 	struct ice_dcbx_cfg *local_dcbx_cfg;
4112 	struct ice_hw *hw = &sc->hw;
4113 	struct ice_port_info *pi;
4114 	device_t dev = sc->dev;
4115 	enum ice_status status;
4116 
4117 	pi = hw->port_info;
4118 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4119 
4120 	/* Prepare updated PFC TLV */
4121 	local_dcbx_cfg->pfc.pfcena = new_mode;
4122 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4123 	local_dcbx_cfg->pfc.willing = 0;
4124 	local_dcbx_cfg->pfc.mbc = 0;
4125 
4126 	/* Warn if PFC is being disabled with RoCE v2 in use */
4127 	if (new_mode == 0 && sc->rdma_entry.attached)
4128 		device_printf(dev,
4129 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4130 
4131 	status = ice_set_dcb_cfg(pi);
4132 	if (status) {
4133 		device_printf(dev,
4134 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4135 		    __func__, ice_status_str(status),
4136 		    ice_aq_str(hw->adminq.sq_last_status));
4137 		return (EIO);
4138 	}
4139 
4140 	ice_do_dcb_reconfig(sc, false);
4141 
4142 	return (0);
4143 }
4144 
/* Description string registered with the pfc sysctl */
#define ICE_SYSCTL_HELP_PFC_CONFIG \
	"\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
	"\nIn SW DCB mode, displays and allows setting the configuration" \
	"\nInput/Output is in this format: 0xff" \
	"\nWhere bit position # enables/disables PFC for that Traffic Class #"
4150 
4151 /**
4152  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4153  * @oidp: sysctl oid structure
4154  * @arg1: pointer to private data structure
4155  * @arg2: unused
4156  * @req: sysctl request pointer
4157  *
4158  * In FW DCB mode, returns a bitmap containing the current TCs
4159  * that have PFC enabled on them.
4160  *
4161  * In SW DCB mode this sysctl also accepts a value that will
4162  * be sent to the firmware for configuration.
4163  */
4164 static int
4165 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4166 {
4167 	struct ice_softc *sc = (struct ice_softc *)arg1;
4168 	struct ice_dcbx_cfg *local_dcbx_cfg;
4169 	struct ice_port_info *pi;
4170 	struct ice_hw *hw = &sc->hw;
4171 	int ret;
4172 
4173 	/* Store input flags from user */
4174 	u8 user_pfc;
4175 
4176 	UNREFERENCED_PARAMETER(arg2);
4177 
4178 	if (ice_driver_is_detaching(sc))
4179 		return (ESHUTDOWN);
4180 
4181 	if (req->oldptr == NULL && req->newptr == NULL) {
4182 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4183 		return (ret);
4184 	}
4185 
4186 	pi = hw->port_info;
4187 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4188 
4189 	/* Format current PFC enable setting for output */
4190 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4191 
4192 	/* Read in the new PFC config */
4193 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4194 	if ((ret) || (req->newptr == NULL))
4195 		return (ret);
4196 
4197 	/* Don't allow setting changes in FW DCB mode */
4198 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4199 		return (EPERM);
4200 
4201 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4202 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4203 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4204 		if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
4205 			 sc->link_up) {
4206 			ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4207 			if (ret)
4208 				return (ret);
4209 		}
4210 	}
4211 
4212 	return ice_config_pfc(sc, user_pfc);
4213 }
4214 
/* Description string registered with the pfc_mode sysctl */
#define ICE_SYSCTL_HELP_PFC_MODE					\
	"\nDisplay and set the current QoS mode for the firmware"	\
	"\n\t0: VLAN UP mode"						\
	"\n\t1: DSCP mode"
4219 
4220 /**
4221  * ice_sysctl_pfc_mode
4222  * @oidp: sysctl oid structure
4223  * @arg1: pointer to private data structure
4224  * @arg2: unused
4225  * @req: sysctl request pointer
4226  *
4227  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4228  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4229  * -based settings are configured for DCB.
4230  */
4231 static int
4232 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4233 {
4234 	struct ice_softc *sc = (struct ice_softc *)arg1;
4235 	struct ice_dcbx_cfg *local_dcbx_cfg;
4236 	struct ice_port_info *pi;
4237 	struct ice_hw *hw = &sc->hw;
4238 	device_t dev = sc->dev;
4239 	enum ice_status status;
4240 	u8 user_pfc_mode, aq_pfc_mode;
4241 	int ret;
4242 
4243 	UNREFERENCED_PARAMETER(arg2);
4244 
4245 	if (ice_driver_is_detaching(sc))
4246 		return (ESHUTDOWN);
4247 
4248 	if (req->oldptr == NULL && req->newptr == NULL) {
4249 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4250 		return (ret);
4251 	}
4252 
4253 	pi = hw->port_info;
4254 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4255 
4256 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4257 
4258 	/* Read in the new mode */
4259 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4260 	if ((ret) || (req->newptr == NULL))
4261 		return (ret);
4262 
4263 	/* Don't allow setting changes in FW DCB mode */
4264 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4265 		return (EPERM);
4266 
4267 	/* Currently, there are only two modes */
4268 	switch (user_pfc_mode) {
4269 	case 0:
4270 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4271 		break;
4272 	case 1:
4273 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4274 		break;
4275 	default:
4276 		device_printf(dev,
4277 		    "%s: Valid input range is 0-1 (input %d)\n",
4278 		    __func__, user_pfc_mode);
4279 		return (EINVAL);
4280 	}
4281 
4282 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4283 	if (status == ICE_ERR_NOT_SUPPORTED) {
4284 		device_printf(dev,
4285 		    "%s: Failed to set PFC mode; DCB not supported\n",
4286 		    __func__);
4287 		return (ENODEV);
4288 	}
4289 	if (status) {
4290 		device_printf(dev,
4291 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4292 		    __func__, ice_status_str(status),
4293 		    ice_aq_str(hw->adminq.sq_last_status));
4294 		return (EIO);
4295 	}
4296 
4297 	/* Reset settings to default when mode is changed */
4298 	ice_set_default_local_mib_settings(sc);
4299 	/* Cache current settings and reconfigure */
4300 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4301 	ice_do_dcb_reconfig(sc, false);
4302 
4303 	return (0);
4304 }
4305 
/* Description string registered with the link_active_on_if_down sysctl */
#define ICE_SYSCTL_HELP_SET_LINK_ACTIVE				\
	"\nKeep link active after setting interface down:"	\
	"\n\t0 - disable"					\
	"\n\t1 - enable"
4310 
4311 /**
4312  * ice_sysctl_set_link_active
4313  * @oidp: sysctl oid structure
4314  * @arg1: pointer to private data structure
4315  * @arg2: unused
4316  * @req: sysctl request pointer
4317  *
4318  * Set the link_active_on_if_down sysctl flag.
4319  */
4320 static int
4321 ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS)
4322 {
4323 	struct ice_softc *sc = (struct ice_softc *)arg1;
4324 	bool mode;
4325 	int ret;
4326 
4327 	UNREFERENCED_PARAMETER(arg2);
4328 
4329 	mode = ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4330 
4331 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4332 	if ((ret) || (req->newptr == NULL))
4333 		return (ret);
4334 
4335 	if (mode)
4336 		ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4337 	else
4338 		ice_clear_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4339 
4340 	return (0);
4341 }
4342 
4343 /**
4344  * ice_sysctl_debug_set_link
4345  * @oidp: sysctl oid structure
4346  * @arg1: pointer to private data structure
4347  * @arg2: unused
4348  * @req: sysctl request pointer
4349  *
4350  * Set link up/down in debug session.
4351  */
4352 static int
4353 ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS)
4354 {
4355 	struct ice_softc *sc = (struct ice_softc *)arg1;
4356 	bool mode;
4357 	int ret;
4358 
4359 	UNREFERENCED_PARAMETER(arg2);
4360 
4361 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4362 	if ((ret) || (req->newptr == NULL))
4363 		return (ret);
4364 
4365 	ice_set_link(sc, mode != 0);
4366 
4367 	return (0);
4368 }
4369 
4370 /**
4371  * ice_add_device_sysctls - add device specific dynamic sysctls
4372  * @sc: device private structure
4373  *
4374  * Add per-device dynamic sysctls which show device configuration or enable
4375  * configuring device functionality. For tunable values which can be set prior
4376  * to load, see ice_add_device_tunables.
4377  *
4378  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4379  * and likely should be called near the end of the attach process.
4380  */
4381 void
4382 ice_add_device_sysctls(struct ice_softc *sc)
4383 {
4384 	struct sysctl_oid *hw_node;
4385 	device_t dev = sc->dev;
4386 
4387 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4388 	struct sysctl_oid_list *ctx_list =
4389 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4390 
4391 	SYSCTL_ADD_PROC(ctx, ctx_list,
4392 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4393 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4394 
4395 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4396 		SYSCTL_ADD_PROC(ctx, ctx_list,
4397 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4398 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4399 	}
4400 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_TEMP_SENSOR)) {
4401 		SYSCTL_ADD_PROC(ctx, ctx_list,
4402 		    OID_AUTO, "temp", CTLTYPE_S8 | CTLFLAG_RD,
4403 		    sc, 0, ice_sysctl_temperature, "CU",
4404 		    "Device temperature in degrees Celcius (C)");
4405 	}
4406 
4407 	SYSCTL_ADD_PROC(ctx, ctx_list,
4408 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4409 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4410 
4411 	SYSCTL_ADD_PROC(ctx, ctx_list,
4412 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4413 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4414 
4415 	SYSCTL_ADD_PROC(ctx, ctx_list,
4416 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4417 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4418 
4419 	SYSCTL_ADD_PROC(ctx, ctx_list,
4420 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4421 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4422 
4423 	SYSCTL_ADD_PROC(ctx, ctx_list,
4424 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4425 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4426 
4427 	SYSCTL_ADD_PROC(ctx, ctx_list,
4428 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4429 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4430 
4431 	SYSCTL_ADD_PROC(ctx, ctx_list,
4432 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4433 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4434 
4435 	SYSCTL_ADD_PROC(ctx, ctx_list,
4436 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4437 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4438 
4439 	SYSCTL_ADD_PROC(ctx, ctx_list,
4440 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4441 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4442 
4443 	SYSCTL_ADD_PROC(ctx, ctx_list,
4444 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4445 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4446 
4447 	SYSCTL_ADD_PROC(ctx, ctx_list,
4448 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4449 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4450 
4451 	SYSCTL_ADD_PROC(ctx, ctx_list,
4452 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4453 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4454 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4455 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4456 
4457 	SYSCTL_ADD_PROC(ctx, ctx_list,
4458 	    OID_AUTO, "link_active_on_if_down", CTLTYPE_U8 | CTLFLAG_RWTUN,
4459 	    sc, 0, ice_sysctl_set_link_active, "CU", ICE_SYSCTL_HELP_SET_LINK_ACTIVE);
4460 
4461 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4462 
4463 	/* Differentiate software and hardware statistics, by keeping hw stats
4464 	 * in their own node. This isn't in ice_add_device_tunables, because
4465 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4466 	 */
4467 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4468 				  NULL, "Port Hardware Statistics");
4469 
4470 	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4471 
4472 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4473 	 * during creation
4474 	 */
4475 	ice_add_vsi_sysctls(&sc->pf_vsi);
4476 
4477 	/* Add sysctls related to debugging the device driver. This includes
4478 	 * sysctls which display additional internal driver state for use in
4479 	 * understanding what is happening within the driver.
4480 	 */
4481 	ice_add_debug_sysctls(sc);
4482 }
4483 
/**
 * @enum hmc_error_type
 * @brief HMC error type codes
 *
 * Error type values that ice_log_hmc_error() translates into log messages.
 * The values 3, 10 and 12 are reserved and have no enumerator.
 */
enum hmc_error_type {
	HMC_ERR_PMF_INVALID			= 0,
	HMC_ERR_VF_IDX_INVALID			= 1,
	HMC_ERR_VF_PARENT_PF_INVALID		= 2,
	/* 3 is reserved */
	HMC_ERR_INDEX_TOO_BIG			= 4,
	HMC_ERR_ADDRESS_TOO_LARGE		= 5,
	HMC_ERR_SEGMENT_DESC_INVALID		= 6,
	HMC_ERR_SEGMENT_DESC_TOO_SMALL		= 7,
	HMC_ERR_PAGE_DESC_INVALID		= 8,
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION	= 9,
	/* 10 is reserved */
	HMC_ERR_INVALID_OBJECT_TYPE		= 11,
	/* 12 is reserved */
};
4505 
4506 /**
4507  * ice_log_hmc_error - Log an HMC error message
4508  * @hw: device hw structure
4509  * @dev: the device to pass to device_printf()
4510  *
4511  * Log a message when an HMC error interrupt is triggered.
4512  */
4513 void
4514 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4515 {
4516 	u32 info, data;
4517 	u8 index, errtype, objtype;
4518 	bool isvf;
4519 
4520 	info = rd32(hw, PFHMC_ERRORINFO);
4521 	data = rd32(hw, PFHMC_ERRORDATA);
4522 
4523 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4524 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4525 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4526 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4527 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4528 
4529 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4530 
4531 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4532 		      isvf ? "VF" : "PF", index);
4533 
4534 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4535 		      errtype, objtype, data);
4536 
4537 	switch (errtype) {
4538 	case HMC_ERR_PMF_INVALID:
4539 		device_printf(dev, "Private Memory Function is not valid\n");
4540 		break;
4541 	case HMC_ERR_VF_IDX_INVALID:
4542 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4543 		break;
4544 	case HMC_ERR_VF_PARENT_PF_INVALID:
4545 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4546 		break;
4547 	case HMC_ERR_INDEX_TOO_BIG:
4548 		device_printf(dev, "Object index too big\n");
4549 		break;
4550 	case HMC_ERR_ADDRESS_TOO_LARGE:
4551 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4552 		break;
4553 	case HMC_ERR_SEGMENT_DESC_INVALID:
4554 		device_printf(dev, "Segment descriptor is invalid\n");
4555 		break;
4556 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4557 		device_printf(dev, "Segment descriptor is too small\n");
4558 		break;
4559 	case HMC_ERR_PAGE_DESC_INVALID:
4560 		device_printf(dev, "Page descriptor is invalid\n");
4561 		break;
4562 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4563 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4564 		break;
4565 	case HMC_ERR_INVALID_OBJECT_TYPE:
4566 		device_printf(dev, "Invalid object type\n");
4567 		break;
4568 	default:
4569 		device_printf(dev, "Unknown HMC error\n");
4570 	}
4571 
4572 	/* Clear the error indication */
4573 	wr32(hw, PFHMC_ERRORINFO, 0);
4574 }
4575 
4576 /**
4577  * @struct ice_sysctl_info
4578  * @brief sysctl information
4579  *
4580  * Structure used to simplify the process of defining the many similar
4581  * statistics sysctls.
4582  */
4583 struct ice_sysctl_info {
4584 	u64		*stat;
4585 	const char	*name;
4586 	const char	*description;
4587 };
4588 
4589 /**
4590  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4591  * @ctx: sysctl ctx to use
4592  * @parent: the parent node to add sysctls under
4593  * @stats: the ethernet stats structure to source values from
4594  *
4595  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4596  * Will add them under the parent node specified.
4597  *
4598  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4599  * statistics, so it is not included here. Similarly, rx_discards has different
4600  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4601  */
4602 void
4603 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4604 			  struct sysctl_oid *parent,
4605 			  struct ice_eth_stats *stats)
4606 {
4607 	const struct ice_sysctl_info ctls[] = {
4608 		/* Rx Stats */
4609 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4610 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4611 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4612 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4613 		/* Tx Stats */
4614 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4615 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4616 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4617 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4618 		/* End */
4619 		{ 0, 0, 0 }
4620 	};
4621 
4622 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4623 
4624 	const struct ice_sysctl_info *entry = ctls;
4625 	while (entry->stat != 0) {
4626 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4627 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4628 			       entry->description);
4629 		entry++;
4630 	}
4631 }
4632 
4633 /**
4634  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4635  * @oidp: sysctl oid structure
4636  * @arg1: pointer to private data structure
4637  * @arg2: Tx CSO stat to read
4638  * @req: sysctl request pointer
4639  *
4640  * On read: Sums the per-queue Tx CSO stat and displays it.
4641  */
4642 static int
4643 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4644 {
4645 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4646 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4647 	u64 stat = 0;
4648 	int i;
4649 
4650 	if (ice_driver_is_detaching(vsi->sc))
4651 		return (ESHUTDOWN);
4652 
4653 	/* Check that the type is valid */
4654 	if (type >= ICE_CSO_STAT_TX_COUNT)
4655 		return (EDOOFUS);
4656 
4657 	/* Sum the stat for each of the Tx queues */
4658 	for (i = 0; i < vsi->num_tx_queues; i++)
4659 		stat += vsi->tx_queues[i].stats.cso[type];
4660 
4661 	return sysctl_handle_64(oidp, NULL, stat, req);
4662 }
4663 
4664 /**
4665  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4666  * @oidp: sysctl oid structure
4667  * @arg1: pointer to private data structure
4668  * @arg2: Rx CSO stat to read
4669  * @req: sysctl request pointer
4670  *
4671  * On read: Sums the per-queue Rx CSO stat and displays it.
4672  */
4673 static int
4674 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4675 {
4676 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4677 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4678 	u64 stat = 0;
4679 	int i;
4680 
4681 	if (ice_driver_is_detaching(vsi->sc))
4682 		return (ESHUTDOWN);
4683 
4684 	/* Check that the type is valid */
4685 	if (type >= ICE_CSO_STAT_RX_COUNT)
4686 		return (EDOOFUS);
4687 
4688 	/* Sum the stat for each of the Rx queues */
4689 	for (i = 0; i < vsi->num_rx_queues; i++)
4690 		stat += vsi->rx_queues[i].stats.cso[type];
4691 
4692 	return sysctl_handle_64(oidp, NULL, stat, req);
4693 }
4694 
4695 /**
4696  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4697  * @oidp: sysctl oid structure
4698  * @arg1: pointer to private data structure
4699  * @arg2: unused
4700  * @req: sysctl request pointer
4701  *
4702  * On read: Sums current values of Rx error statistics and
4703  * displays it.
4704  */
4705 static int
4706 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4707 {
4708 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4709 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4710 	u64 stat = 0;
4711 	int i, type;
4712 
4713 	UNREFERENCED_PARAMETER(arg2);
4714 
4715 	if (ice_driver_is_detaching(vsi->sc))
4716 		return (ESHUTDOWN);
4717 
4718 	stat += hs->rx_undersize;
4719 	stat += hs->rx_fragments;
4720 	stat += hs->rx_oversize;
4721 	stat += hs->rx_jabber;
4722 	stat += hs->rx_len_errors;
4723 	stat += hs->crc_errors;
4724 	stat += hs->illegal_bytes;
4725 
4726 	/* Checksum error stats */
4727 	for (i = 0; i < vsi->num_rx_queues; i++)
4728 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4729 		     type < ICE_CSO_STAT_RX_COUNT;
4730 		     type++)
4731 			stat += vsi->rx_queues[i].stats.cso[type];
4732 
4733 	return sysctl_handle_64(oidp, NULL, stat, req);
4734 }
4735 
4736 /**
4737  * @struct ice_rx_cso_stat_info
4738  * @brief sysctl information for an Rx checksum offload statistic
4739  *
4740  * Structure used to simplify the process of defining the checksum offload
4741  * statistics.
4742  */
4743 struct ice_rx_cso_stat_info {
4744 	enum ice_rx_cso_stat	type;
4745 	const char		*name;
4746 	const char		*description;
4747 };
4748 
4749 /**
4750  * @struct ice_tx_cso_stat_info
4751  * @brief sysctl information for a Tx checksum offload statistic
4752  *
4753  * Structure used to simplify the process of defining the checksum offload
4754  * statistics.
4755  */
4756 struct ice_tx_cso_stat_info {
4757 	enum ice_tx_cso_stat	type;
4758 	const char		*name;
4759 	const char		*description;
4760 };
4761 
4762 /**
4763  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4764  * @vsi: pointer to the VSI to add sysctls for
4765  * @ctx: sysctl ctx to use
4766  * @parent: the parent node to add sysctls under
4767  *
4768  * Add statistics sysctls for software tracked statistics of a VSI.
4769  *
4770  * Currently this only adds checksum offload statistics, but more counters may
4771  * be added in the future.
4772  */
4773 static void
4774 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4775 			 struct sysctl_ctx_list *ctx,
4776 			 struct sysctl_oid *parent)
4777 {
4778 	struct sysctl_oid *cso_node;
4779 	struct sysctl_oid_list *cso_list;
4780 
4781 	/* Tx CSO Stats */
4782 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4783 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4784 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4785 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4786 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4787 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4788 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4789 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4790 		/* End */
4791 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4792 	};
4793 
4794 	/* Rx CSO Stats */
4795 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4796 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4797 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4798 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4799 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4800 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4801 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4802 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4803 		/* End */
4804 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4805 	};
4806 
4807 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4808 
4809 	/* Add a node for statistics tracked by software. */
4810 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4811 				  NULL, "Checksum offload Statistics");
4812 	cso_list = SYSCTL_CHILDREN(cso_node);
4813 
4814 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4815 	while (tx_entry->name && tx_entry->description) {
4816 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4817 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4818 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4819 				tx_entry->description);
4820 		tx_entry++;
4821 	}
4822 
4823 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4824 	while (rx_entry->name && rx_entry->description) {
4825 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4826 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4827 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4828 				rx_entry->description);
4829 		rx_entry++;
4830 	}
4831 }
4832 
4833 /**
4834  * ice_add_vsi_sysctls - Add sysctls for a VSI
4835  * @vsi: pointer to VSI structure
4836  *
4837  * Add various sysctls for a given VSI.
4838  */
4839 void
4840 ice_add_vsi_sysctls(struct ice_vsi *vsi)
4841 {
4842 	struct sysctl_ctx_list *ctx = &vsi->ctx;
4843 	struct sysctl_oid *hw_node, *sw_node;
4844 	struct sysctl_oid_list *vsi_list, *hw_list;
4845 
4846 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4847 
4848 	/* Keep hw stats in their own node. */
4849 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4850 				  NULL, "VSI Hardware Statistics");
4851 	hw_list = SYSCTL_CHILDREN(hw_node);
4852 
4853 	/* Add the ethernet statistics for this VSI */
4854 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4855 
4856 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4857 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4858 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4859 
4860 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4861 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4862 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4863 			"Aggregate of all Rx errors");
4864 
4865 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4866 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4867 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4868 
4869 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4870 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4871 			0, "Tx Packets Discarded Due To Error");
4872 
4873 	/* Add a node for statistics tracked by software. */
4874 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4875 				  NULL, "VSI Software Statistics");
4876 
4877 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4878 }
4879 
4880 /**
4881  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4882  * @ctx: sysctl ctx to use
4883  * @parent_list: parent sysctl list to add sysctls under
4884  * @pfc_stat_location: address of statistic for sysctl to display
4885  * @node_name: Name for statistic node
4886  * @descr: Description used for nodes added in this function
4887  *
4888  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
4889  * for a stat and leaves for each traffic class for that stat.
4890  */
4891 static void
4892 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
4893 				 struct sysctl_oid_list *parent_list,
4894 				 u64* pfc_stat_location,
4895 				 const char *node_name,
4896 				 const char *descr)
4897 {
4898 	struct sysctl_oid_list *node_list;
4899 	struct sysctl_oid *node;
4900 	struct sbuf *namebuf, *descbuf;
4901 
4902 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
4903 				   NULL, descr);
4904 	node_list = SYSCTL_CHILDREN(node);
4905 
4906 	namebuf = sbuf_new_auto();
4907 	descbuf = sbuf_new_auto();
4908 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4909 		sbuf_clear(namebuf);
4910 		sbuf_clear(descbuf);
4911 
4912 		sbuf_printf(namebuf, "%d", i);
4913 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
4914 
4915 		sbuf_finish(namebuf);
4916 		sbuf_finish(descbuf);
4917 
4918 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
4919 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
4920 			sbuf_data(descbuf));
4921 	}
4922 
4923 	sbuf_delete(namebuf);
4924 	sbuf_delete(descbuf);
4925 }
4926 
4927 /**
4928  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
4929  * @ctx: the sysctl ctx to use
4930  * @parent: parent node to add the sysctls under
4931  * @stats: the hw ports stat structure to pull values from
4932  *
4933  * Add global Priority Flow Control MAC statistics sysctls. These are
4934  * structured as a node with the PFC statistic, where there are eight
4935  * nodes for each traffic class.
4936  */
4937 static void
4938 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
4939 			      struct sysctl_oid *parent,
4940 			      struct ice_hw_port_stats *stats)
4941 {
4942 	struct sysctl_oid_list *parent_list;
4943 
4944 	parent_list = SYSCTL_CHILDREN(parent);
4945 
4946 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
4947 	    "p_xon_recvd", "PFC XON received");
4948 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
4949 	    "p_xoff_recvd", "PFC XOFF received");
4950 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
4951 	    "p_xon_txd", "PFC XON transmitted");
4952 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
4953 	    "p_xoff_txd", "PFC XOFF transmitted");
4954 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
4955 	    "p_xon2xoff", "PFC XON to XOFF transitions");
4956 }
4957 
4958 /**
4959  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
4960  * @ctx: the sysctl ctx to use
4961  * @parent: parent node to add the sysctls under
4962  * @stats: the hw ports stat structure to pull values from
4963  *
4964  * Add global MAC statistics sysctls.
4965  */
4966 void
4967 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
4968 			  struct sysctl_oid *parent,
4969 			  struct ice_hw_port_stats *stats)
4970 {
4971 	struct sysctl_oid *mac_node;
4972 	struct sysctl_oid_list *parent_list, *mac_list;
4973 
4974 	parent_list = SYSCTL_CHILDREN(parent);
4975 
4976 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
4977 				   NULL, "Mac Hardware Statistics");
4978 	mac_list = SYSCTL_CHILDREN(mac_node);
4979 
4980 	/* Add the ethernet statistics common to VSI and MAC */
4981 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
4982 
4983 	/* Add PFC stats that add per-TC counters */
4984 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
4985 
4986 	const struct ice_sysctl_info ctls[] = {
4987 		/* Packet Reception Stats */
4988 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
4989 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
4990 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
4991 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
4992 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
4993 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
4994 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
4995 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
4996 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
4997 		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
4998 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
4999 		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
5000 		{&stats->eth.rx_discards, "rx_discards",
5001 		    "Discarded Rx Packets by Port (shortage of storage space)"},
5002 		/* Packet Transmission Stats */
5003 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
5004 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
5005 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
5006 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
5007 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
5008 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
5009 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
5010 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
5011 		/* Flow control */
5012 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
5013 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
5014 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
5015 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
5016 		/* Other */
5017 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
5018 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
5019 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
5020 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
5021 		/* End */
5022 		{ 0, 0, 0 }
5023 	};
5024 
5025 	const struct ice_sysctl_info *entry = ctls;
5026 	while (entry->stat != 0) {
5027 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
5028 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
5029 			entry->description);
5030 		entry++;
5031 	}
5032 }
5033 
5034 /**
5035  * ice_configure_misc_interrupts - enable 'other' interrupt causes
5036  * @sc: pointer to device private softc
5037  *
5038  * Enable various "other" interrupt causes, and associate them to interrupt 0,
5039  * which is our administrative interrupt.
5040  */
5041 void
5042 ice_configure_misc_interrupts(struct ice_softc *sc)
5043 {
5044 	struct ice_hw *hw = &sc->hw;
5045 	u32 val;
5046 
5047 	/* Read the OICR register to clear it */
5048 	rd32(hw, PFINT_OICR);
5049 
5050 	/* Enable useful "other" interrupt causes */
5051 	val = (PFINT_OICR_ECC_ERR_M |
5052 	       PFINT_OICR_MAL_DETECT_M |
5053 	       PFINT_OICR_GRST_M |
5054 	       PFINT_OICR_PCI_EXCEPTION_M |
5055 	       PFINT_OICR_VFLR_M |
5056 	       PFINT_OICR_HMC_ERR_M |
5057 	       PFINT_OICR_PE_CRITERR_M);
5058 
5059 	wr32(hw, PFINT_OICR_ENA, val);
5060 
5061 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
5062 	 * not explicitly program them when writing to the PFINT_*_CTL
5063 	 * registers. Nevertheless, these writes are associating the
5064 	 * interrupts with the ITR 0 vector
5065 	 */
5066 
5067 	/* Associate the OICR interrupt with ITR 0, and enable it */
5068 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
5069 
5070 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
5071 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
5072 
5073 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
5074 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
5075 }
5076 
5077 /**
5078  * ice_filter_is_mcast - Check if info is a multicast filter
5079  * @vsi: vsi structure addresses are targeted towards
5080  * @info: filter info
5081  *
5082  * @returns true if the provided info is a multicast filter, and false
5083  * otherwise.
5084  */
5085 static bool
5086 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
5087 {
5088 	const u8 *addr = info->l_data.mac.mac_addr;
5089 
5090 	/*
5091 	 * Check if this info matches a multicast filter added by
5092 	 * ice_add_mac_to_list
5093 	 */
5094 	if ((info->flag == ICE_FLTR_TX) &&
5095 	    (info->src_id == ICE_SRC_ID_VSI) &&
5096 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5097 	    (info->vsi_handle == vsi->idx) &&
5098 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5099 		return true;
5100 
5101 	return false;
5102 }
5103 
5104 /**
5105  * @struct ice_mcast_sync_data
5106  * @brief data used by ice_sync_one_mcast_filter function
5107  *
5108  * Structure used to store data needed for processing by the
5109  * ice_sync_one_mcast_filter. This structure contains a linked list of filters
5110  * to be added, an error indication, and a pointer to the device softc.
5111  */
5112 struct ice_mcast_sync_data {
5113 	struct ice_list_head add_list;
5114 	struct ice_softc *sc;
5115 	int err;
5116 };
5117 
5118 /**
5119  * ice_sync_one_mcast_filter - Check if we need to program the filter
5120  * @p: void pointer to algorithm data
5121  * @sdl: link level socket address
5122  * @count: unused count value
5123  *
5124  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5125  * list. For the given address, search our internal list to see if we have
5126  * found the filter. If not, add it to our list of filters that need to be
5127  * programmed.
5128  *
5129  * @returns (1) if we've actually setup the filter to be added
5130  */
5131 static u_int
5132 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5133 			  u_int __unused count)
5134 {
5135 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5136 	struct ice_softc *sc = data->sc;
5137 	struct ice_hw *hw = &sc->hw;
5138 	struct ice_switch_info *sw = hw->switch_info;
5139 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5140 	struct ice_fltr_mgmt_list_entry *itr;
5141 	struct ice_list_head *rules;
5142 	int err;
5143 
5144 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5145 
5146 	/*
5147 	 * If a previous filter already indicated an error, there is no need
5148 	 * for us to finish processing the rest of the filters.
5149 	 */
5150 	if (data->err)
5151 		return (0);
5152 
5153 	/* See if this filter has already been programmed */
5154 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5155 		struct ice_fltr_info *info = &itr->fltr_info;
5156 		const u8 *addr = info->l_data.mac.mac_addr;
5157 
5158 		/* Only check multicast filters */
5159 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5160 			continue;
5161 
5162 		/*
5163 		 * If this filter matches, mark the internal filter as
5164 		 * "found", and exit.
5165 		 */
5166 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5167 			itr->marker = ICE_FLTR_FOUND;
5168 			return (1);
5169 		}
5170 	}
5171 
5172 	/*
5173 	 * If we failed to locate the filter in our internal list, we need to
5174 	 * place it into our add list.
5175 	 */
5176 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5177 				  ICE_FWD_TO_VSI);
5178 	if (err) {
5179 		device_printf(sc->dev,
5180 			      "Failed to place MAC %6D onto add list, err %s\n",
5181 			      sdl_addr, ":", ice_err_str(err));
5182 		data->err = err;
5183 
5184 		return (0);
5185 	}
5186 
5187 	return (1);
5188 }
5189 
5190 /**
5191  * ice_sync_multicast_filters - Synchronize OS and internal filter list
5192  * @sc: device private structure
5193  *
5194  * Called in response to SIOCDELMULTI to synchronize the operating system
5195  * multicast address list with the internal list of filters programmed to
5196  * firmware.
5197  *
5198  * Works in one phase to find added and deleted filters using a marker bit on
5199  * the internal list.
5200  *
5201  * First, a loop over the internal list clears the marker bit. Second, for
5202  * each filter in the ifp list is checked. If we find it in the internal list,
5203  * the marker bit is set. Otherwise, the filter is added to the add list.
5204  * Third, a loop over the internal list determines if any filters have not
5205  * been found. Each of these is added to the delete list. Finally, the add and
5206  * delete lists are programmed to firmware to update the filters.
5207  *
5208  * @returns zero on success or an integer error code on failure.
5209  */
5210 int
5211 ice_sync_multicast_filters(struct ice_softc *sc)
5212 {
5213 	struct ice_hw *hw = &sc->hw;
5214 	struct ice_switch_info *sw = hw->switch_info;
5215 	struct ice_fltr_mgmt_list_entry *itr;
5216 	struct ice_mcast_sync_data data = {};
5217 	struct ice_list_head *rules, remove_list;
5218 	enum ice_status status;
5219 	int err = 0;
5220 
5221 	INIT_LIST_HEAD(&data.add_list);
5222 	INIT_LIST_HEAD(&remove_list);
5223 	data.sc = sc;
5224 	data.err = 0;
5225 
5226 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5227 
5228 	/* Acquire the lock for the entire duration */
5229 	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5230 
5231 	/* (1) Reset the marker state for all filters */
5232 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5233 		itr->marker = ICE_FLTR_NOT_FOUND;
5234 
5235 	/* (2) determine which filters need to be added and removed */
5236 	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5237 	if (data.err) {
5238 		/* ice_sync_one_mcast_filter already prints an error */
5239 		err = data.err;
5240 		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5241 		goto free_filter_lists;
5242 	}
5243 
5244 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5245 		struct ice_fltr_info *info = &itr->fltr_info;
5246 		const u8 *addr = info->l_data.mac.mac_addr;
5247 
5248 		/* Only check multicast filters */
5249 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5250 			continue;
5251 
5252 		/*
5253 		 * If the filter is not marked as found, then it must no
5254 		 * longer be in the ifp address list, so we need to remove it.
5255 		 */
5256 		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5257 			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5258 						  addr, ICE_FWD_TO_VSI);
5259 			if (err) {
5260 				device_printf(sc->dev,
5261 					      "Failed to place MAC %6D onto remove list, err %s\n",
5262 					      addr, ":", ice_err_str(err));
5263 				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5264 				goto free_filter_lists;
5265 			}
5266 		}
5267 	}
5268 
5269 	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5270 
5271 	status = ice_add_mac(hw, &data.add_list);
5272 	if (status) {
5273 		device_printf(sc->dev,
5274 			      "Could not add new MAC filters, err %s aq_err %s\n",
5275 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5276 		err = (EIO);
5277 		goto free_filter_lists;
5278 	}
5279 
5280 	status = ice_remove_mac(hw, &remove_list);
5281 	if (status) {
5282 		device_printf(sc->dev,
5283 			      "Could not remove old MAC filters, err %s aq_err %s\n",
5284 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5285 		err = (EIO);
5286 		goto free_filter_lists;
5287 	}
5288 
5289 free_filter_lists:
5290 	ice_free_fltr_list(&data.add_list);
5291 	ice_free_fltr_list(&remove_list);
5292 
5293 	return (err);
5294 }
5295 
5296 /**
5297  * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5298  * @vsi: The VSI to add the filter for
5299  * @vid: array of VLAN ids to add
5300  * @length: length of vid array
5301  *
5302  * Programs HW filters so that the given VSI will receive the specified VLANs.
5303  */
5304 enum ice_status
5305 ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5306 {
5307 	struct ice_hw *hw = &vsi->sc->hw;
5308 	struct ice_list_head vlan_list;
5309 	struct ice_fltr_list_entry *vlan_entries;
5310 	enum ice_status status;
5311 
5312 	MPASS(length > 0);
5313 
5314 	INIT_LIST_HEAD(&vlan_list);
5315 
5316 	vlan_entries = (struct ice_fltr_list_entry *)
5317 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5318 	if (!vlan_entries)
5319 		return (ICE_ERR_NO_MEMORY);
5320 
5321 	for (u16 i = 0; i < length; i++) {
5322 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5323 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5324 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5325 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5326 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5327 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5328 
5329 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5330 	}
5331 
5332 	status = ice_add_vlan(hw, &vlan_list);
5333 	if (!status)
5334 		goto done;
5335 
5336 	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5337 	for (u16 i = 0; i < length; i++) {
5338 		device_printf(vsi->sc->dev,
5339 		    "- vlan %d, status %d\n",
5340 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5341 		    vlan_entries[i].status);
5342 	}
5343 done:
5344 	free(vlan_entries, M_ICE);
5345 	return (status);
5346 }
5347 
5348 /**
5349  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5350  * @vsi: The VSI to add the filter for
5351  * @vid: VLAN to add
5352  *
5353  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5354  */
5355 enum ice_status
5356 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5357 {
5358 	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5359 }
5360 
5361 /**
5362  * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5363  * @vsi: The VSI to remove the filters from
5364  * @vid: array of VLAN ids to remove
5365  * @length: length of vid array
5366  *
5367  * Removes previously programmed HW filters for the specified VSI.
5368  */
5369 enum ice_status
5370 ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5371 {
5372 	struct ice_hw *hw = &vsi->sc->hw;
5373 	struct ice_list_head vlan_list;
5374 	struct ice_fltr_list_entry *vlan_entries;
5375 	enum ice_status status;
5376 
5377 	MPASS(length > 0);
5378 
5379 	INIT_LIST_HEAD(&vlan_list);
5380 
5381 	vlan_entries = (struct ice_fltr_list_entry *)
5382 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5383 	if (!vlan_entries)
5384 		return (ICE_ERR_NO_MEMORY);
5385 
5386 	for (u16 i = 0; i < length; i++) {
5387 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5388 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5389 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5390 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5391 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5392 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5393 
5394 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5395 	}
5396 
5397 	status = ice_remove_vlan(hw, &vlan_list);
5398 	if (!status)
5399 		goto done;
5400 
5401 	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5402 	for (u16 i = 0; i < length; i++) {
5403 		device_printf(vsi->sc->dev,
5404 		    "- vlan %d, status %d\n",
5405 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5406 		    vlan_entries[i].status);
5407 	}
5408 done:
5409 	free(vlan_entries, M_ICE);
5410 	return (status);
5411 }
5412 
5413 /**
5414  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5415  * @vsi: The VSI to remove the filter from
5416  * @vid: VLAN to remove
5417  *
5418  * Removes a previously programmed HW filter for the specified VSI.
5419  */
5420 enum ice_status
5421 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5422 {
5423 	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5424 }
5425 
5426 #define ICE_SYSCTL_HELP_RX_ITR			\
5427 "\nControl Rx interrupt throttle rate."		\
5428 "\n\t0-8160 - sets interrupt rate in usecs"	\
5429 "\n\t    -1 - reset the Rx itr to default"
5430 
5431 /**
5432  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5433  * @oidp: sysctl oid structure
5434  * @arg1: pointer to private data structure
5435  * @arg2: unused
5436  * @req: sysctl request pointer
5437  *
5438  * On read: Displays the current Rx ITR value
5439  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5440  */
5441 static int
5442 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5443 {
5444 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5445 	struct ice_softc *sc = vsi->sc;
5446 	int increment, ret;
5447 
5448 	UNREFERENCED_PARAMETER(arg2);
5449 
5450 	if (ice_driver_is_detaching(sc))
5451 		return (ESHUTDOWN);
5452 
5453 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5454 	if ((ret) || (req->newptr == NULL))
5455 		return (ret);
5456 
5457 	if (vsi->rx_itr < 0)
5458 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5459 	if (vsi->rx_itr > ICE_ITR_MAX)
5460 		vsi->rx_itr = ICE_ITR_MAX;
5461 
5462 	/* Assume 2usec increment if it hasn't been loaded yet */
5463 	increment = sc->hw.itr_gran ? : 2;
5464 
5465 	/* We need to round the value to the hardware's ITR granularity */
5466 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5467 
5468 	/* If the driver has finished initializing, then we need to reprogram
5469 	 * the ITR registers now. Otherwise, they will be programmed during
5470 	 * driver initialization.
5471 	 */
5472 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5473 		ice_configure_rx_itr(vsi);
5474 
5475 	return (0);
5476 }
5477 
5478 #define ICE_SYSCTL_HELP_TX_ITR			\
5479 "\nControl Tx interrupt throttle rate."		\
5480 "\n\t0-8160 - sets interrupt rate in usecs"	\
5481 "\n\t    -1 - reset the Tx itr to default"
5482 
5483 /**
5484  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5485  * @oidp: sysctl oid structure
5486  * @arg1: pointer to private data structure
5487  * @arg2: unused
5488  * @req: sysctl request pointer
5489  *
5490  * On read: Displays the current Tx ITR value
5491  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5492  */
5493 static int
5494 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5495 {
5496 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5497 	struct ice_softc *sc = vsi->sc;
5498 	int increment, ret;
5499 
5500 	UNREFERENCED_PARAMETER(arg2);
5501 
5502 	if (ice_driver_is_detaching(sc))
5503 		return (ESHUTDOWN);
5504 
5505 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5506 	if ((ret) || (req->newptr == NULL))
5507 		return (ret);
5508 
5509 	/* Allow configuring a negative value to reset to the default */
5510 	if (vsi->tx_itr < 0)
5511 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5512 	if (vsi->tx_itr > ICE_ITR_MAX)
5513 		vsi->tx_itr = ICE_ITR_MAX;
5514 
5515 	/* Assume 2usec increment if it hasn't been loaded yet */
5516 	increment = sc->hw.itr_gran ? : 2;
5517 
5518 	/* We need to round the value to the hardware's ITR granularity */
5519 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5520 
5521 	/* If the driver has finished initializing, then we need to reprogram
5522 	 * the ITR registers now. Otherwise, they will be programmed during
5523 	 * driver initialization.
5524 	 */
5525 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5526 		ice_configure_tx_itr(vsi);
5527 
5528 	return (0);
5529 }
5530 
5531 /**
5532  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5533  * @vsi: pointer to VSI structure
5534  * @parent: parent node to add the tunables under
5535  *
5536  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5537  * dynamically removed upon VSI removal.
5538  *
5539  * Add various tunables and set up the basic node structure for the VSI. Must
5540  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5541  * possible after the VSI memory is initialized.
5542  *
5543  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5544  * their values can be read from loader.conf prior to their first use in the
5545  * driver.
5546  */
5547 void
5548 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5549 {
5550 	struct sysctl_oid_list *vsi_list;
5551 	char vsi_name[32], vsi_desc[32];
5552 
5553 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5554 
5555 	/* Initialize the sysctl context for this VSI */
5556 	sysctl_ctx_init(&vsi->ctx);
5557 
5558 	/* Add a node to collect this VSI's statistics together */
5559 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5560 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5561 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5562 					CTLFLAG_RD, NULL, vsi_desc);
5563 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5564 
5565 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5566 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5567 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5568 			vsi, 0, ice_sysctl_rx_itr, "S",
5569 			ICE_SYSCTL_HELP_RX_ITR);
5570 
5571 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5572 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5573 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5574 			vsi, 0, ice_sysctl_tx_itr, "S",
5575 			ICE_SYSCTL_HELP_TX_ITR);
5576 }
5577 
5578 /**
5579  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5580  * @vsi: the VSI to remove contexts for
5581  *
5582  * Free the context for the VSI sysctls. This includes the main context, as
5583  * well as the per-queue sysctls.
5584  */
5585 void
5586 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5587 {
5588 	device_t dev = vsi->sc->dev;
5589 	int err;
5590 
5591 	if (vsi->vsi_node) {
5592 		err = sysctl_ctx_free(&vsi->ctx);
5593 		if (err)
5594 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5595 				      vsi->idx, ice_err_str(err));
5596 		vsi->vsi_node = NULL;
5597 	}
5598 }
5599 
5600 /**
5601  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5602  * @sc: pointer to device private softc
5603  * @ctx: the sysctl ctx to use
5604  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5605  *
5606  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5607  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5608  * of 64 DSCP to TC map values that the user can configure.
5609  */
5610 void
5611 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5612 			    struct sysctl_ctx_list *ctx,
5613 			    struct sysctl_oid_list *ctx_list)
5614 {
5615 	struct sysctl_oid_list *node_list;
5616 	struct sysctl_oid *node;
5617 	struct sbuf *namebuf, *descbuf;
5618 	int first_dscp_val, last_dscp_val;
5619 
5620 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5621 			       NULL, "Map of DSCP values to DCB TCs");
5622 	node_list = SYSCTL_CHILDREN(node);
5623 
5624 	namebuf = sbuf_new_auto();
5625 	descbuf = sbuf_new_auto();
5626 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5627 		sbuf_clear(namebuf);
5628 		sbuf_clear(descbuf);
5629 
5630 		first_dscp_val = i * 8;
5631 		last_dscp_val = first_dscp_val + 7;
5632 
5633 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5634 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5635 			    first_dscp_val, last_dscp_val);
5636 
5637 		sbuf_finish(namebuf);
5638 		sbuf_finish(descbuf);
5639 
5640 		SYSCTL_ADD_PROC(ctx, node_list,
5641 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5642 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5643 	}
5644 
5645 	sbuf_delete(namebuf);
5646 	sbuf_delete(descbuf);
5647 }
5648 
5649 /**
5650  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5651  * @sc: device private structure
5652  *
5653  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5654  * for re-use by ice_add_device_sysctls.
5655  *
5656  * In order for the sysctl fields to be initialized before use, this function
5657  * should be called as early as possible during attach activities.
5658  *
5659  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5660  * here in this function, rather than later in ice_add_device_sysctls.
5661  *
5662  * To make things easier, this function is also expected to setup the various
5663  * sysctl nodes in addition to tunables so that other sysctls which can't be
5664  * initialized early can hook into the same nodes.
5665  */
5666 void
5667 ice_add_device_tunables(struct ice_softc *sc)
5668 {
5669 	device_t dev = sc->dev;
5670 
5671 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5672 	struct sysctl_oid_list *ctx_list =
5673 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5674 
5675 	sc->enable_health_events = ice_enable_health_events;
5676 
5677 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5678 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5679 			"Enable FW health event reporting for this PF");
5680 
5681 	/* Add a node to track VSI sysctls. Keep track of the node in the
5682 	 * softc so that we can hook other sysctls into it later. This
5683 	 * includes both the VSI statistics, as well as potentially dynamic
5684 	 * VSIs in the future.
5685 	 */
5686 
5687 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5688 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5689 
5690 	/* Add debug tunables */
5691 	ice_add_debug_tunables(sc);
5692 }
5693 
5694 /**
5695  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5696  * @oidp: sysctl oid structure
5697  * @arg1: pointer to private data structure
5698  * @arg2: unused
5699  * @req: sysctl request pointer
5700  *
5701  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5702  */
5703 static int
5704 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5705 {
5706 	struct ice_softc *sc = (struct ice_softc *)arg1;
5707 	struct ice_hw *hw = &sc->hw;
5708 	struct ice_switch_info *sw = hw->switch_info;
5709 	struct ice_fltr_mgmt_list_entry *fm_entry;
5710 	struct ice_list_head *rule_head;
5711 	struct ice_lock *rule_lock;
5712 	struct ice_fltr_info *fi;
5713 	struct sbuf *sbuf;
5714 	int ret;
5715 
5716 	UNREFERENCED_PARAMETER(oidp);
5717 	UNREFERENCED_PARAMETER(arg2);
5718 
5719 	if (ice_driver_is_detaching(sc))
5720 		return (ESHUTDOWN);
5721 
5722 	/* Wire the old buffer so we can take a non-sleepable lock */
5723 	ret = sysctl_wire_old_buffer(req, 0);
5724 	if (ret)
5725 		return (ret);
5726 
5727 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5728 
5729 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5730 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5731 
5732 	sbuf_printf(sbuf, "MAC Filter List");
5733 
5734 	ice_acquire_lock(rule_lock);
5735 
5736 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5737 		fi = &fm_entry->fltr_info;
5738 
5739 		sbuf_printf(sbuf,
5740 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5741 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5742 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5743 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5744 
5745 		/* if we have a vsi_list_info, print some information about that */
5746 		if (fm_entry->vsi_list_info) {
5747 			sbuf_printf(sbuf,
5748 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5749 				    fm_entry->vsi_count,
5750 				    fm_entry->vsi_list_info->vsi_list_id,
5751 				    fm_entry->vsi_list_info->ref_cnt);
5752 		}
5753 	}
5754 
5755 	ice_release_lock(rule_lock);
5756 
5757 	sbuf_finish(sbuf);
5758 	sbuf_delete(sbuf);
5759 
5760 	return (0);
5761 }
5762 
5763 /**
5764  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5765  * @oidp: sysctl oid structure
5766  * @arg1: pointer to private data structure
5767  * @arg2: unused
5768  * @req: sysctl request pointer
5769  *
5770  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5771  */
5772 static int
5773 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5774 {
5775 	struct ice_softc *sc = (struct ice_softc *)arg1;
5776 	struct ice_hw *hw = &sc->hw;
5777 	struct ice_switch_info *sw = hw->switch_info;
5778 	struct ice_fltr_mgmt_list_entry *fm_entry;
5779 	struct ice_list_head *rule_head;
5780 	struct ice_lock *rule_lock;
5781 	struct ice_fltr_info *fi;
5782 	struct sbuf *sbuf;
5783 	int ret;
5784 
5785 	UNREFERENCED_PARAMETER(oidp);
5786 	UNREFERENCED_PARAMETER(arg2);
5787 
5788 	if (ice_driver_is_detaching(sc))
5789 		return (ESHUTDOWN);
5790 
5791 	/* Wire the old buffer so we can take a non-sleepable lock */
5792 	ret = sysctl_wire_old_buffer(req, 0);
5793 	if (ret)
5794 		return (ret);
5795 
5796 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5797 
5798 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5799 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5800 
5801 	sbuf_printf(sbuf, "VLAN Filter List");
5802 
5803 	ice_acquire_lock(rule_lock);
5804 
5805 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5806 		fi = &fm_entry->fltr_info;
5807 
5808 		sbuf_printf(sbuf,
5809 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5810 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5811 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5812 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5813 
5814 		/* if we have a vsi_list_info, print some information about that */
5815 		if (fm_entry->vsi_list_info) {
5816 			sbuf_printf(sbuf,
5817 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5818 				    fm_entry->vsi_count,
5819 				    fm_entry->vsi_list_info->vsi_list_id,
5820 				    fm_entry->vsi_list_info->ref_cnt);
5821 		}
5822 	}
5823 
5824 	ice_release_lock(rule_lock);
5825 
5826 	sbuf_finish(sbuf);
5827 	sbuf_delete(sbuf);
5828 
5829 	return (0);
5830 }
5831 
5832 /**
5833  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5834  * @oidp: sysctl oid structure
5835  * @arg1: pointer to private data structure
5836  * @arg2: unused
5837  * @req: sysctl request pointer
5838  *
5839  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5840  * filters.
5841  */
5842 static int
5843 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5844 {
5845 	struct ice_softc *sc = (struct ice_softc *)arg1;
5846 	struct ice_hw *hw = &sc->hw;
5847 	struct ice_switch_info *sw = hw->switch_info;
5848 	struct ice_fltr_mgmt_list_entry *fm_entry;
5849 	struct ice_list_head *rule_head;
5850 	struct ice_lock *rule_lock;
5851 	struct ice_fltr_info *fi;
5852 	struct sbuf *sbuf;
5853 	int ret;
5854 
5855 	UNREFERENCED_PARAMETER(oidp);
5856 	UNREFERENCED_PARAMETER(arg2);
5857 
5858 	if (ice_driver_is_detaching(sc))
5859 		return (ESHUTDOWN);
5860 
5861 	/* Wire the old buffer so we can take a non-sleepable lock */
5862 	ret = sysctl_wire_old_buffer(req, 0);
5863 	if (ret)
5864 		return (ret);
5865 
5866 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5867 
5868 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5869 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5870 
5871 	sbuf_printf(sbuf, "Ethertype Filter List");
5872 
5873 	ice_acquire_lock(rule_lock);
5874 
5875 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5876 		fi = &fm_entry->fltr_info;
5877 
5878 		sbuf_printf(sbuf,
5879 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5880 			fi->l_data.ethertype_mac.ethertype,
5881 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5882 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5883 			fi->fltr_rule_id);
5884 
5885 		/* if we have a vsi_list_info, print some information about that */
5886 		if (fm_entry->vsi_list_info) {
5887 			sbuf_printf(sbuf,
5888 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5889 				    fm_entry->vsi_count,
5890 				    fm_entry->vsi_list_info->vsi_list_id,
5891 				    fm_entry->vsi_list_info->ref_cnt);
5892 		}
5893 	}
5894 
5895 	ice_release_lock(rule_lock);
5896 
5897 	sbuf_finish(sbuf);
5898 	sbuf_delete(sbuf);
5899 
5900 	return (0);
5901 }
5902 
5903 /**
5904  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
5905  * @oidp: sysctl oid structure
5906  * @arg1: pointer to private data structure
5907  * @arg2: unused
5908  * @req: sysctl request pointer
5909  *
5910  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
5911  * Ethertype/MAC filters.
5912  */
5913 static int
5914 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
5915 {
5916 	struct ice_softc *sc = (struct ice_softc *)arg1;
5917 	struct ice_hw *hw = &sc->hw;
5918 	struct ice_switch_info *sw = hw->switch_info;
5919 	struct ice_fltr_mgmt_list_entry *fm_entry;
5920 	struct ice_list_head *rule_head;
5921 	struct ice_lock *rule_lock;
5922 	struct ice_fltr_info *fi;
5923 	struct sbuf *sbuf;
5924 	int ret;
5925 
5926 	UNREFERENCED_PARAMETER(oidp);
5927 	UNREFERENCED_PARAMETER(arg2);
5928 
5929 	if (ice_driver_is_detaching(sc))
5930 		return (ESHUTDOWN);
5931 
5932 	/* Wire the old buffer so we can take a non-sleepable lock */
5933 	ret = sysctl_wire_old_buffer(req, 0);
5934 	if (ret)
5935 		return (ret);
5936 
5937 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5938 
5939 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
5940 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
5941 
5942 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
5943 
5944 	ice_acquire_lock(rule_lock);
5945 
5946 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5947 		fi = &fm_entry->fltr_info;
5948 
5949 		sbuf_printf(sbuf,
5950 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5951 			    fi->l_data.ethertype_mac.ethertype,
5952 			    fi->l_data.ethertype_mac.mac_addr, ":",
5953 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5954 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5955 			    fi->fltr_rule_id);
5956 
5957 		/* if we have a vsi_list_info, print some information about that */
5958 		if (fm_entry->vsi_list_info) {
5959 			sbuf_printf(sbuf,
5960 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5961 				    fm_entry->vsi_count,
5962 				    fm_entry->vsi_list_info->vsi_list_id,
5963 				    fm_entry->vsi_list_info->ref_cnt);
5964 		}
5965 	}
5966 
5967 	ice_release_lock(rule_lock);
5968 
5969 	sbuf_finish(sbuf);
5970 	sbuf_delete(sbuf);
5971 
5972 	return (0);
5973 }
5974 
5975 /**
5976  * ice_sysctl_dump_state_flags - Dump device driver state flags
5977  * @oidp: sysctl oid structure
5978  * @arg1: pointer to private data structure
5979  * @arg2: unused
5980  * @req: sysctl request pointer
5981  *
5982  * Callback for "state" sysctl to display currently set driver state flags.
5983  */
5984 static int
5985 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
5986 {
5987 	struct ice_softc *sc = (struct ice_softc *)arg1;
5988 	struct sbuf *sbuf;
5989 	u32 copied_state;
5990 	unsigned int i;
5991 	bool at_least_one = false;
5992 
5993 	UNREFERENCED_PARAMETER(oidp);
5994 	UNREFERENCED_PARAMETER(arg2);
5995 
5996 	if (ice_driver_is_detaching(sc))
5997 		return (ESHUTDOWN);
5998 
5999 	/* Make a copy of the state to ensure we display coherent values */
6000 	copied_state = atomic_load_acq_32(&sc->state);
6001 
6002 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6003 
6004 	/* Add the string for each set state to the sbuf */
6005 	for (i = 0; i < 32; i++) {
6006 		if (copied_state & BIT(i)) {
6007 			const char *str = ice_state_to_str((enum ice_state)i);
6008 
6009 			at_least_one = true;
6010 
6011 			if (str)
6012 				sbuf_printf(sbuf, "\n%s", str);
6013 			else
6014 				sbuf_printf(sbuf, "\nBIT(%u)", i);
6015 		}
6016 	}
6017 
6018 	if (!at_least_one)
6019 		sbuf_printf(sbuf, "Nothing set");
6020 
6021 	sbuf_finish(sbuf);
6022 	sbuf_delete(sbuf);
6023 
6024 	return (0);
6025 }
6026 
6027 #define ICE_SYSCTL_DEBUG_MASK_HELP \
6028 "\nSelect debug statements to print to kernel messages"		\
6029 "\nFlags:"							\
6030 "\n\t        0x1 - Function Tracing"				\
6031 "\n\t        0x2 - Driver Initialization"			\
6032 "\n\t        0x4 - Release"					\
6033 "\n\t        0x8 - FW Logging"					\
6034 "\n\t       0x10 - Link"					\
6035 "\n\t       0x20 - PHY"						\
6036 "\n\t       0x40 - Queue Context"				\
6037 "\n\t       0x80 - NVM"						\
6038 "\n\t      0x100 - LAN"						\
6039 "\n\t      0x200 - Flow"					\
6040 "\n\t      0x400 - DCB"						\
6041 "\n\t      0x800 - Diagnostics"					\
6042 "\n\t     0x1000 - Flow Director"				\
6043 "\n\t     0x2000 - Switch"					\
6044 "\n\t     0x4000 - Scheduler"					\
6045 "\n\t     0x8000 - RDMA"					\
6046 "\n\t    0x10000 - DDP Package"					\
6047 "\n\t    0x20000 - Resources"					\
6048 "\n\t    0x40000 - ACL"						\
6049 "\n\t    0x80000 - PTP"						\
6050 "\n\t   0x100000 - Admin Queue messages"			\
6051 "\n\t   0x200000 - Admin Queue descriptors"			\
6052 "\n\t   0x400000 - Admin Queue descriptor buffers"		\
6053 "\n\t   0x800000 - Admin Queue commands"			\
6054 "\n\t  0x1000000 - Parser"					\
6055 "\n\t  ..."							\
6056 "\n\t  0x8000000 - (Reserved for user)"				\
6057 "\n\t"								\
6058 "\nUse \"sysctl -x\" to view flags properly."
6059 
6060 /**
6061  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
6062  * @sc: device private structure
6063  *
6064  * Add sysctl tunable values related to debugging the device driver. For now,
6065  * this means a tunable to set the debug mask early during driver load.
6066  *
6067  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
6068  * that in normal kernel builds, these will all be hidden, but on a debug
6069  * kernel they will be more easily visible.
6070  */
6071 static void
6072 ice_add_debug_tunables(struct ice_softc *sc)
6073 {
6074 	struct sysctl_oid_list *debug_list;
6075 	device_t dev = sc->dev;
6076 
6077 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6078 	struct sysctl_oid_list *ctx_list =
6079 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
6080 
6081 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
6082 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6083 					    NULL, "Debug Sysctls");
6084 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6085 
6086 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
6087 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6088 		       &sc->hw.debug_mask, 0,
6089 		       ICE_SYSCTL_DEBUG_MASK_HELP);
6090 
6091 	/* Load the default value from the global sysctl first */
6092 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
6093 
6094 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6095 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6096 			&sc->enable_tx_fc_filter, 0,
6097 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6098 
6099 	sc->tx_balance_en = ice_tx_balance_en;
6100 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6101 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6102 			&sc->tx_balance_en, 0,
6103 			"Enable 5-layer scheduler topology");
6104 
6105 	/* Load the default value from the global sysctl first */
6106 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6107 
6108 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6109 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6110 			&sc->enable_tx_lldp_filter, 0,
6111 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6112 
6113 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6114 }
6115 
6116 #define ICE_SYSCTL_HELP_REQUEST_RESET		\
6117 "\nRequest the driver to initiate a reset."	\
6118 "\n\tpfr - Initiate a PF reset"			\
6119 "\n\tcorer - Initiate a CORE reset"		\
6120 "\n\tglobr - Initiate a GLOBAL reset"
6121 
6122 /**
6123  * @var rl_sysctl_ticks
6124  * @brief timestamp for latest reset request sysctl call
6125  *
6126  * Helps rate-limit the call to the sysctl which resets the device
6127  */
6128 int rl_sysctl_ticks = 0;
6129 
6130 /**
6131  * ice_sysctl_request_reset - Request that the driver initiate a reset
6132  * @oidp: sysctl oid structure
6133  * @arg1: pointer to private data structure
6134  * @arg2: unused
6135  * @req: sysctl request pointer
6136  *
6137  * Callback for "request_reset" sysctl to request that the driver initiate
6138  * a reset. Expects to be passed one of the following strings
6139  *
6140  * "pfr" - Initiate a PF reset
6141  * "corer" - Initiate a CORE reset
6142  * "globr" - Initiate a Global reset
6143  */
6144 static int
6145 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6146 {
6147 	struct ice_softc *sc = (struct ice_softc *)arg1;
6148 	struct ice_hw *hw = &sc->hw;
6149 	enum ice_status status;
6150 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6151 	const char *reset_message;
6152 	int ret;
6153 
6154 	/* Buffer to store the requested reset string. Must contain enough
6155 	 * space to store the largest expected reset string, which currently
6156 	 * means 6 bytes of space.
6157 	 */
6158 	char reset[6] = "";
6159 
6160 	UNREFERENCED_PARAMETER(arg2);
6161 
6162 	ret = priv_check(curthread, PRIV_DRIVER);
6163 	if (ret)
6164 		return (ret);
6165 
6166 	if (ice_driver_is_detaching(sc))
6167 		return (ESHUTDOWN);
6168 
6169 	/* Read in the requested reset type. */
6170 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6171 	if ((ret) || (req->newptr == NULL))
6172 		return (ret);
6173 
6174 	if (strcmp(reset, "pfr") == 0) {
6175 		reset_message = "Requesting a PF reset";
6176 		reset_type = ICE_RESET_PFR;
6177 	} else if (strcmp(reset, "corer") == 0) {
6178 		reset_message = "Initiating a CORE reset";
6179 		reset_type = ICE_RESET_CORER;
6180 	} else if (strcmp(reset, "globr") == 0) {
6181 		reset_message = "Initiating a GLOBAL reset";
6182 		reset_type = ICE_RESET_GLOBR;
6183 	} else if (strcmp(reset, "empr") == 0) {
6184 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6185 		return (EOPNOTSUPP);
6186 	}
6187 
6188 	if (reset_type == ICE_RESET_INVAL) {
6189 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6190 		return (EINVAL);
6191 	}
6192 
6193 	/*
6194 	 * Rate-limit the frequency at which this function is called.
6195 	 * Assuming this is called successfully once, typically,
6196 	 * everything should be handled within the allotted time frame.
6197 	 * However, in the odd setup situations, we've also put in
6198 	 * guards for when the reset has finished, but we're in the
6199 	 * process of rebuilding. And instead of queueing an intent,
6200 	 * simply error out and let the caller retry, if so desired.
6201 	 */
6202 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6203 		device_printf(sc->dev,
6204 		    "Call frequency too high. Operation aborted.\n");
6205 		return (EBUSY);
6206 	}
6207 	rl_sysctl_ticks = ticks;
6208 
6209 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6210 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6211 		return (EBUSY);
6212 	}
6213 
6214 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6215 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6216 		return (EBUSY);
6217 	}
6218 
6219 	device_printf(sc->dev, "%s\n", reset_message);
6220 
6221 	/* Initiate the PF reset during the admin status task */
6222 	if (reset_type == ICE_RESET_PFR) {
6223 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6224 		return (0);
6225 	}
6226 
6227 	/*
6228 	 * Other types of resets including CORE and GLOBAL resets trigger an
6229 	 * interrupt on all PFs. Initiate the reset now. Preparation and
6230 	 * rebuild logic will be handled by the admin status task.
6231 	 */
6232 	status = ice_reset(hw, reset_type);
6233 
6234 	/*
6235 	 * Resets can take a long time and we still don't want another call
6236 	 * to this function before we settle down.
6237 	 */
6238 	rl_sysctl_ticks = ticks;
6239 
6240 	if (status) {
6241 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6242 			      ice_status_str(status));
6243 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6244 		return (EFAULT);
6245 	}
6246 
6247 	return (0);
6248 }
6249 
6250 #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING		\
6251 "\nSelect clusters to dump with \"dump\" sysctl"		\
6252 "\nFlags:"							\
6253 "\n\t   0x1 - Switch"						\
6254 "\n\t   0x2 - ACL"						\
6255 "\n\t   0x4 - Tx Scheduler"					\
6256 "\n\t   0x8 - Profile Configuration"				\
6257 "\n\t  0x20 - Link"						\
6258 "\n\t  0x80 - DCB"						\
6259 "\n\t 0x100 - L2P"						\
6260 "\n\t"								\
6261 "\nUse \"sysctl -x\" to view flags properly."
6262 
6263 /**
6264  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6265  *     from FW when FW debug dump occurs
6266  * @oidp: sysctl oid structure
6267  * @arg1: pointer to private data structure
6268  * @arg2: unused
6269  * @req: sysctl request pointer
6270  */
6271 static int
6272 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6273 {
6274 	struct ice_softc *sc = (struct ice_softc *)arg1;
6275 	device_t dev = sc->dev;
6276 	u16 clusters;
6277 	int ret;
6278 
6279 	UNREFERENCED_PARAMETER(arg2);
6280 
6281 	ret = priv_check(curthread, PRIV_DRIVER);
6282 	if (ret)
6283 		return (ret);
6284 
6285 	if (ice_driver_is_detaching(sc))
6286 		return (ESHUTDOWN);
6287 
6288 	clusters = sc->fw_debug_dump_cluster_mask;
6289 
6290 	ret = sysctl_handle_16(oidp, &clusters, 0, req);
6291 	if ((ret) || (req->newptr == NULL))
6292 		return (ret);
6293 
6294 	if (!clusters ||
6295 	    (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK))) {
6296 		device_printf(dev,
6297 		    "%s: ERROR: Incorrect settings requested\n",
6298 		    __func__);
6299 		return (EINVAL);
6300 	}
6301 
6302 	sc->fw_debug_dump_cluster_mask = clusters;
6303 
6304 	return (0);
6305 }
6306 
6307 #define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6308 
6309 /**
6310  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6311  * @sc: the device softc
6312  * @sbuf: initialized sbuf to print data to
6313  * @cluster_id: FW cluster ID to print data from
6314  *
6315  * Reads debug data from the specified cluster id in the FW and prints it to
6316  * the input sbuf. This function issues multiple AQ commands to the FW in
6317  * order to get all of the data in the cluster.
6318  *
6319  * @remark Only intended to be used by the sysctl handler
6320  * ice_sysctl_fw_debug_dump_do_dump
6321  */
6322 static void
6323 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6324 {
6325 	struct ice_hw *hw = &sc->hw;
6326 	device_t dev = sc->dev;
6327 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6328 	const u8 reserved_buf[8] = {};
6329 	enum ice_status status;
6330 	int counter = 0;
6331 	u8 *data_buf;
6332 
6333 	/* Other setup */
6334 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6335 	if (!data_buf)
6336 		return;
6337 
6338 	/* Input parameters / loop variables */
6339 	u16 table_id = 0;
6340 	u32 offset = 0;
6341 
6342 	/* Output from the Get Internal Data AQ command */
6343 	u16 ret_buf_size = 0;
6344 	u16 ret_next_table = 0;
6345 	u32 ret_next_index = 0;
6346 
6347 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6348 	    cluster_id);
6349 
6350 	for (;;) {
6351 		/* Do not trust the FW behavior to be completely correct */
6352 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6353 			device_printf(dev,
6354 			    "%s: Exceeded counter limit for cluster %d\n",
6355 			    __func__, cluster_id);
6356 			break;
6357 		}
6358 
6359 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6360 		ice_debug(hw, ICE_DBG_DIAG,
6361 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6362 		    table_id, offset, data_buf_size);
6363 
6364 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6365 		    offset, data_buf, data_buf_size, &ret_buf_size,
6366 		    &ret_next_table, &ret_next_index, NULL);
6367 		if (status) {
6368 			device_printf(dev,
6369 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6370 			    __func__, cluster_id, ice_status_str(status),
6371 			    ice_aq_str(hw->adminq.sq_last_status));
6372 			break;
6373 		}
6374 
6375 		ice_debug(hw, ICE_DBG_DIAG,
6376 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6377 		    ret_next_table, ret_next_index, ret_buf_size);
6378 
6379 		/* Print cluster id */
6380 		u32 print_cluster_id = (u32)cluster_id;
6381 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6382 		/* Print table id */
6383 		u32 print_table_id = (u32)table_id;
6384 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6385 		/* Print table length */
6386 		u32 print_table_length = (u32)ret_buf_size;
6387 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6388 		/* Print current offset */
6389 		u32 print_curr_offset = offset;
6390 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6391 		/* Print reserved bytes */
6392 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6393 		/* Print data */
6394 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6395 
6396 		/* Adjust loop variables */
6397 		memset(data_buf, 0, data_buf_size);
6398 		bool same_table_next = (table_id == ret_next_table);
6399 		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6400 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6401 
6402 		if ((!same_table_next && !last_offset_next) ||
6403 		    (same_table_next && last_table_next)) {
6404 			device_printf(dev,
6405 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6406 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6407 			break;
6408 		}
6409 
6410 		if (!same_table_next && !last_table_next && last_offset_next) {
6411 			/* We've hit the end of the table */
6412 			table_id = ret_next_table;
6413 			offset = 0;
6414 		}
6415 		else if (!same_table_next && last_table_next && last_offset_next) {
6416 			/* We've hit the end of the cluster */
6417 			break;
6418 		}
6419 		else if (same_table_next && !last_table_next && last_offset_next) {
6420 			if (cluster_id == 0x1 && table_id < 39)
6421 				table_id += 1;
6422 			else
6423 				break;
6424 		}
6425 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6426 			/* More data left in the table */
6427 			offset = ret_next_index;
6428 		}
6429 	}
6430 
6431 	free(data_buf, M_ICE);
6432 }
6433 
/*
 * Help text describing how to trigger a FW debug dump.
 * NOTE(review): presumably the description string of the dump sysctl; its
 * registration is not in this part of the file - confirm.
 */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
"\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
"\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
"\nthis data is opaque and not a string."

/*
 * Size estimates that ice_sysctl_fw_debug_dump_do_dump adds together to
 * report an upper bound when the caller only probes for the output size:
 * the base amount is always included, and the CLUST0, CLUST1 and CLUST2
 * amounts are added when cluster mask bits 0x1, 0x2 and 0x4 are set.
 */
#define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
#define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
#define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6443 
6444 /**
6445  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6446  * @oidp: sysctl oid structure
6447  * @arg1: pointer to private data structure
6448  * @arg2: unused
6449  * @req: sysctl request pointer
6450  *
6451  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6452  * formatted dump of some debug FW data intended to be processed by a special
6453  * Intel tool. Prints out the cluster data specified by the "clusters"
6454  * sysctl.
6455  *
6456  * @remark The actual AQ calls and printing are handled by a helper
6457  * function above.
6458  */
6459 static int
6460 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6461 {
6462 	struct ice_softc *sc = (struct ice_softc *)arg1;
6463 	device_t dev = sc->dev;
6464 	struct sbuf *sbuf;
6465 	int bit, ret;
6466 
6467 	UNREFERENCED_PARAMETER(arg2);
6468 
6469 	ret = priv_check(curthread, PRIV_DRIVER);
6470 	if (ret)
6471 		return (ret);
6472 
6473 	if (ice_driver_is_detaching(sc))
6474 		return (ESHUTDOWN);
6475 
6476 	/* If the user hasn't written "1" to this sysctl yet: */
6477 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6478 		/* Avoid output on the first set of reads to this sysctl in
6479 		 * order to prevent a null byte from being written to the
6480 		 * end result when called via sysctl(8).
6481 		 */
6482 		if (req->oldptr == NULL && req->newptr == NULL) {
6483 			ret = SYSCTL_OUT(req, 0, 0);
6484 			return (ret);
6485 		}
6486 
6487 		char input_buf[2] = "";
6488 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6489 		if ((ret) || (req->newptr == NULL))
6490 			return (ret);
6491 
6492 		/* If we get '1', then indicate we'll do a dump in the next
6493 		 * sysctl read call.
6494 		 */
6495 		if (input_buf[0] == '1') {
6496 			if (!sc->fw_debug_dump_cluster_mask) {
6497 				device_printf(dev,
6498 				    "%s: Debug Dump failed because no cluster was specified with the \"clusters\" sysctl.\n",
6499 				    __func__);
6500 				return (EINVAL);
6501 			}
6502 
6503 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6504 			return (0);
6505 		}
6506 
6507 		return (EINVAL);
6508 	}
6509 
6510 	/* --- FW debug dump state is set --- */
6511 
6512 
6513 	/* Caller just wants the upper bound for size */
6514 	if (req->oldptr == NULL && req->newptr == NULL) {
6515 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6516 		if (sc->fw_debug_dump_cluster_mask & 0x1)
6517 			est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6518 		if (sc->fw_debug_dump_cluster_mask & 0x2)
6519 			est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6520 		if (sc->fw_debug_dump_cluster_mask & 0x4)
6521 			est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6522 
6523 		ret = SYSCTL_OUT(req, 0, est_output_len);
6524 		return (ret);
6525 	}
6526 
6527 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6528 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6529 
6530 	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6531 
6532 	for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6533 	    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6534 		ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6535 
6536 	sbuf_finish(sbuf);
6537 	sbuf_delete(sbuf);
6538 
6539 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6540 	return (ret);
6541 }
6542 
6543 /**
6544  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6545  * @sc: device private structure
6546  *
6547  * Add sysctls related to debugging the device driver. Generally these should
6548  * simply be sysctls which dump internal driver state, to aid in understanding
6549  * what the driver is doing.
6550  */
6551 static void
6552 ice_add_debug_sysctls(struct ice_softc *sc)
6553 {
6554 	struct sysctl_oid *sw_node, *dump_node;
6555 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6556 	device_t dev = sc->dev;
6557 
6558 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6559 
6560 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6561 
6562 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6563 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6564 			ice_sysctl_request_reset, "A",
6565 			ICE_SYSCTL_HELP_REQUEST_RESET);
6566 
6567 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6568 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6569 		       &sc->soft_stats.pfr_count, 0,
6570 		       "# of PF resets handled");
6571 
6572 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6573 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6574 		       &sc->soft_stats.corer_count, 0,
6575 		       "# of CORE resets handled");
6576 
6577 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6578 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6579 		       &sc->soft_stats.globr_count, 0,
6580 		       "# of Global resets handled");
6581 
6582 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6583 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6584 		       &sc->soft_stats.empr_count, 0,
6585 		       "# of EMP resets handled");
6586 
6587 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6588 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6589 		       &sc->soft_stats.tx_mdd_count, 0,
6590 		       "# of Tx MDD events detected");
6591 
6592 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6593 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6594 		       &sc->soft_stats.rx_mdd_count, 0,
6595 		       "# of Rx MDD events detected");
6596 
6597 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6598 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6599 			ice_sysctl_dump_state_flags, "A",
6600 			"Driver State Flags");
6601 
6602 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "set_link",
6603 			ICE_CTLFLAG_DEBUG | CTLTYPE_U8 | CTLFLAG_RW, sc, 0,
6604 			ice_sysctl_debug_set_link, "CU", "Set link");
6605 
6606 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6607 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6608 			ice_sysctl_phy_type_low, "QU",
6609 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6610 
6611 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6612 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6613 			ice_sysctl_phy_type_high, "QU",
6614 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6615 
6616 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6617 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6618 			ice_sysctl_phy_sw_caps, "",
6619 			"Get PHY Capabilities (Software configuration)");
6620 
6621 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6622 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6623 			ice_sysctl_phy_nvm_caps, "",
6624 			"Get PHY Capabilities (NVM configuration)");
6625 
6626 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6627 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6628 			ice_sysctl_phy_topo_caps, "",
6629 			"Get PHY Capabilities (Topology configuration)");
6630 
6631 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6632 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6633 			ice_sysctl_phy_link_status, "",
6634 			"Get PHY Link Status");
6635 
6636 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6637 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6638 			ice_sysctl_read_i2c_diag_data, "A",
6639 			"Dump selected diagnostic data from FW");
6640 
6641 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6642 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6643 		       "FW Build ID");
6644 
6645 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6646 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6647 			ice_sysctl_os_pkg_version, "A",
6648 			"DDP package name and version found in ice_ddp");
6649 
6650 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6651 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6652 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6653 			"Current LLDP persistent status");
6654 
6655 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6656 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6657 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6658 			"Default LLDP persistent status");
6659 
6660 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6661 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6662 			ice_sysctl_negotiated_fc, "A",
6663 			"Current Negotiated Flow Control mode");
6664 
6665 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6666 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6667 			ice_sysctl_dump_dcbx_cfg, "A",
6668 			"Dumps Local MIB information from firmware");
6669 
6670 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6671 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6672 			ice_sysctl_dump_dcbx_cfg, "A",
6673 			"Dumps Remote MIB information from firmware");
6674 
6675 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6676 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6677 			"Dumps Selected PF VSI parameters from firmware");
6678 
6679 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6680 			sc, 0, ice_sysctl_query_port_ets, "A",
6681 			"Prints selected output from Query Port ETS AQ command");
6682 
6683 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6684 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6685 				  "Switch Configuration");
6686 	sw_list = SYSCTL_CHILDREN(sw_node);
6687 
6688 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6689 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6690 			ice_sysctl_dump_mac_filters, "A",
6691 			"MAC Filters");
6692 
6693 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6694 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6695 			ice_sysctl_dump_vlan_filters, "A",
6696 			"VLAN Filters");
6697 
6698 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6699 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6700 			ice_sysctl_dump_ethertype_filters, "A",
6701 			"Ethertype Filters");
6702 
6703 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6704 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6705 			ice_sysctl_dump_ethertype_mac_filters, "A",
6706 			"Ethertype/MAC Filters");
6707 
6708 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6709 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6710 				  "Internal FW Dump");
6711 	dump_list = SYSCTL_CHILDREN(dump_node);
6712 
6713 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6714 			ICE_CTLFLAG_DEBUG | CTLTYPE_U16 | CTLFLAG_RW, sc, 0,
6715 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6716 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6717 
6718 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6719 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6720 			ice_sysctl_fw_debug_dump_do_dump, "",
6721 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6722 }
6723 
6724 /**
6725  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6726  * @vsi: the VSI to disable
6727  *
6728  * Disables the Tx queues associated with this VSI. Essentially the opposite
6729  * of ice_cfg_vsi_for_tx.
6730  */
6731 int
6732 ice_vsi_disable_tx(struct ice_vsi *vsi)
6733 {
6734 	struct ice_softc *sc = vsi->sc;
6735 	struct ice_hw *hw = &sc->hw;
6736 	enum ice_status status;
6737 	u32 *q_teids;
6738 	u16 *q_ids, *q_handles;
6739 	size_t q_teids_size, q_ids_size, q_handles_size;
6740 	int tc, j, buf_idx, err = 0;
6741 
6742 	if (vsi->num_tx_queues > 255)
6743 		return (ENOSYS);
6744 
6745 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6746 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6747 	if (!q_teids)
6748 		return (ENOMEM);
6749 
6750 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6751 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6752 	if (!q_ids) {
6753 		err = (ENOMEM);
6754 		goto free_q_teids;
6755 	}
6756 
6757 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6758 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6759 	if (!q_handles) {
6760 		err = (ENOMEM);
6761 		goto free_q_ids;
6762 	}
6763 
6764 	ice_for_each_traffic_class(tc) {
6765 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6766 		u16 start_idx, end_idx;
6767 
6768 		/* Skip rest of disabled TCs once the first
6769 		 * disabled TC is found */
6770 		if (!(vsi->tc_map & BIT(tc)))
6771 			break;
6772 
6773 		/* Fill out TX queue information for this TC */
6774 		start_idx = tc_info->qoffset;
6775 		end_idx = start_idx + tc_info->qcount_tx;
6776 		buf_idx = 0;
6777 		for (j = start_idx; j < end_idx; j++) {
6778 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6779 
6780 			q_ids[buf_idx] = vsi->tx_qmap[j];
6781 			q_handles[buf_idx] = txq->q_handle;
6782 			q_teids[buf_idx] = txq->q_teid;
6783 			buf_idx++;
6784 		}
6785 
6786 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6787 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6788 		if (status == ICE_ERR_DOES_NOT_EXIST) {
6789 			; /* Queues have already been disabled, no need to report this as an error */
6790 		} else if (status == ICE_ERR_RESET_ONGOING) {
6791 			device_printf(sc->dev,
6792 				      "Reset in progress. LAN Tx queues already disabled\n");
6793 			break;
6794 		} else if (status) {
6795 			device_printf(sc->dev,
6796 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6797 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6798 			err = (ENODEV);
6799 			break;
6800 		}
6801 
6802 		/* Clear buffers */
6803 		memset(q_teids, 0, q_teids_size);
6804 		memset(q_ids, 0, q_ids_size);
6805 		memset(q_handles, 0, q_handles_size);
6806 	}
6807 
6808 /* free_q_handles: */
6809 	free(q_handles, M_ICE);
6810 free_q_ids:
6811 	free(q_ids, M_ICE);
6812 free_q_teids:
6813 	free(q_teids, M_ICE);
6814 
6815 	return err;
6816 }
6817 
6818 /**
6819  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6820  * @vsi: the VSI to configure
6821  *
6822  * Sets the RSS table size and lookup table type for the VSI based on its
6823  * VSI type.
6824  */
6825 static void
6826 ice_vsi_set_rss_params(struct ice_vsi *vsi)
6827 {
6828 	struct ice_softc *sc = vsi->sc;
6829 	struct ice_hw_common_caps *cap;
6830 
6831 	cap = &sc->hw.func_caps.common_cap;
6832 
6833 	switch (vsi->type) {
6834 	case ICE_VSI_PF:
6835 		/* The PF VSI inherits RSS instance of the PF */
6836 		vsi->rss_table_size = cap->rss_table_size;
6837 		vsi->rss_lut_type = ICE_LUT_PF;
6838 		break;
6839 	case ICE_VSI_VF:
6840 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6841 		vsi->rss_lut_type = ICE_LUT_VSI;
6842 		break;
6843 	default:
6844 		device_printf(sc->dev,
6845 			      "VSI %d: RSS not supported for VSI type %d\n",
6846 			      vsi->idx, vsi->type);
6847 		break;
6848 	}
6849 }
6850 
6851 /**
6852  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6853  * @vsi: The VSI to add the context for
6854  *
6855  * Creates a sysctl context for storing txq sysctls. Additionally creates
6856  * a node rooted at the given VSI's main sysctl node. This context will be
6857  * used to store per-txq sysctls which may need to be released during the
6858  * driver's lifetime.
6859  */
6860 void
6861 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6862 {
6863 	struct sysctl_oid_list *vsi_list;
6864 
6865 	sysctl_ctx_init(&vsi->txqs_ctx);
6866 
6867 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6868 
6869 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
6870 					 CTLFLAG_RD, NULL, "Tx Queues");
6871 }
6872 
6873 /**
6874  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
6875  * @vsi: The VSI to add the context for
6876  *
6877  * Creates a sysctl context for storing rxq sysctls. Additionally creates
6878  * a node rooted at the given VSI's main sysctl node. This context will be
6879  * used to store per-rxq sysctls which may need to be released during the
6880  * driver's lifetime.
6881  */
6882 void
6883 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
6884 {
6885 	struct sysctl_oid_list *vsi_list;
6886 
6887 	sysctl_ctx_init(&vsi->rxqs_ctx);
6888 
6889 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6890 
6891 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
6892 					 CTLFLAG_RD, NULL, "Rx Queues");
6893 }
6894 
6895 /**
6896  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
6897  * @vsi: The VSI to delete from
6898  *
6899  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
6900  * Must be called prior to freeing the Tx queue memory, in order to avoid
6901  * having sysctls point at stale memory.
6902  */
6903 void
6904 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
6905 {
6906 	device_t dev = vsi->sc->dev;
6907 	int err;
6908 
6909 	if (vsi->txqs_node) {
6910 		err = sysctl_ctx_free(&vsi->txqs_ctx);
6911 		if (err)
6912 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
6913 				      vsi->idx, ice_err_str(err));
6914 		vsi->txqs_node = NULL;
6915 	}
6916 }
6917 
6918 /**
6919  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
6920  * @vsi: The VSI to delete from
6921  *
6922  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
6923  * Must be called prior to freeing the Rx queue memory, in order to avoid
6924  * having sysctls point at stale memory.
6925  */
6926 void
6927 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
6928 {
6929 	device_t dev = vsi->sc->dev;
6930 	int err;
6931 
6932 	if (vsi->rxqs_node) {
6933 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
6934 		if (err)
6935 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
6936 				      vsi->idx, ice_err_str(err));
6937 		vsi->rxqs_node = NULL;
6938 	}
6939 }
6940 
6941 /**
6942  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
6943  * @txq: pointer to the Tx queue
6944  *
6945 * Add per-queue sysctls for a given Tx queue. Can't be called during
6946 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6947  */
6948 void
6949 ice_add_txq_sysctls(struct ice_tx_queue *txq)
6950 {
6951 	struct ice_vsi *vsi = txq->vsi;
6952 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
6953 	struct sysctl_oid_list *txqs_list, *this_txq_list;
6954 	struct sysctl_oid *txq_node;
6955 	char txq_name[32], txq_desc[32];
6956 
6957 	const struct ice_sysctl_info ctls[] = {
6958 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
6959 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
6960 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
6961 		{ 0, 0, 0 }
6962 	};
6963 
6964 	const struct ice_sysctl_info *entry = ctls;
6965 
6966 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
6967 
6968 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
6969 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
6970 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
6971 				   CTLFLAG_RD, NULL, txq_desc);
6972 	this_txq_list = SYSCTL_CHILDREN(txq_node);
6973 
6974 	/* Add the Tx queue statistics */
6975 	while (entry->stat != 0) {
6976 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
6977 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6978 			       entry->description);
6979 		entry++;
6980 	}
6981 
6982 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
6983 		       CTLFLAG_RD, &txq->tc, 0,
6984 		       "Traffic Class that Queue belongs to");
6985 }
6986 
6987 /**
6988  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
6989  * @rxq: pointer to the Rx queue
6990  *
6991  * Add per-queue sysctls for a given Rx queue. Can't be called during
6992  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6993  */
6994 void
6995 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
6996 {
6997 	struct ice_vsi *vsi = rxq->vsi;
6998 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
6999 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
7000 	struct sysctl_oid *rxq_node;
7001 	char rxq_name[32], rxq_desc[32];
7002 
7003 	const struct ice_sysctl_info ctls[] = {
7004 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
7005 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
7006 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
7007 		{ 0, 0, 0 }
7008 	};
7009 
7010 	const struct ice_sysctl_info *entry = ctls;
7011 
7012 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
7013 
7014 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
7015 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
7016 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
7017 				   CTLFLAG_RD, NULL, rxq_desc);
7018 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
7019 
7020 	/* Add the Rx queue statistics */
7021 	while (entry->stat != 0) {
7022 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
7023 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7024 			       entry->description);
7025 		entry++;
7026 	}
7027 
7028 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
7029 		       CTLFLAG_RD, &rxq->tc, 0,
7030 		       "Traffic Class that Queue belongs to");
7031 }
7032 
7033 /**
7034  * ice_get_default_rss_key - Obtain a default RSS key
7035  * @seed: storage for the RSS key data
7036  *
7037  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
7038  * point to a block of memory that is at least 40 bytes in size.
7039  *
7040  * The key isn't randomly generated each time this function is called because
7041  * that makes the RSS key change every time we reconfigure RSS. This does mean
7042  * that we're hard coding a possibly 'well known' key. We might want to
7043  * investigate randomly generating this key once during the first call.
7044  */
7045 static void
7046 ice_get_default_rss_key(u8 *seed)
7047 {
7048 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
7049 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
7050 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
7051 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
7052 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
7053 	};
7054 
7055 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
7056 }
7057 
7058 /**
7059  * ice_set_rss_key - Configure a given VSI with the default RSS key
7060  * @vsi: the VSI to configure
7061  *
7062  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
7063  * If the kernel RSS interface is not available, this will fall back to our
7064  * pre-generated hash seed from ice_get_default_rss_key().
7065  */
7066 static int
7067 ice_set_rss_key(struct ice_vsi *vsi)
7068 {
7069 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
7070 	struct ice_softc *sc = vsi->sc;
7071 	struct ice_hw *hw = &sc->hw;
7072 	enum ice_status status;
7073 
7074 	/*
7075 	 * If the RSS kernel interface is disabled, this will return the
7076 	 * default RSS key above.
7077 	 */
7078 	rss_getkey(keydata.standard_rss_key);
7079 
7080 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
7081 	if (status) {
7082 		device_printf(sc->dev,
7083 			      "ice_aq_set_rss_key status %s, error %s\n",
7084 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7085 		return (EIO);
7086 	}
7087 
7088 	return (0);
7089 }
7090 
7091 /**
7092  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
7093  * @vsi: the VSI to configure
7094  *
7095  * If the package file is initialized, the default RSS flows are reset. We
7096  * need to reprogram the expected hash configuration. We'll use
7097  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7098  * support is not enabled, this macro will fall back to suitable defaults.
7099  */
7100 static void
7101 ice_set_rss_flow_flds(struct ice_vsi *vsi)
7102 {
7103 	struct ice_softc *sc = vsi->sc;
7104 	struct ice_hw *hw = &sc->hw;
7105 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7106 	device_t dev = sc->dev;
7107 	enum ice_status status;
7108 	u_int rss_hash_config;
7109 
7110 	rss_hash_config = rss_gethashconfig();
7111 
7112 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7113 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7114 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7115 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7116 		if (status)
7117 			device_printf(dev,
7118 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7119 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7120 	}
7121 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7122 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7123 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7124 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7125 		if (status)
7126 			device_printf(dev,
7127 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7128 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7129 	}
7130 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7131 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7132 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7133 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7134 		if (status)
7135 			device_printf(dev,
7136 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7137 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7138 	}
7139 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7140 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7141 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7142 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7143 		if (status)
7144 			device_printf(dev,
7145 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7146 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7147 	}
7148 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7149 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7150 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7151 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7152 		if (status)
7153 			device_printf(dev,
7154 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7155 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7156 	}
7157 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7158 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7159 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7160 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7161 		if (status)
7162 			device_printf(dev,
7163 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7164 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7165 	}
7166 
7167 	/* Warn about RSS hash types which are not supported */
7168 	/* coverity[dead_error_condition] */
7169 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7170 		device_printf(dev,
7171 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7172 			      vsi->idx);
7173 	}
7174 }
7175 
7176 /**
7177  * ice_set_rss_lut - Program the RSS lookup table for a VSI
7178  * @vsi: the VSI to configure
7179  *
7180  * Programs the RSS lookup table for a given VSI. We use
7181  * rss_get_indirection_to_bucket which will use the indirection table provided
7182  * by the kernel RSS interface when available. If the kernel RSS interface is
7183  * not available, we will fall back to a simple round-robin fashion queue
7184  * assignment.
7185  */
7186 static int
7187 ice_set_rss_lut(struct ice_vsi *vsi)
7188 {
7189 	struct ice_softc *sc = vsi->sc;
7190 	struct ice_hw *hw = &sc->hw;
7191 	device_t dev = sc->dev;
7192 	struct ice_aq_get_set_rss_lut_params lut_params;
7193 	enum ice_status status;
7194 	int i, err = 0;
7195 	u8 *lut;
7196 
7197 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7198 	if (!lut) {
7199 		device_printf(dev, "Failed to allocate RSS lut memory\n");
7200 		return (ENOMEM);
7201 	}
7202 
7203 	/* Populate the LUT with max no. of queues. If the RSS kernel
7204 	 * interface is disabled, this will assign the lookup table in
7205 	 * a simple round robin fashion
7206 	 */
7207 	for (i = 0; i < vsi->rss_table_size; i++) {
7208 		/* XXX: this needs to be changed if num_rx_queues ever counts
7209 		 * more than just the RSS queues */
7210 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7211 	}
7212 
7213 	lut_params.vsi_handle = vsi->idx;
7214 	lut_params.lut_size = vsi->rss_table_size;
7215 	lut_params.lut_type = vsi->rss_lut_type;
7216 	lut_params.lut = lut;
7217 	lut_params.global_lut_id = 0;
7218 	status = ice_aq_set_rss_lut(hw, &lut_params);
7219 	if (status) {
7220 		device_printf(dev,
7221 			      "Cannot set RSS lut, err %s aq_err %s\n",
7222 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7223 		err = (EIO);
7224 	}
7225 
7226 	free(lut, M_ICE);
7227 	return err;
7228 }
7229 
7230 /**
7231  * ice_config_rss - Configure RSS for a VSI
7232  * @vsi: the VSI to configure
7233  *
7234  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7235  * a given VSI.
7236  */
7237 int
7238 ice_config_rss(struct ice_vsi *vsi)
7239 {
7240 	int err;
7241 
7242 	/* Nothing to do, if RSS is not enabled */
7243 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7244 		return 0;
7245 
7246 	err = ice_set_rss_key(vsi);
7247 	if (err)
7248 		return err;
7249 
7250 	ice_set_rss_flow_flds(vsi);
7251 
7252 	return ice_set_rss_lut(vsi);
7253 }
7254 
7255 /**
7256  * ice_log_pkg_init - Log a message about status of DDP initialization
7257  * @sc: the device softc pointer
7258  * @pkg_status: the status result of ice_copy_and_init_pkg
7259  *
7260  * Called by ice_load_pkg after an attempt to download the DDP package
7261  * contents to the device to log an appropriate message for the system
7262  * administrator about download status.
7263  *
7264  * @post ice_is_init_pkg_successful function is used to determine
7265  * whether the download was successful and DDP package is compatible
7266  * with this driver. Otherwise driver will transition to Safe Mode.
7267  */
7268 void
7269 ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7270 {
7271 	struct ice_hw *hw = &sc->hw;
7272 	device_t dev = sc->dev;
7273 	struct sbuf *active_pkg, *os_pkg;
7274 
7275 	active_pkg = sbuf_new_auto();
7276 	ice_active_pkg_version_str(hw, active_pkg);
7277 	sbuf_finish(active_pkg);
7278 
7279 	os_pkg = sbuf_new_auto();
7280 	ice_os_pkg_version_str(hw, os_pkg);
7281 	sbuf_finish(os_pkg);
7282 
7283 	switch (pkg_status) {
7284 	case ICE_DDP_PKG_SUCCESS:
7285 		device_printf(dev,
7286 			      "The DDP package was successfully loaded: %s.\n",
7287 			      sbuf_data(active_pkg));
7288 		break;
7289 	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7290 	case ICE_DDP_PKG_ALREADY_LOADED:
7291 		device_printf(dev,
7292 			      "DDP package already present on device: %s.\n",
7293 			      sbuf_data(active_pkg));
7294 		break;
7295 	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7296 		device_printf(dev,
7297 			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7298 			      sbuf_data(active_pkg),
7299 			      sbuf_data(os_pkg));
7300 		break;
7301 	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7302 		device_printf(dev,
7303 			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7304 			      sbuf_data(active_pkg),
7305 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7306 		break;
7307 	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7308 		device_printf(dev,
7309 			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7310 			      sbuf_data(active_pkg),
7311 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7312 		break;
7313 	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7314 		/*
7315 		 * This assumes that the active_pkg_ver will not be
7316 		 * initialized if the ice_ddp package version is not
7317 		 * supported.
7318 		 */
7319 		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7320 			/* The ice_ddp version is not supported */
7321 			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7322 				device_printf(dev,
7323 					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7324 					      sbuf_data(os_pkg),
7325 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7326 			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7327 				device_printf(dev,
7328 					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7329 					      sbuf_data(os_pkg),
7330 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7331 			} else {
7332 				device_printf(dev,
7333 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7334 					      sbuf_data(os_pkg),
7335 					      sbuf_data(active_pkg),
7336 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7337 			}
7338 		} else {
7339 			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7340 				device_printf(dev,
7341 					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7342 					      sbuf_data(active_pkg),
7343 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7344 			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7345 				device_printf(dev,
7346 					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7347 					      sbuf_data(active_pkg),
7348 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7349 			} else {
7350 				device_printf(dev,
7351 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7352 					      sbuf_data(os_pkg),
7353 					      sbuf_data(active_pkg),
7354 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7355 			}
7356 		}
7357 		break;
7358 	case ICE_DDP_PKG_INVALID_FILE:
7359 		device_printf(dev,
7360 			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7361 		break;
7362 	case ICE_DDP_PKG_FW_MISMATCH:
7363 		device_printf(dev,
7364 			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7365 		break;
7366 	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7367 	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7368 		device_printf(dev,
7369 			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7370 		break;
7371 	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7372 		device_printf(dev,
7373 			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7374 		break;
7375 	case ICE_DDP_PKG_MANIFEST_INVALID:
7376 	case ICE_DDP_PKG_BUFFER_INVALID:
7377 		device_printf(dev,
7378 			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7379 		break;
7380 	default:
7381 		device_printf(dev,
7382 			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7383 		break;
7384 	}
7385 
7386 	sbuf_delete(active_pkg);
7387 	sbuf_delete(os_pkg);
7388 }
7389 
7390 /**
7391  * ice_load_pkg_file - Load the DDP package file using firmware_get
7392  * @sc: device private softc
7393  *
7394  * Use firmware_get to load the DDP package memory and then request that
7395  * firmware download the package contents and program the relevant hardware
7396  * bits.
7397  *
7398  * This function makes a copy of the DDP package memory which is tracked in
7399  * the ice_hw structure. The copy will be managed and released by
7400  * ice_deinit_hw(). This allows the firmware reference to be immediately
7401  * released using firmware_put.
7402  */
7403 enum ice_status
7404 ice_load_pkg_file(struct ice_softc *sc)
7405 {
7406 	struct ice_hw *hw = &sc->hw;
7407 	device_t dev = sc->dev;
7408 	enum ice_ddp_state state;
7409 	const struct firmware *pkg;
7410 	enum ice_status status = ICE_SUCCESS;
7411 	u8 cached_layer_count;
7412 	u8 *buf_copy;
7413 
7414 	pkg = firmware_get("ice_ddp");
7415 	if (!pkg) {
7416 		device_printf(dev,
7417 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7418 		if (cold)
7419 			device_printf(dev,
7420 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7421 		status = ICE_ERR_CFG;
7422 		goto err_load_pkg;
7423 	}
7424 
7425 	/* Check for topology change */
7426 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7427 		cached_layer_count = hw->num_tx_sched_layers;
7428 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7429 		if (buf_copy == NULL)
7430 			return ICE_ERR_NO_MEMORY;
7431 		memcpy(buf_copy, pkg->data, pkg->datasize);
7432 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7433 		free(buf_copy, M_ICE);
7434 		/* Success indicates a change was made */
7435 		if (status == ICE_SUCCESS) {
7436 			/* 9 -> 5 */
7437 			if (cached_layer_count == 9)
7438 				device_printf(dev,
7439 				    "Transmit balancing feature enabled\n");
7440 			else
7441 				device_printf(dev,
7442 				    "Transmit balancing feature disabled\n");
7443 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7444 			return (status);
7445 		} else if (status == ICE_ERR_CFG) {
7446 			/* Status is ICE_ERR_CFG when DDP does not support transmit balancing */
7447 			device_printf(dev,
7448 			    "DDP package does not support transmit balancing feature - please update to the latest DDP package and try again\n");
7449 		}
7450 	}
7451 
7452 	/* Copy and download the pkg contents */
7453 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7454 
7455 	/* Release the firmware reference */
7456 	firmware_put(pkg, FIRMWARE_UNLOAD);
7457 
7458 	/* Check the active DDP package version and log a message */
7459 	ice_log_pkg_init(sc, state);
7460 
7461 	/* Place the driver into safe mode */
7462 	if (ice_is_init_pkg_successful(state))
7463 		return (ICE_ERR_ALREADY_EXISTS);
7464 
7465 err_load_pkg:
7466 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7467 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7468 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7469 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7470 
7471 	return (status);
7472 }
7473 
7474 /**
7475  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7476  * @vsi: the vsi to retrieve the value for
7477  * @counter: the counter type to retrieve
7478  *
7479  * Returns the value for a given ifnet counter. To do so, we calculate the
7480  * value based on the matching hardware statistics.
7481  */
7482 uint64_t
7483 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7484 {
7485 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7486 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7487 
7488 	/* For some statistics, especially those related to error flows, we do
7489 	 * not have per-VSI counters. In this case, we just report the global
7490 	 * counters.
7491 	 */
7492 
7493 	switch (counter) {
7494 	case IFCOUNTER_IPACKETS:
7495 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7496 	case IFCOUNTER_IERRORS:
7497 		return (hs->crc_errors + hs->illegal_bytes +
7498 			hs->mac_local_faults + hs->mac_remote_faults +
7499 			hs->rx_len_errors + hs->rx_undersize +
7500 			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7501 	case IFCOUNTER_OPACKETS:
7502 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7503 	case IFCOUNTER_OERRORS:
7504 		return (es->tx_errors);
7505 	case IFCOUNTER_COLLISIONS:
7506 		return (0);
7507 	case IFCOUNTER_IBYTES:
7508 		return (es->rx_bytes);
7509 	case IFCOUNTER_OBYTES:
7510 		return (es->tx_bytes);
7511 	case IFCOUNTER_IMCASTS:
7512 		return (es->rx_multicast);
7513 	case IFCOUNTER_OMCASTS:
7514 		return (es->tx_multicast);
7515 	case IFCOUNTER_IQDROPS:
7516 		return (es->rx_discards);
7517 	case IFCOUNTER_OQDROPS:
7518 		return (hs->tx_dropped_link_down);
7519 	case IFCOUNTER_NOPROTO:
7520 		return (es->rx_unknown_protocol);
7521 	default:
7522 		return if_get_counter_default(vsi->sc->ifp, counter);
7523 	}
7524 }
7525 
7526 /**
7527  * ice_save_pci_info - Save PCI configuration fields in HW struct
7528  * @hw: the ice_hw struct to save the PCI information in
7529  * @dev: the device to get the PCI information from
7530  *
7531  * This should only be called once, early in the device attach
7532  * process.
7533  */
7534 void
7535 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7536 {
7537 	hw->vendor_id = pci_get_vendor(dev);
7538 	hw->device_id = pci_get_device(dev);
7539 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7540 	hw->subsystem_device_id = pci_get_subdevice(dev);
7541 	hw->revision_id = pci_get_revid(dev);
7542 	hw->bus.device = pci_get_slot(dev);
7543 	hw->bus.func = pci_get_function(dev);
7544 }
7545 
7546 /**
7547  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7548  * @sc: the device softc
7549  *
7550  * Replace the configuration for each VSI, and then cleanup replay
7551  * information. Called after a hardware reset in order to reconfigure the
7552  * active VSIs.
7553  */
7554 int
7555 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7556 {
7557 	struct ice_hw *hw = &sc->hw;
7558 	enum ice_status status;
7559 	int i;
7560 
7561 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7562 		struct ice_vsi *vsi = sc->all_vsi[i];
7563 
7564 		if (!vsi)
7565 			continue;
7566 
7567 		status = ice_replay_vsi(hw, vsi->idx);
7568 		if (status) {
7569 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7570 				      vsi->idx, ice_status_str(status),
7571 				      ice_aq_str(hw->adminq.sq_last_status));
7572 			return (EIO);
7573 		}
7574 	}
7575 
7576 	/* Cleanup replay filters after successful reconfiguration */
7577 	ice_replay_post(hw);
7578 	return (0);
7579 }
7580 
7581 /**
7582  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7583  * @vsi: pointer to the VSI structure
7584  *
7585  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7586  * during driver removal to ensure that all RSS resources are properly
7587  * released.
7588  *
7589  * @remark this function doesn't report an error as it is expected to be
7590  * called during driver reset and unload, and there isn't much the driver can
7591  * do if freeing RSS resources fails.
7592  */
7593 static void
7594 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7595 {
7596 	struct ice_softc *sc = vsi->sc;
7597 	struct ice_hw *hw = &sc->hw;
7598 	device_t dev = sc->dev;
7599 	enum ice_status status;
7600 
7601 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7602 	if (status)
7603 		device_printf(dev,
7604 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7605 			      vsi->idx, ice_status_str(status));
7606 
7607 	/* Remove this VSI from the RSS list */
7608 	ice_rem_vsi_rss_list(hw, vsi->idx);
7609 }
7610 
7611 /**
7612  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7613  * @sc: the device softc pointer
7614  *
7615  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7616  * interface.
7617  *
7618  * @remark This should be called while preparing for a reset, to cleanup stale
7619  * RSS configuration for all VSIs.
7620  */
7621 void
7622 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7623 {
7624 	int i;
7625 
7626 	/* No need to cleanup if RSS is not enabled */
7627 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7628 		return;
7629 
7630 	for (i = 0; i < sc->num_available_vsi; i++) {
7631 		struct ice_vsi *vsi = sc->all_vsi[i];
7632 
7633 		if (vsi)
7634 			ice_clean_vsi_rss_cfg(vsi);
7635 	}
7636 }
7637 
7638 /**
7639  * ice_requested_fec_mode - Return the requested FEC mode as a string
7640  * @pi: The port info structure
7641  *
7642  * Return a string representing the requested FEC mode.
7643  */
7644 static const char *
7645 ice_requested_fec_mode(struct ice_port_info *pi)
7646 {
7647 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7648 	enum ice_status status;
7649 
7650 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7651 				     &pcaps, NULL);
7652 	if (status)
7653 		/* Just report unknown if we can't get capabilities */
7654 		return "Unknown";
7655 
7656 	/* Check if RS-FEC has been requested first */
7657 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7658 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7659 		return ice_fec_str(ICE_FEC_RS);
7660 
7661 	/* If RS FEC has not been requested, then check BASE-R */
7662 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7663 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7664 		return ice_fec_str(ICE_FEC_BASER);
7665 
7666 	return ice_fec_str(ICE_FEC_NONE);
7667 }
7668 
7669 /**
7670  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7671  * @pi: The port info structure
7672  *
7673  * Return a string representing the current FEC mode.
7674  */
7675 static const char *
7676 ice_negotiated_fec_mode(struct ice_port_info *pi)
7677 {
7678 	/* First, check if RS has been requested first */
7679 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7680 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7681 		return ice_fec_str(ICE_FEC_RS);
7682 
7683 	/* If RS FEC has not been requested, then check BASE-R */
7684 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7685 		return ice_fec_str(ICE_FEC_BASER);
7686 
7687 	return ice_fec_str(ICE_FEC_NONE);
7688 }
7689 
7690 /**
7691  * ice_autoneg_mode - Return string indicating of autoneg completed
7692  * @pi: The port info structure
7693  *
7694  * Return "True" if autonegotiation is completed, "False" otherwise.
7695  */
7696 static const char *
7697 ice_autoneg_mode(struct ice_port_info *pi)
7698 {
7699 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7700 		return "True";
7701 	else
7702 		return "False";
7703 }
7704 
7705 /**
7706  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7707  * @pi: The port info structure
7708  *
7709  * Returns the current Flow Control mode as a string.
7710  */
7711 static const char *
7712 ice_flowcontrol_mode(struct ice_port_info *pi)
7713 {
7714 	return ice_fc_str(pi->fc.current_mode);
7715 }
7716 
7717 /**
7718  * ice_link_up_msg - Log a link up message with associated info
7719  * @sc: the device private softc
7720  *
7721  * Log a link up message with LOG_NOTICE message level. Include information
7722  * about the duplex, FEC mode, autonegotiation and flow control.
7723  */
7724 void
7725 ice_link_up_msg(struct ice_softc *sc)
7726 {
7727 	struct ice_hw *hw = &sc->hw;
7728 	struct ifnet *ifp = sc->ifp;
7729 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7730 
7731 	speed = ice_aq_speed_to_str(hw->port_info);
7732 	req_fec = ice_requested_fec_mode(hw->port_info);
7733 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7734 	autoneg = ice_autoneg_mode(hw->port_info);
7735 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7736 
7737 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7738 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7739 }
7740 
7741 /**
7742  * ice_update_laa_mac - Update MAC address if Locally Administered
7743  * @sc: the device softc
7744  *
7745  * Update the device MAC address when a Locally Administered Address is
7746  * assigned.
7747  *
7748  * This function does *not* update the MAC filter list itself. Instead, it
7749  * should be called after ice_rm_pf_default_mac_filters, so that the previous
7750  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7751  * so that the new address filter will be assigned.
7752  */
7753 int
7754 ice_update_laa_mac(struct ice_softc *sc)
7755 {
7756 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7757 	struct ice_hw *hw = &sc->hw;
7758 	enum ice_status status;
7759 
7760 	/* If the address is the same, then there is nothing to update */
7761 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7762 		return (0);
7763 
7764 	/* Reject Multicast addresses */
7765 	if (ETHER_IS_MULTICAST(lladdr))
7766 		return (EINVAL);
7767 
7768 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7769 	if (status) {
7770 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7771 			      lladdr, ":", ice_status_str(status),
7772 			      ice_aq_str(hw->adminq.sq_last_status));
7773 		return (EFAULT);
7774 	}
7775 
7776 	/* Copy the address into place of the LAN address. */
7777 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7778 
7779 	return (0);
7780 }
7781 
7782 /**
7783  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7784  * @sc: device softc
7785  *
7786  * This will potentially print out a warning message if bus bandwidth
7787  * is insufficient for full-speed operation.
7788  *
7789  * This should only be called once, during the attach process, after
7790  * hw->port_info has been filled out with port link topology information
7791  * (from the Get PHY Capabilities Admin Queue command).
7792  */
7793 void
7794 ice_get_and_print_bus_info(struct ice_softc *sc)
7795 {
7796 	struct ice_hw *hw = &sc->hw;
7797 	device_t dev = sc->dev;
7798 	u16 pci_link_status;
7799 	int offset;
7800 
7801 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7802 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7803 
7804 	/* Fill out hw struct with PCIE link status info */
7805 	ice_set_pci_link_status_data(hw, pci_link_status);
7806 
7807 	/* Use info to print out bandwidth messages */
7808 	ice_print_bus_link_data(dev, hw);
7809 
7810 	if (ice_pcie_bandwidth_check(sc)) {
7811 		device_printf(dev,
7812 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7813 		device_printf(dev,
7814 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7815 	}
7816 }
7817 
7818 /**
7819  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7820  * a 64-bit baudrate.
7821  * @speed: enum value to convert
7822  *
7823  * This only goes up to PCIE Gen 4.
7824  */
7825 static uint64_t
7826 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7827 {
7828 	/* If the PCI-E speed is Gen1 or Gen2, then report
7829 	 * only 80% of bus speed to account for encoding overhead.
7830 	 */
7831 	switch (speed) {
7832 	case ice_pcie_speed_2_5GT:
7833 		return IF_Gbps(2);
7834 	case ice_pcie_speed_5_0GT:
7835 		return IF_Gbps(4);
7836 	case ice_pcie_speed_8_0GT:
7837 		return IF_Gbps(8);
7838 	case ice_pcie_speed_16_0GT:
7839 		return IF_Gbps(16);
7840 	case ice_pcie_speed_unknown:
7841 	default:
7842 		return 0;
7843 	}
7844 }
7845 
7846 /**
7847  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7848  * a 32-bit number.
7849  * @width: enum value to convert
7850  */
7851 static int
7852 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7853 {
7854 	switch (width) {
7855 	case ice_pcie_lnk_x1:
7856 		return (1);
7857 	case ice_pcie_lnk_x2:
7858 		return (2);
7859 	case ice_pcie_lnk_x4:
7860 		return (4);
7861 	case ice_pcie_lnk_x8:
7862 		return (8);
7863 	case ice_pcie_lnk_x12:
7864 		return (12);
7865 	case ice_pcie_lnk_x16:
7866 		return (16);
7867 	case ice_pcie_lnk_x32:
7868 		return (32);
7869 	case ice_pcie_lnk_width_resrv:
7870 	case ice_pcie_lnk_width_unknown:
7871 	default:
7872 		return (0);
7873 	}
7874 }
7875 
7876 /**
7877  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
7878  * full-speed device operation.
7879  * @sc: adapter softc
7880  *
7881  * Returns 0 if sufficient; 1 if not.
7882  */
7883 static uint8_t
7884 ice_pcie_bandwidth_check(struct ice_softc *sc)
7885 {
7886 	struct ice_hw *hw = &sc->hw;
7887 	int num_ports, pcie_width;
7888 	u64 pcie_speed, port_speed;
7889 
7890 	MPASS(hw->port_info);
7891 
7892 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
7893 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
7894 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
7895 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
7896 
7897 	/*
7898 	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
7899 	 * failover.
7900 	 */
7901 	if (port_speed == IF_Gbps(100))
7902 		num_ports = 1;
7903 
7904 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
7905 }
7906 
7907 /**
7908  * ice_print_bus_link_data - Print PCI-E bandwidth information
7909  * @dev: device to print string for
7910  * @hw: hw struct with PCI-e link information
7911  */
7912 static void
7913 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
7914 {
7915         device_printf(dev, "PCI Express Bus: Speed %s %s\n",
7916             ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
7917             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
7918             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
7919             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
7920             (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
7921             (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
7922             (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
7923             (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
7924             (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
7925             (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
7926             (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
7927 }
7928 
7929 /**
7930  * ice_set_pci_link_status_data - store PCI bus info
7931  * @hw: pointer to hardware structure
7932  * @link_status: the link status word from PCI config space
7933  *
7934  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
7935  **/
7936 static void
7937 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
7938 {
7939 	u16 reg;
7940 
7941 	hw->bus.type = ice_bus_pci_express;
7942 
7943 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
7944 
7945 	switch (reg) {
7946 	case ice_pcie_lnk_x1:
7947 	case ice_pcie_lnk_x2:
7948 	case ice_pcie_lnk_x4:
7949 	case ice_pcie_lnk_x8:
7950 	case ice_pcie_lnk_x12:
7951 	case ice_pcie_lnk_x16:
7952 	case ice_pcie_lnk_x32:
7953 		hw->bus.width = (enum ice_pcie_link_width)reg;
7954 		break;
7955 	default:
7956 		hw->bus.width = ice_pcie_lnk_width_unknown;
7957 		break;
7958 	}
7959 
7960 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
7961 
7962 	switch (reg) {
7963 	case ice_pcie_speed_2_5GT:
7964 	case ice_pcie_speed_5_0GT:
7965 	case ice_pcie_speed_8_0GT:
7966 	case ice_pcie_speed_16_0GT:
7967 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
7968 		break;
7969 	default:
7970 		hw->bus.speed = ice_pcie_speed_unknown;
7971 		break;
7972 	}
7973 }
7974 
7975 /**
7976  * ice_init_link_events - Initialize Link Status Events mask
7977  * @sc: the device softc
7978  *
7979  * Initialize the Link Status Events mask to disable notification of link
7980  * events we don't care about in software. Also request that link status
7981  * events be enabled.
7982  */
7983 int
7984 ice_init_link_events(struct ice_softc *sc)
7985 {
7986 	struct ice_hw *hw = &sc->hw;
7987 	enum ice_status status;
7988 	u16 wanted_events;
7989 
7990 	/* Set the bits for the events that we want to be notified by */
7991 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
7992 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
7993 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
7994 
7995 	/* request that every event except the wanted events be masked */
7996 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
7997 	if (status) {
7998 		device_printf(sc->dev,
7999 			      "Failed to set link status event mask, err %s aq_err %s\n",
8000 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8001 		return (EIO);
8002 	}
8003 
8004 	/* Request link info with the LSE bit set to enable link status events */
8005 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
8006 	if (status) {
8007 		device_printf(sc->dev,
8008 			      "Failed to enable link status events, err %s aq_err %s\n",
8009 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8010 		return (EIO);
8011 	}
8012 
8013 	return (0);
8014 }
8015 
8016 /**
8017  * ice_handle_mdd_event - Handle possibly malicious events
8018  * @sc: the device softc
8019  *
8020  * Called by the admin task if an MDD detection interrupt is triggered.
8021  * Identifies possibly malicious events coming from VFs. Also triggers for
8022  * similar incorrect behavior from the PF as well.
8023  */
8024 void
8025 ice_handle_mdd_event(struct ice_softc *sc)
8026 {
8027 	struct ice_hw *hw = &sc->hw;
8028 	bool mdd_detected = false, request_reinit = false;
8029 	device_t dev = sc->dev;
8030 	u32 reg;
8031 
8032 	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
8033 		return;
8034 
8035 	reg = rd32(hw, GL_MDET_TX_TCLAN);
8036 	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
8037 		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
8038 		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
8039 		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
8040 		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
8041 
8042 		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
8043 			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
8044 
8045 		/* Only clear this event if it matches this PF, that way other
8046 		 * PFs can read the event and determine VF and queue number.
8047 		 */
8048 		if (pf_num == hw->pf_id)
8049 			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
8050 
8051 		mdd_detected = true;
8052 	}
8053 
8054 	/* Determine what triggered the MDD event */
8055 	reg = rd32(hw, GL_MDET_TX_PQM);
8056 	if (reg & GL_MDET_TX_PQM_VALID_M) {
8057 		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
8058 		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
8059 		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
8060 		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
8061 
8062 		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
8063 			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
8064 
8065 		/* Only clear this event if it matches this PF, that way other
8066 		 * PFs can read the event and determine VF and queue number.
8067 		 */
8068 		if (pf_num == hw->pf_id)
8069 			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
8070 
8071 		mdd_detected = true;
8072 	}
8073 
8074 	reg = rd32(hw, GL_MDET_RX);
8075 	if (reg & GL_MDET_RX_VALID_M) {
8076 		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
8077 		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
8078 		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
8079 		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
8080 
8081 		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
8082 			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
8083 
8084 		/* Only clear this event if it matches this PF, that way other
8085 		 * PFs can read the event and determine VF and queue number.
8086 		 */
8087 		if (pf_num == hw->pf_id)
8088 			wr32(hw, GL_MDET_RX, 0xffffffff);
8089 
8090 		mdd_detected = true;
8091 	}
8092 
8093 	/* Now, confirm that this event actually affects this PF, by checking
8094 	 * the PF registers.
8095 	 */
8096 	if (mdd_detected) {
8097 		reg = rd32(hw, PF_MDET_TX_TCLAN);
8098 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
8099 			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
8100 			sc->soft_stats.tx_mdd_count++;
8101 			request_reinit = true;
8102 		}
8103 
8104 		reg = rd32(hw, PF_MDET_TX_PQM);
8105 		if (reg & PF_MDET_TX_PQM_VALID_M) {
8106 			wr32(hw, PF_MDET_TX_PQM, 0xffff);
8107 			sc->soft_stats.tx_mdd_count++;
8108 			request_reinit = true;
8109 		}
8110 
8111 		reg = rd32(hw, PF_MDET_RX);
8112 		if (reg & PF_MDET_RX_VALID_M) {
8113 			wr32(hw, PF_MDET_RX, 0xffff);
8114 			sc->soft_stats.rx_mdd_count++;
8115 			request_reinit = true;
8116 		}
8117 	}
8118 
8119 	/* TODO: Implement logic to detect and handle events caused by VFs. */
8120 
8121 	/* request that the upper stack re-initialize the Tx/Rx queues */
8122 	if (request_reinit)
8123 		ice_request_stack_reinit(sc);
8124 
8125 	ice_flush(hw);
8126 }
8127 
8128 /**
8129  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8130  * @sc: the device softc
8131  *
8132  * @pre device is DCB capable and the FW LLDP agent has started
8133  *
8134  * Checks DCBX status and starts the DCBX agent if it is not in
8135  * a valid state via an AQ command.
8136  */
8137 static void
8138 ice_start_dcbx_agent(struct ice_softc *sc)
8139 {
8140 	struct ice_hw *hw = &sc->hw;
8141 	device_t dev = sc->dev;
8142 	bool dcbx_agent_status;
8143 	enum ice_status status;
8144 
8145 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8146 
8147 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8148 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8149 		/*
8150 		 * Start DCBX agent, but not LLDP. The return value isn't
8151 		 * checked here because a more detailed dcbx agent status is
8152 		 * retrieved and checked in ice_init_dcb() and elsewhere.
8153 		 */
8154 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8155 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8156 			device_printf(dev,
8157 			    "start_stop_dcbx failed, err %s aq_err %s\n",
8158 			    ice_status_str(status),
8159 			    ice_aq_str(hw->adminq.sq_last_status));
8160 	}
8161 }
8162 
8163 /**
8164  * ice_init_dcb_setup - Initialize DCB settings for HW
8165  * @sc: the device softc
8166  *
8167  * This needs to be called after the fw_lldp_agent sysctl is added, since that
8168  * can update the device's LLDP agent status if a tunable value is set.
8169  *
8170  * Get and store the initial state of DCB settings on driver load. Print out
8171  * informational messages as well.
8172  */
8173 void
8174 ice_init_dcb_setup(struct ice_softc *sc)
8175 {
8176 	struct ice_dcbx_cfg *local_dcbx_cfg;
8177 	struct ice_hw *hw = &sc->hw;
8178 	device_t dev = sc->dev;
8179 	enum ice_status status;
8180 	u8 pfcmode_ret;
8181 
8182 	/* Don't do anything if DCB isn't supported */
8183 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8184 		device_printf(dev, "%s: No DCB support\n", __func__);
8185 		return;
8186 	}
8187 
8188 	/* Starts DCBX agent if it needs starting */
8189 	ice_start_dcbx_agent(sc);
8190 
8191 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8192 	status = ice_init_dcb(hw, true);
8193 
8194 	/* If there is an error, then FW LLDP is not in a usable state */
8195 	if (status != 0 && status != ICE_ERR_NOT_READY) {
8196 		/* Don't print an error message if the return code from the AQ
8197 		 * cmd performed in ice_init_dcb() is EPERM; that means the
8198 		 * FW LLDP engine is disabled, and that is a valid state.
8199 		 */
8200 		if (!(status == ICE_ERR_AQ_ERROR &&
8201 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8202 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8203 				      ice_status_str(status),
8204 				      ice_aq_str(hw->adminq.sq_last_status));
8205 		}
8206 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8207 	}
8208 
8209 	switch (hw->port_info->qos_cfg.dcbx_status) {
8210 	case ICE_DCBX_STATUS_DIS:
8211 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8212 		break;
8213 	case ICE_DCBX_STATUS_NOT_STARTED:
8214 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8215 		break;
8216 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8217 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8218 		break;
8219 	default:
8220 		break;
8221 	}
8222 
8223 	/* LLDP disabled in FW */
8224 	if (hw->port_info->qos_cfg.is_sw_lldp) {
8225 		ice_add_rx_lldp_filter(sc);
8226 		device_printf(dev, "Firmware LLDP agent disabled\n");
8227 	}
8228 
8229 	/* Query and cache PFC mode */
8230 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8231 	if (status) {
8232 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8233 			      ice_status_str(status),
8234 			      ice_aq_str(hw->adminq.sq_last_status));
8235 	}
8236 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8237 	switch (pfcmode_ret) {
8238 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8239 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8240 		break;
8241 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8242 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8243 		break;
8244 	default:
8245 		/* DCB is disabled, but we shouldn't get here */
8246 		break;
8247 	}
8248 
8249 	/* Set default SW MIB for init */
8250 	ice_set_default_local_mib_settings(sc);
8251 
8252 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8253 }
8254 
8255 /**
8256  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8257  * @dcbcfg: DCB configuration to examine
8258  *
8259  * Scans a TC mapping table inside dcbcfg to find traffic classes
8260  * enabled and @returns a bitmask of enabled TCs
8261  */
8262 u8
8263 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8264 {
8265 	u8 tc_map = 0;
8266 	int i = 0;
8267 
8268 	switch (dcbcfg->pfc_mode) {
8269 	case ICE_QOS_MODE_VLAN:
8270 		/* XXX: "i" is actually "User Priority" here, not
8271 		 * Traffic Class, but the max for both is 8, so it works
8272 		 * out here.
8273 		 */
8274 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8275 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8276 		break;
8277 	case ICE_QOS_MODE_DSCP:
8278 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8279 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8280 		break;
8281 	default:
8282 		/* Invalid Mode */
8283 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8284 		break;
8285 	}
8286 
8287 	return (tc_map);
8288 }
8289 
8290 /**
8291  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8292  * @dcbcfg: config to retrieve number of TCs from
8293  *
8294  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8295  * Priority Assignment Table, a value from 1 to 8. If there are
8296  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8297  * then returns 0.
8298  */
8299 static u8
8300 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8301 {
8302 	u8 tc_map;
8303 
8304 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8305 
8306 	return (ice_dcb_tc_contig(tc_map));
8307 }
8308 
8309 /**
8310  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8311  * @sc: the device private softc
8312  * @event: event received on a control queue
8313  *
8314  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8315  */
8316 static void
8317 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8318 {
8319 	struct ice_aqc_lldp_get_mib *params =
8320 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8321 	u8 mib_type, bridge_type, tx_status;
8322 
8323 	static const char* mib_type_strings[] = {
8324 	    "Local MIB",
8325 	    "Remote MIB",
8326 	    "Reserved",
8327 	    "Reserved"
8328 	};
8329 	static const char* bridge_type_strings[] = {
8330 	    "Nearest Bridge",
8331 	    "Non-TPMR Bridge",
8332 	    "Reserved",
8333 	    "Reserved"
8334 	};
8335 	static const char* tx_status_strings[] = {
8336 	    "Port's TX active",
8337 	    "Port's TX suspended and drained",
8338 	    "Reserved",
8339 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8340 	};
8341 
8342 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8343 	    ICE_AQ_LLDP_MIB_TYPE_S;
8344 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8345 	    ICE_AQ_LLDP_BRID_TYPE_S;
8346 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8347 	    ICE_AQ_LLDP_TX_S;
8348 
8349 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8350 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8351 	    tx_status_strings[tx_status]);
8352 
8353 	/* Nothing else to report */
8354 	if (!event->msg_buf)
8355 		return;
8356 
8357 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8358 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8359 			event->msg_len);
8360 }
8361 
8362 /**
8363  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8364  * @sc: the device private softc
8365  * @old_cfg: Old DCBX configuration to compare against
8366  * @new_cfg: New DCBX configuration to check
8367  *
8368  * @return true if something changed in new_cfg that requires the driver
8369  * to do some reconfiguration.
8370  */
8371 static bool
8372 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8373     struct ice_dcbx_cfg *new_cfg)
8374 {
8375 	struct ice_hw *hw = &sc->hw;
8376 	bool needs_reconfig = false;
8377 
8378 	/* No change detected in DCBX config */
8379 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8380 		ice_debug(hw, ICE_DBG_DCB,
8381 		    "No change detected in local DCBX configuration\n");
8382 		return (false);
8383 	}
8384 
8385 	/* Check if ETS config has changed */
8386 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8387 		   sizeof(new_cfg->etscfg))) {
8388 		/* If Priority Table has changed, then driver reconfig is needed */
8389 		if (memcmp(&new_cfg->etscfg.prio_table,
8390 			   &old_cfg->etscfg.prio_table,
8391 			   sizeof(new_cfg->etscfg.prio_table))) {
8392 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8393 			needs_reconfig = true;
8394 		}
8395 
8396 		/* These are just informational */
8397 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8398 			   &old_cfg->etscfg.tcbwtable,
8399 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8400 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8401 			needs_reconfig = true;
8402 		}
8403 
8404 		if (memcmp(&new_cfg->etscfg.tsatable,
8405 			   &old_cfg->etscfg.tsatable,
8406 			   sizeof(new_cfg->etscfg.tsatable))) {
8407 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8408 			needs_reconfig = true;
8409 		}
8410 	}
8411 
8412 	/* Check if PFC config has changed */
8413 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8414 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8415 		needs_reconfig = true;
8416 	}
8417 
8418 	/* Check if APP table has changed */
8419 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8420 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8421 
8422 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8423 
8424 	return (needs_reconfig);
8425 }
8426 
8427 /**
8428  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8429  * @sc: the device private softc
8430  *
8431  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8432  */
8433 static void
8434 ice_stop_pf_vsi(struct ice_softc *sc)
8435 {
8436 	/* Dissociate the Tx and Rx queues from the interrupts */
8437 	ice_flush_txq_interrupts(&sc->pf_vsi);
8438 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8439 
8440 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8441 		return;
8442 
8443 	/* Disable the Tx and Rx queues */
8444 	ice_vsi_disable_tx(&sc->pf_vsi);
8445 	ice_control_all_rx_queues(&sc->pf_vsi, false);
8446 }
8447 
8448 /**
8449  * ice_vsi_setup_q_map - Setup a VSI queue map
8450  * @vsi: the VSI being configured
8451  * @ctxt: VSI context structure
8452  */
8453 static void
8454 ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8455 {
8456 	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8457 	u16 offset = 0, qmap = 0, pow = 0;
8458 	u16 num_q_per_tc, qcount_rx, rem_queues;
8459 	int i, j, k;
8460 
8461 	if (vsi->num_tcs == 0) {
8462 		/* at least TC0 should be enabled by default */
8463 		vsi->num_tcs = 1;
8464 		vsi->tc_map = 0x1;
8465 	}
8466 
8467 	qcount_rx = vsi->num_rx_queues;
8468 	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8469 
8470 	if (!num_q_per_tc)
8471 		num_q_per_tc = 1;
8472 
8473 	/* Set initial values for # of queues to use for each active TC */
8474 	ice_for_each_traffic_class(i)
8475 		if (i < vsi->num_tcs)
8476 			qcounts[i] = num_q_per_tc;
8477 
8478 	/* If any queues are unassigned, add them to TC 0 */
8479 	rem_queues = qcount_rx % vsi->num_tcs;
8480 	if (rem_queues > 0)
8481 		qcounts[0] += rem_queues;
8482 
8483 	/* TC mapping is a function of the number of Rx queues assigned to the
8484 	 * VSI for each traffic class and the offset of these queues.
8485 	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8486 	 * queues allocated to TC0. No:of queues is a power-of-2.
8487 	 *
8488 	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8489 	 * queue, this way, traffic for the given TC will be sent to the default
8490 	 * queue.
8491 	 *
8492 	 * Setup number and offset of Rx queues for all TCs for the VSI
8493 	 */
8494 	ice_for_each_traffic_class(i) {
8495 		if (!(vsi->tc_map & BIT(i))) {
8496 			/* TC is not enabled */
8497 			vsi->tc_info[i].qoffset = 0;
8498 			vsi->tc_info[i].qcount_rx = 1;
8499 			vsi->tc_info[i].qcount_tx = 1;
8500 
8501 			ctxt->info.tc_mapping[i] = 0;
8502 			continue;
8503 		}
8504 
8505 		/* TC is enabled */
8506 		vsi->tc_info[i].qoffset = offset;
8507 		vsi->tc_info[i].qcount_rx = qcounts[i];
8508 		vsi->tc_info[i].qcount_tx = qcounts[i];
8509 
8510 		/* find the (rounded up) log-2 of queue count for current TC */
8511 		pow = fls(qcounts[i] - 1);
8512 
8513 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8514 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8515 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8516 			 ICE_AQ_VSI_TC_Q_NUM_M);
8517 		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8518 
8519 		/* Store traffic class and handle data in queue structures */
8520 		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8521 			vsi->tx_queues[j].q_handle = k;
8522 			vsi->tx_queues[j].tc = i;
8523 
8524 			vsi->rx_queues[j].tc = i;
8525 		}
8526 
8527 		offset += qcounts[i];
8528 	}
8529 
8530 	/* Rx queue mapping */
8531 	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8532 	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8533 	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8534 }
8535 
8536 /**
8537  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8538  * @sc: the device private softc
8539  * @tc_map: traffic class bitmap
8540  *
8541  * @pre VSI queues are stopped
8542  *
8543  * @return 0 if configuration is successful
8544  * @return EIO if Update VSI AQ cmd fails
8545  * @return ENODEV if updating Tx Scheduler fails
8546  */
8547 static int
8548 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8549 {
8550 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8551 	struct ice_vsi *vsi = &sc->pf_vsi;
8552 	struct ice_hw *hw = &sc->hw;
8553 	struct ice_vsi_ctx ctx = { 0 };
8554 	device_t dev = sc->dev;
8555 	enum ice_status status;
8556 	u8 num_tcs = 0;
8557 	int i = 0;
8558 
8559 	/* Count the number of enabled Traffic Classes */
8560 	ice_for_each_traffic_class(i)
8561 		if (tc_map & BIT(i))
8562 			num_tcs++;
8563 
8564 	vsi->tc_map = tc_map;
8565 	vsi->num_tcs = num_tcs;
8566 
8567 	/* Set default parameters for context */
8568 	ctx.vf_num = 0;
8569 	ctx.info = vsi->info;
8570 
8571 	/* Setup queue map */
8572 	ice_vsi_setup_q_map(vsi, &ctx);
8573 
8574 	/* Update VSI configuration in firmware (RX queues) */
8575 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8576 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8577 	if (status) {
8578 		device_printf(dev,
8579 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8580 		    __func__, ice_status_str(status),
8581 		    ice_aq_str(hw->adminq.sq_last_status));
8582 		return (EIO);
8583 	}
8584 	vsi->info = ctx.info;
8585 
8586 	/* Use values derived in ice_vsi_setup_q_map() */
8587 	for (i = 0; i < num_tcs; i++)
8588 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8589 
8590 	if (hw->debug_mask & ICE_DBG_DCB) {
8591 		device_printf(dev, "%s: max_txqs:", __func__);
8592 		ice_for_each_traffic_class(i)
8593 			printf(" %d", max_txqs[i]);
8594 		printf("\n");
8595 	}
8596 
8597 	/* Update LAN Tx queue info in firmware */
8598 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8599 				 max_txqs);
8600 	if (status) {
8601 		device_printf(dev,
8602 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8603 		    __func__, ice_status_str(status),
8604 		    ice_aq_str(hw->adminq.sq_last_status));
8605 		return (ENODEV);
8606 	}
8607 
8608 	vsi->info.valid_sections = 0;
8609 
8610 	return (0);
8611 }
8612 
8613 /**
8614  * ice_dcb_tc_contig - Count TCs if they're contiguous
8615  * @tc_map: pointer to priority table
8616  *
8617  * @return The number of traffic classes in
8618  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8619  */
8620 static u8
8621 ice_dcb_tc_contig(u8 tc_map)
8622 {
8623 	bool tc_unused = false;
8624 	u8 ret = 0;
8625 
8626 	/* Scan bitmask for contiguous TCs starting with TC0 */
8627 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8628 		if (tc_map & BIT(i)) {
8629 			if (!tc_unused) {
8630 				ret++;
8631 			} else {
8632 				/* Non-contiguous TCs detected */
8633 				return (0);
8634 			}
8635 		} else
8636 			tc_unused = true;
8637 	}
8638 
8639 	return (ret);
8640 }
8641 
8642 /**
8643  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8644  * @sc: the device private softc
8645  *
8646  * @pre All VSIs have been disabled/stopped
8647  *
8648  * Reconfigures VSI settings based on local_dcbx_cfg.
8649  */
8650 static void
8651 ice_dcb_recfg(struct ice_softc *sc)
8652 {
8653 	struct ice_dcbx_cfg *dcbcfg =
8654 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8655 	device_t dev = sc->dev;
8656 	u8 tc_map = 0;
8657 	int ret;
8658 
8659 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8660 
8661 	/* If non-contiguous TCs are used, then configure
8662 	 * the default TC instead. There's no support for
8663 	 * non-contiguous TCs being used.
8664 	 */
8665 	if (ice_dcb_tc_contig(tc_map) == 0) {
8666 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8667 		ice_set_default_local_lldp_mib(sc);
8668 	}
8669 
8670 	/* Reconfigure VSI queues to add/remove traffic classes */
8671 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8672 	if (ret)
8673 		device_printf(dev,
8674 		    "Failed to configure TCs for PF VSI, err %s\n",
8675 		    ice_err_str(ret));
8676 
8677 }
8678 
8679 /**
8680  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8681  * @sc: device softc structure
8682  *
8683  * Overwrites the driver's SW local LLDP MIB with default settings. This
8684  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8685  * admin queue command.
8686  */
8687 static void
8688 ice_set_default_local_mib_settings(struct ice_softc *sc)
8689 {
8690 	struct ice_dcbx_cfg *dcbcfg;
8691 	struct ice_hw *hw = &sc->hw;
8692 	struct ice_port_info *pi;
8693 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8694 
8695 	pi = hw->port_info;
8696 
8697 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8698 
8699 	maxtcs = hw->func_caps.common_cap.maxtc;
8700 	/* This value is only 3 bits; 8 TCs maps to 0 */
8701 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8702 
8703 	/* VLAN vs DSCP mode needs to be preserved */
8704 	old_pfc_mode = dcbcfg->pfc_mode;
8705 
8706 	/**
8707 	 * Setup the default settings used by the driver for the Set Local
8708 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8709 	 * PFC, TSA=2).
8710 	 */
8711 	memset(dcbcfg, 0, sizeof(*dcbcfg));
8712 
8713 	dcbcfg->etscfg.willing = 1;
8714 	dcbcfg->etscfg.tcbwtable[0] = 100;
8715 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8716 	dcbcfg->etscfg.tsatable[0] = 2;
8717 
8718 	dcbcfg->etsrec = dcbcfg->etscfg;
8719 	dcbcfg->etsrec.willing = 0;
8720 
8721 	dcbcfg->pfc.willing = 1;
8722 	dcbcfg->pfc.pfccap = maxtcs;
8723 
8724 	dcbcfg->pfc_mode = old_pfc_mode;
8725 }
8726 
8727 /**
8728  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8729  * @sc: the device private softc
8730  * @pending_mib: FW has a pending MIB change to execute
8731  *
8732  * @pre Determined that the DCB configuration requires a change
8733  *
8734  * Reconfigures the PF LAN VSI based on updated DCB configuration
8735  * found in the hw struct's/port_info's/ local dcbx configuration.
8736  */
8737 void
8738 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8739 {
8740 	struct ice_aqc_port_ets_elem port_ets = { 0 };
8741 	struct ice_dcbx_cfg *local_dcbx_cfg;
8742 	struct ice_hw *hw = &sc->hw;
8743 	struct ice_port_info *pi;
8744 	device_t dev = sc->dev;
8745 	enum ice_status status;
8746 
8747 	pi = sc->hw.port_info;
8748 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8749 
8750 	ice_rdma_notify_dcb_qos_change(sc);
8751 	/* If there's a pending MIB, tell the FW to execute the MIB change
8752 	 * now.
8753 	 */
8754 	if (pending_mib) {
8755 		status = ice_lldp_execute_pending_mib(hw);
8756 		if ((status == ICE_ERR_AQ_ERROR) &&
8757 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8758 			device_printf(dev,
8759 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8760 		} else if (status) {
8761 			device_printf(dev,
8762 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8763 			    ice_status_str(status),
8764 			    ice_aq_str(hw->adminq.sq_last_status));
8765 			/* This won't break traffic, but QoS will not work as expected */
8766 		}
8767 	}
8768 
8769 	/* Set state when there's more than one TC */
8770 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8771 		device_printf(dev, "Multiple traffic classes enabled\n");
8772 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8773 	} else {
8774 		device_printf(dev, "Multiple traffic classes disabled\n");
8775 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8776 	}
8777 
8778 	/* Disable PF VSI since it's going to be reconfigured */
8779 	ice_stop_pf_vsi(sc);
8780 
8781 	/* Query ETS configuration and update SW Tx scheduler info */
8782 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8783 	if (status != ICE_SUCCESS) {
8784 		device_printf(dev,
8785 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8786 		    ice_status_str(status),
8787 		    ice_aq_str(hw->adminq.sq_last_status));
8788 		/* This won't break traffic, but QoS will not work as expected */
8789 	}
8790 
8791 	/* Change PF VSI configuration */
8792 	ice_dcb_recfg(sc);
8793 
8794 	/* Send new configuration to RDMA client driver */
8795 	ice_rdma_dcb_qos_update(sc, pi);
8796 
8797 	ice_request_stack_reinit(sc);
8798 }
8799 
8800 /**
8801  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8802  * @sc: the device private softc
8803  * @event: event received on a control queue
8804  *
8805  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8806  * VSI depending on what has changed. This will also print out some debug
8807  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8808  */
8809 static void
8810 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8811 {
8812 	struct ice_aqc_lldp_get_mib *params =
8813 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8814 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8815 	struct ice_port_info *pi;
8816 	device_t dev = sc->dev;
8817 	struct ice_hw *hw = &sc->hw;
8818 	bool needs_reconfig, mib_is_pending;
8819 	enum ice_status status;
8820 	u8 mib_type, bridge_type;
8821 
8822 	ASSERT_CFG_LOCKED(sc);
8823 
8824 	ice_debug_print_mib_change_event(sc, event);
8825 
8826 	pi = sc->hw.port_info;
8827 
8828 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8829 	    ICE_AQ_LLDP_MIB_TYPE_S;
8830 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8831 	    ICE_AQ_LLDP_BRID_TYPE_S;
8832 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8833 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8834 
8835 	/* Ignore if event is not for Nearest Bridge */
8836 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8837 		return;
8838 
8839 	/* Check MIB Type and return if event for Remote MIB update */
8840 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8841 		/* Update the cached remote MIB and return */
8842 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8843 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8844 					 &pi->qos_cfg.remote_dcbx_cfg);
8845 		if (status)
8846 			device_printf(dev,
8847 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8848 			    __func__, ice_status_str(status),
8849 			    ice_aq_str(hw->adminq.sq_last_status));
8850 		/* Not fatal if this fails */
8851 		return;
8852 	}
8853 
8854 	/* Save line length by aliasing the local dcbx cfg */
8855 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8856 	/* Save off the old configuration and clear current config */
8857 	tmp_dcbx_cfg = *local_dcbx_cfg;
8858 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8859 
8860 	/* Update the current local_dcbx_cfg with new data */
8861 	if (mib_is_pending) {
8862 		ice_get_dcb_cfg_from_mib_change(pi, event);
8863 	} else {
8864 		/* Get updated DCBX data from firmware */
8865 		status = ice_get_dcb_cfg(pi);
8866 		if (status) {
8867 			device_printf(dev,
8868 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
8869 			    __func__, ice_status_str(status),
8870 			    ice_aq_str(hw->adminq.sq_last_status));
8871 			return;
8872 		}
8873 	}
8874 
8875 	/* Check to see if DCB needs reconfiguring */
8876 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
8877 	    local_dcbx_cfg);
8878 
8879 	if (!needs_reconfig && !mib_is_pending)
8880 		return;
8881 
8882 	/* Reconfigure -- this will also notify FW that configuration is done,
8883 	 * if the FW MIB change is only pending instead of executed.
8884 	 */
8885 	ice_do_dcb_reconfig(sc, mib_is_pending);
8886 }
8887 
8888 /**
8889  * ice_send_version - Send driver version to firmware
8890  * @sc: the device private softc
8891  *
8892  * Send the driver version to the firmware. This must be called as early as
8893  * possible after ice_init_hw().
8894  */
8895 int
8896 ice_send_version(struct ice_softc *sc)
8897 {
8898 	struct ice_driver_ver driver_version = {0};
8899 	struct ice_hw *hw = &sc->hw;
8900 	device_t dev = sc->dev;
8901 	enum ice_status status;
8902 
8903 	driver_version.major_ver = ice_major_version;
8904 	driver_version.minor_ver = ice_minor_version;
8905 	driver_version.build_ver = ice_patch_version;
8906 	driver_version.subbuild_ver = ice_rc_version;
8907 
8908 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
8909 		sizeof(driver_version.driver_string));
8910 
8911 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
8912 	if (status) {
8913 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
8914 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8915 		return (EIO);
8916 	}
8917 
8918 	return (0);
8919 }
8920 
8921 /**
8922  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
8923  * @sc: device softc
8924  * @event: event received on a control queue
8925  *
8926  * Prints out a message when a LAN overflow event is detected on a receive
8927  * queue.
8928  */
8929 static void
8930 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8931 {
8932 	struct ice_aqc_event_lan_overflow *params =
8933 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
8934 	struct ice_hw *hw = &sc->hw;
8935 
8936 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
8937 		  LE32_TO_CPU(params->prtdcb_ruptq),
8938 		  LE32_TO_CPU(params->qtx_ctl));
8939 }
8940 
8941 /**
8942  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
8943  * @vsi: the VSI to target packets to
8944  * @list: the list to add the filter to
8945  * @ethertype: the Ethertype to filter on
8946  * @direction: The direction of the filter (Tx or Rx)
8947  * @action: the action to take
8948  *
8949  * Add an Ethertype filter to a filter list. Used to forward a series of
8950  * filters to the firmware for configuring the switch.
8951  *
8952  * Returns 0 on success, and an error code on failure.
8953  */
8954 static int
8955 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
8956 			  u16 ethertype, u16 direction,
8957 			  enum ice_sw_fwd_act_type action)
8958 {
8959 	struct ice_fltr_list_entry *entry;
8960 
8961 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
8962 
8963 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
8964 	if (!entry)
8965 		return (ENOMEM);
8966 
8967 	entry->fltr_info.flag = direction;
8968 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
8969 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
8970 	entry->fltr_info.fltr_act = action;
8971 	entry->fltr_info.vsi_handle = vsi->idx;
8972 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
8973 
8974 	LIST_ADD(&entry->list_entry, list);
8975 
8976 	return 0;
8977 }
8978 
8979 #define ETHERTYPE_PAUSE_FRAMES 0x8808
8980 #define ETHERTYPE_LLDP_FRAMES 0x88cc
8981 
8982 /**
8983  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
8984  * @sc: the device private softc
8985  *
8986  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
8987  * the host. This prevents malicious VFs from sending these frames and being
8988  * able to control or configure the network.
8989  */
8990 int
8991 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
8992 {
8993 	struct ice_list_head ethertype_list;
8994 	struct ice_vsi *vsi = &sc->pf_vsi;
8995 	struct ice_hw *hw = &sc->hw;
8996 	device_t dev = sc->dev;
8997 	enum ice_status status;
8998 	int err = 0;
8999 
9000 	INIT_LIST_HEAD(&ethertype_list);
9001 
9002 	/*
9003 	 * Note that the switch filters will ignore the VSI index for the drop
9004 	 * action, so we only need to program drop filters once for the main
9005 	 * VSI.
9006 	 */
9007 
9008 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
9009 	if (sc->enable_tx_fc_filter) {
9010 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9011 						ETHERTYPE_PAUSE_FRAMES,
9012 						ICE_FLTR_TX, ICE_DROP_PACKET);
9013 		if (err)
9014 			goto free_ethertype_list;
9015 	}
9016 
9017 	/* Configure switch to drop LLDP frames coming from any VSI */
9018 	if (sc->enable_tx_lldp_filter) {
9019 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9020 						ETHERTYPE_LLDP_FRAMES,
9021 						ICE_FLTR_TX, ICE_DROP_PACKET);
9022 		if (err)
9023 			goto free_ethertype_list;
9024 	}
9025 
9026 	status = ice_add_eth_mac(hw, &ethertype_list);
9027 	if (status) {
9028 		device_printf(dev,
9029 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
9030 			      ice_status_str(status),
9031 			      ice_aq_str(hw->adminq.sq_last_status));
9032 		err = (EIO);
9033 	}
9034 
9035 free_ethertype_list:
9036 	ice_free_fltr_list(&ethertype_list);
9037 	return err;
9038 }
9039 
9040 /**
9041  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
9042  * @sc: the device private structure
9043  *
9044  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
9045  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
9046  * be forwarded to the stack.
9047  */
9048 void
9049 ice_add_rx_lldp_filter(struct ice_softc *sc)
9050 {
9051 	struct ice_list_head ethertype_list;
9052 	struct ice_vsi *vsi = &sc->pf_vsi;
9053 	struct ice_hw *hw = &sc->hw;
9054 	device_t dev = sc->dev;
9055 	enum ice_status status;
9056 	int err;
9057 	u16 vsi_num;
9058 
9059 	/*
9060 	 * If FW is new enough, use a direct AQ command to perform the filter
9061 	 * addition.
9062 	 */
9063 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9064 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9065 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
9066 		if (status) {
9067 			device_printf(dev,
9068 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9069 			    ice_status_str(status),
9070 			    ice_aq_str(hw->adminq.sq_last_status));
9071 		} else
9072 			ice_set_state(&sc->state,
9073 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9074 		return;
9075 	}
9076 
9077 	INIT_LIST_HEAD(&ethertype_list);
9078 
9079 	/* Forward Rx LLDP frames to the stack */
9080 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9081 					ETHERTYPE_LLDP_FRAMES,
9082 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9083 	if (err) {
9084 		device_printf(dev,
9085 			      "Failed to add Rx LLDP filter, err %s\n",
9086 			      ice_err_str(err));
9087 		goto free_ethertype_list;
9088 	}
9089 
9090 	status = ice_add_eth_mac(hw, &ethertype_list);
9091 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
9092 		device_printf(dev,
9093 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9094 			      ice_status_str(status),
9095 			      ice_aq_str(hw->adminq.sq_last_status));
9096 	} else {
9097 		/*
9098 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
9099 		 * already existing filter as an error case.
9100 		 */
9101 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9102 	}
9103 
9104 free_ethertype_list:
9105 	ice_free_fltr_list(&ethertype_list);
9106 }
9107 
9108 /**
9109  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9110  * @sc: the device private structure
9111  *
9112  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9113  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9114  * stack.
9115  */
9116 static void
9117 ice_del_rx_lldp_filter(struct ice_softc *sc)
9118 {
9119 	struct ice_list_head ethertype_list;
9120 	struct ice_vsi *vsi = &sc->pf_vsi;
9121 	struct ice_hw *hw = &sc->hw;
9122 	device_t dev = sc->dev;
9123 	enum ice_status status;
9124 	int err;
9125 	u16 vsi_num;
9126 
9127 	/*
9128 	 * Only in the scenario where the driver added the filter during
9129 	 * this session (while the driver was loaded) would we be able to
9130 	 * delete this filter.
9131 	 */
9132 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9133 		return;
9134 
9135 	/*
9136 	 * If FW is new enough, use a direct AQ command to perform the filter
9137 	 * removal.
9138 	 */
9139 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9140 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9141 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9142 		if (status) {
9143 			device_printf(dev,
9144 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9145 			    ice_status_str(status),
9146 			    ice_aq_str(hw->adminq.sq_last_status));
9147 		}
9148 		return;
9149 	}
9150 
9151 	INIT_LIST_HEAD(&ethertype_list);
9152 
9153 	/* Remove filter forwarding Rx LLDP frames to the stack */
9154 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9155 					ETHERTYPE_LLDP_FRAMES,
9156 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9157 	if (err) {
9158 		device_printf(dev,
9159 			      "Failed to remove Rx LLDP filter, err %s\n",
9160 			      ice_err_str(err));
9161 		goto free_ethertype_list;
9162 	}
9163 
9164 	status = ice_remove_eth_mac(hw, &ethertype_list);
9165 	if (status == ICE_ERR_DOES_NOT_EXIST) {
9166 		; /* Don't complain if we try to remove a filter that doesn't exist */
9167 	} else if (status) {
9168 		device_printf(dev,
9169 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9170 			      ice_status_str(status),
9171 			      ice_aq_str(hw->adminq.sq_last_status));
9172 	}
9173 
9174 free_ethertype_list:
9175 	ice_free_fltr_list(&ethertype_list);
9176 }
9177 
9178 /**
9179  * ice_init_link_configuration -- Setup link in different ways depending
9180  * on whether media is available or not.
9181  * @sc: device private structure
9182  *
9183  * Called at the end of the attach process to either set default link
9184  * parameters if there is media available, or force HW link down and
9185  * set a state bit if there is no media.
9186  */
9187 void
9188 ice_init_link_configuration(struct ice_softc *sc)
9189 {
9190 	struct ice_port_info *pi = sc->hw.port_info;
9191 	struct ice_hw *hw = &sc->hw;
9192 	device_t dev = sc->dev;
9193 	enum ice_status status;
9194 
9195 	pi->phy.get_link_info = true;
9196 	status = ice_get_link_status(pi, &sc->link_up);
9197 	if (status != ICE_SUCCESS) {
9198 		device_printf(dev,
9199 		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9200 		    __func__, ice_status_str(status),
9201 		    ice_aq_str(hw->adminq.sq_last_status));
9202 		return;
9203 	}
9204 
9205 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9206 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9207 		/* Apply default link settings */
9208 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)) {
9209 			ice_set_link(sc, false);
9210 			ice_set_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
9211 		} else
9212 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9213 	} else {
9214 		 /* Set link down, and poll for media available in timer. This prevents the
9215 		  * driver from receiving spurious link-related events.
9216 		  */
9217 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9218 		status = ice_aq_set_link_restart_an(pi, false, NULL);
9219 		if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
9220 			device_printf(dev,
9221 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9222 			    __func__, ice_status_str(status),
9223 			    ice_aq_str(hw->adminq.sq_last_status));
9224 	}
9225 }
9226 
9227 /**
9228  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9229  * @sc: device private structure
9230  * @cfg: new PHY config data to be modified
9231  *
9232  * Applies user settings for advertised speeds to the PHY type fields in the
9233  * supplied PHY config struct. It uses the data from pcaps to check if the
9234  * saved settings are invalid and uses the pcaps data instead if they are
9235  * invalid.
9236  */
9237 static int
9238 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9239 			       struct ice_aqc_set_phy_cfg_data *cfg)
9240 {
9241 	struct ice_phy_data phy_data = { 0 };
9242 	struct ice_port_info *pi = sc->hw.port_info;
9243 	u64 phy_low = 0, phy_high = 0;
9244 	u16 link_speeds;
9245 	int ret;
9246 
9247 	link_speeds = pi->phy.curr_user_speed_req;
9248 
9249 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9250 		memset(&phy_data, 0, sizeof(phy_data));
9251 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9252 		phy_data.user_speeds_orig = link_speeds;
9253 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9254 		if (ret != 0) {
9255 			/* Error message already printed within function */
9256 			return (ret);
9257 		}
9258 		phy_low = phy_data.phy_low_intr;
9259 		phy_high = phy_data.phy_high_intr;
9260 
9261 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9262 			goto finalize_link_speed;
9263 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9264 			memset(&phy_data, 0, sizeof(phy_data));
9265 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9266 			phy_data.user_speeds_orig = link_speeds;
9267 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9268 			if (ret != 0) {
9269 				/* Error message already printed within function */
9270 				return (ret);
9271 			}
9272 			phy_low = phy_data.phy_low_intr;
9273 			phy_high = phy_data.phy_high_intr;
9274 
9275 			if (!phy_data.user_speeds_intr) {
9276 				phy_low = phy_data.phy_low_orig;
9277 				phy_high = phy_data.phy_high_orig;
9278 			}
9279 			goto finalize_link_speed;
9280 		}
9281 		/* If we're here, then it means the benefits of Version 2
9282 		 * link management aren't utilized.  We fall through to
9283 		 * handling Strict Link Mode the same as Version 1 link
9284 		 * management.
9285 		 */
9286 	}
9287 
9288 	memset(&phy_data, 0, sizeof(phy_data));
9289 	if ((link_speeds == 0) &&
9290 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9291 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9292 	else
9293 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9294 	phy_data.user_speeds_orig = link_speeds;
9295 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9296 	if (ret != 0) {
9297 		/* Error message already printed within function */
9298 		return (ret);
9299 	}
9300 	phy_low = phy_data.phy_low_intr;
9301 	phy_high = phy_data.phy_high_intr;
9302 
9303 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9304 		if (phy_low == 0 && phy_high == 0) {
9305 			device_printf(sc->dev,
9306 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9307 			return (EINVAL);
9308 		}
9309 	} else {
9310 		if (link_speeds == 0) {
9311 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9312 			    sc->ldo_tlv.phy_type_high & phy_high) {
9313 				phy_low &= sc->ldo_tlv.phy_type_low;
9314 				phy_high &= sc->ldo_tlv.phy_type_high;
9315 			}
9316 		} else if (phy_low == 0 && phy_high == 0) {
9317 			memset(&phy_data, 0, sizeof(phy_data));
9318 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9319 			phy_data.user_speeds_orig = link_speeds;
9320 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9321 			if (ret != 0) {
9322 				/* Error message already printed within function */
9323 				return (ret);
9324 			}
9325 			phy_low = phy_data.phy_low_intr;
9326 			phy_high = phy_data.phy_high_intr;
9327 
9328 			if (!phy_data.user_speeds_intr) {
9329 				phy_low = phy_data.phy_low_orig;
9330 				phy_high = phy_data.phy_high_orig;
9331 			}
9332 		}
9333 	}
9334 
9335 finalize_link_speed:
9336 
9337 	/* Cache new user settings for speeds */
9338 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9339 	cfg->phy_type_low = htole64(phy_low);
9340 	cfg->phy_type_high = htole64(phy_high);
9341 
9342 	return (ret);
9343 }
9344 
9345 /**
9346  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9347  * @sc: device private structure
9348  * @cfg: new PHY config data to be modified
9349  *
9350  * Applies user setting for FEC mode to PHY config struct. It uses the data
9351  * from pcaps to check if the saved settings are invalid and uses the pcaps
9352  * data instead if they are invalid.
9353  */
9354 static int
9355 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9356 			       struct ice_aqc_set_phy_cfg_data *cfg)
9357 {
9358 	struct ice_port_info *pi = sc->hw.port_info;
9359 	enum ice_status status;
9360 
9361 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9362 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9363 	if (status)
9364 		return (EIO);
9365 
9366 	return (0);
9367 }
9368 
9369 /**
9370  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9371  * @pi: port info struct
9372  * @cfg: new PHY config data to be modified
9373  *
9374  * Applies user setting for flow control mode to PHY config struct. There are
9375  * no invalid flow control mode settings; if there are, then this function
9376  * treats them like "ICE_FC_NONE".
9377  */
9378 static void
9379 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9380 			      struct ice_aqc_set_phy_cfg_data *cfg)
9381 {
9382 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9383 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9384 
9385 	switch (pi->phy.curr_user_fc_req) {
9386 	case ICE_FC_FULL:
9387 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9388 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9389 		break;
9390 	case ICE_FC_RX_PAUSE:
9391 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9392 		break;
9393 	case ICE_FC_TX_PAUSE:
9394 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9395 		break;
9396 	default:
9397 		/* ICE_FC_NONE */
9398 		break;
9399 	}
9400 }
9401 
9402 /**
9403  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9404  * @sc: device private structure
9405  * @settings: which settings to apply
9406  *
9407  * Applies user settings for advertised speeds, FEC mode, and flow
9408  * control mode to a PHY config struct; it uses the data from pcaps
9409  * to check if the saved settings are invalid and uses the pcaps
9410  * data instead if they are invalid.
9411  *
9412  * For things like sysctls where only one setting needs to be
9413  * updated, the bitmap allows the caller to specify which setting
9414  * to update.
9415  */
9416 int
9417 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9418 {
9419 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9420 	struct ice_port_info *pi = sc->hw.port_info;
9421 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9422 	struct ice_hw *hw = &sc->hw;
9423 	device_t dev = sc->dev;
9424 	u64 phy_low, phy_high;
9425 	enum ice_status status;
9426 	enum ice_fec_mode dflt_fec_mode;
9427 	u16 dflt_user_speed;
9428 
9429 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9430 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9431 		    settings);
9432 	}
9433 
9434 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9435 				     &pcaps, NULL);
9436 	if (status != ICE_SUCCESS) {
9437 		device_printf(dev,
9438 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9439 		    __func__, ice_status_str(status),
9440 		    ice_aq_str(hw->adminq.sq_last_status));
9441 		return (EIO);
9442 	}
9443 
9444 	phy_low = le64toh(pcaps.phy_type_low);
9445 	phy_high = le64toh(pcaps.phy_type_high);
9446 
9447 	/* Save off initial config parameters */
9448 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9449 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9450 
9451 	/* Setup new PHY config */
9452 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9453 
9454 	/* On error, restore active configuration values */
9455 	if ((settings & ICE_APPLY_LS) &&
9456 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9457 		pi->phy.curr_user_speed_req = dflt_user_speed;
9458 		cfg.phy_type_low = pcaps.phy_type_low;
9459 		cfg.phy_type_high = pcaps.phy_type_high;
9460 	}
9461 	if ((settings & ICE_APPLY_FEC) &&
9462 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9463 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9464 	}
9465 	if (settings & ICE_APPLY_FC) {
9466 		/* No real error indicators for this process,
9467 		 * so we'll just have to assume it works. */
9468 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9469 	}
9470 
9471 	/* Enable link and re-negotiate it */
9472 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9473 
9474 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9475 	if (status != ICE_SUCCESS) {
9476 		/* Don't indicate failure if there's no media in the port.
9477 		 * The settings have been saved and will apply when media
9478 		 * is inserted.
9479 		 */
9480 		if ((status == ICE_ERR_AQ_ERROR) &&
9481 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9482 			device_printf(dev,
9483 			    "%s: Setting will be applied when media is inserted\n",
9484 			    __func__);
9485 			return (0);
9486 		} else {
9487 			device_printf(dev,
9488 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9489 			    __func__, ice_status_str(status),
9490 			    ice_aq_str(hw->adminq.sq_last_status));
9491 			return (EIO);
9492 		}
9493 	}
9494 
9495 	return (0);
9496 }
9497 
9498 /**
9499  * ice_print_ldo_tlv - Print out LDO TLV information
9500  * @sc: device private structure
9501  * @tlv: LDO TLV information from the adapter NVM
9502  *
9503  * Dump out the information in tlv to the kernel message buffer; intended for
9504  * debugging purposes.
9505  */
9506 static void
9507 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9508 {
9509 	device_t dev = sc->dev;
9510 
9511 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9512 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9513 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9514 	device_printf(dev, "     -phy_high    0x%016llx\n",
9515 	    (unsigned long long)tlv->phy_type_high);
9516 	device_printf(dev, "     -phy_low     0x%016llx\n",
9517 	    (unsigned long long)tlv->phy_type_low);
9518 }
9519 
9520 /**
9521  * ice_set_link_management_mode -- Strict or lenient link management
9522  * @sc: device private structure
9523  *
9524  * Some NVMs give the adapter the option to advertise a superset of link
9525  * configurations.  This checks to see if that option is enabled.
9526  * Further, the NVM could also provide a specific set of configurations
9527  * to try; these are cached in the driver's private structure if they
9528  * are available.
9529  */
9530 void
9531 ice_set_link_management_mode(struct ice_softc *sc)
9532 {
9533 	struct ice_port_info *pi = sc->hw.port_info;
9534 	device_t dev = sc->dev;
9535 	struct ice_link_default_override_tlv tlv = { 0 };
9536 	enum ice_status status;
9537 
9538 	/* Port must be in strict mode if FW version is below a certain
9539 	 * version. (i.e. Don't set lenient mode features)
9540 	 */
9541 	if (!(ice_fw_supports_link_override(&sc->hw)))
9542 		return;
9543 
9544 	status = ice_get_link_default_override(&tlv, pi);
9545 	if (status != ICE_SUCCESS) {
9546 		device_printf(dev,
9547 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9548 		    __func__, ice_status_str(status),
9549 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9550 		return;
9551 	}
9552 
9553 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9554 		ice_print_ldo_tlv(sc, &tlv);
9555 
9556 	/* Set lenient link mode */
9557 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9558 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9559 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9560 
9561 	/* FW supports reporting a default configuration */
9562 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9563 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9564 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9565 		/* Knowing we're at a high enough firmware revision to
9566 		 * support this link management configuration, we don't
9567 		 * need to check/support earlier versions.
9568 		 */
9569 		return;
9570 	}
9571 
9572 	/* Default overrides only work if in lenient link mode */
9573 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9574 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9575 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9576 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9577 
9578 	/* Cache the LDO TLV structure in the driver, since it
9579 	 * won't change during the driver's lifetime.
9580 	 */
9581 	sc->ldo_tlv = tlv;
9582 }
9583 
9584 /**
9585  * ice_set_link -- Set up/down link on phy
9586  * @sc: device private structure
9587  * @enabled: link status to set up
9588  *
9589  * This should be called when change of link status is needed.
9590  */
9591 void
9592 ice_set_link(struct ice_softc *sc, bool enabled)
9593 {
9594 	struct ice_hw *hw = &sc->hw;
9595 	device_t dev = sc->dev;
9596 	enum ice_status status;
9597 
9598 	if (ice_driver_is_detaching(sc))
9599 		return;
9600 
9601 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9602 		return;
9603 
9604 	if (enabled)
9605 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9606 	else {
9607 		status = ice_aq_set_link_restart_an(hw->port_info, false, NULL);
9608 		if (status != ICE_SUCCESS) {
9609 			if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
9610 				device_printf(dev,
9611 				    "%s: Link control not enabled in current device mode\n",
9612 				    __func__);
9613 			else
9614 				device_printf(dev,
9615 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9616 				    __func__, ice_status_str(status),
9617 				    ice_aq_str(hw->adminq.sq_last_status));
9618 		} else
9619 			sc->link_up = false;
9620 	}
9621 }
9622 
9623 /**
9624  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9625  * @sc: device private structure
9626  *
9627  * This should be called before the tunables for these link settings
9628  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9629  * the cached values that the sysctl handlers will write.
9630  *
9631  * This also needs to be called before ice_init_link_configuration, to ensure
9632  * that there are sane values that can be written if there is media available
9633  * in the port.
9634  */
9635 void
9636 ice_init_saved_phy_cfg(struct ice_softc *sc)
9637 {
9638 	struct ice_port_info *pi = sc->hw.port_info;
9639 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9640 	struct ice_hw *hw = &sc->hw;
9641 	device_t dev = sc->dev;
9642 	enum ice_status status;
9643 	u64 phy_low, phy_high;
9644 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9645 
9646 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9647 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9648 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9649 	if (status != ICE_SUCCESS) {
9650 		device_printf(dev,
9651 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9652 		    __func__,
9653 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9654 		    ice_status_str(status),
9655 		    ice_aq_str(hw->adminq.sq_last_status));
9656 		return;
9657 	}
9658 
9659 	phy_low = le64toh(pcaps.phy_type_low);
9660 	phy_high = le64toh(pcaps.phy_type_high);
9661 
9662 	/* Save off initial config parameters */
9663 	pi->phy.curr_user_speed_req =
9664 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9665 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9666 	    pcaps.link_fec_options);
9667 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9668 }
9669 
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Runs once when the module is loaded; the place to set up data structures
 * that live as long as the device driver does. Currently this only calls
 * ice_rdma_init().
 *
 * @returns 0 always.
 */
static int
ice_module_init(void)
{
	int error = 0;

	ice_rdma_init();

	return (error);
}
9682 
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Runs when the module is unloaded and releases whatever ice_module_init
 * set up. Currently this only calls ice_rdma_exit().
 *
 * If this function returns non-zero, the module will not be unloaded. It
 * should only return such a value if the module cannot be unloaded at all,
 * such as due to outstanding memory references that cannot be revoked.
 *
 * @returns 0 always.
 */
static int
ice_module_exit(void)
{
	int error = 0;

	ice_rdma_exit();

	return (error);
}
9699 
9700 /**
9701  * ice_module_event_handler - Callback for module events
9702  * @mod: unused module_t parameter
9703  * @what: the event requested
9704  * @arg: unused event argument
9705  *
9706  * Callback used to handle module events from the stack. Used to allow the
9707  * driver to define custom behavior that should happen at module load and
9708  * unload.
9709  */
9710 int
9711 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9712 {
9713 	switch (what) {
9714 	case MOD_LOAD:
9715 		return ice_module_init();
9716 	case MOD_UNLOAD:
9717 		return ice_module_exit();
9718 	default:
9719 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9720 		return (EOPNOTSUPP);
9721 	}
9722 }
9723 
9724 /**
9725  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9726  * @sc: the device private softc
9727  * @ifd: ifdrv ioctl request pointer
9728  */
9729 int
9730 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9731 {
9732 	union ice_nvm_access_data *data;
9733 	struct ice_nvm_access_cmd *cmd;
9734 	size_t ifd_len = ifd->ifd_len, malloc_len;
9735 	struct ice_hw *hw = &sc->hw;
9736 	device_t dev = sc->dev;
9737 	enum ice_status status;
9738 	u8 *nvm_buffer;
9739 	int err;
9740 
9741 	/*
9742 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9743 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9744 	 * without performing a privilege check. Perform one here to ensure
9745 	 * that non-privileged threads cannot access this interface.
9746 	 */
9747 	err = priv_check(curthread, PRIV_DRIVER);
9748 	if (err)
9749 		return (err);
9750 
9751 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9752 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9753 			      __func__);
9754 		return (EBUSY);
9755 	}
9756 
9757 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9758 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9759 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9760 		return (EINVAL);
9761 	}
9762 
9763 	if (ifd->ifd_data == NULL) {
9764 		device_printf(dev, "%s: ifd data buffer not present.\n",
9765 			      __func__);
9766 		return (EINVAL);
9767 	}
9768 
9769 	/*
9770 	 * If everything works correctly, ice_handle_nvm_access should not
9771 	 * modify data past the size of the ioctl length. However, it could
9772 	 * lead to memory corruption if it did. Make sure to allocate at least
9773 	 * enough space for the command and data regardless. This
9774 	 * ensures that any access to the data union will not access invalid
9775 	 * memory.
9776 	 */
9777 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9778 
9779 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9780 	if (!nvm_buffer)
9781 		return (ENOMEM);
9782 
9783 	/* Copy the NVM access command and data in from user space */
9784 	/* coverity[tainted_data_argument] */
9785 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9786 	if (err) {
9787 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9788 			      __func__, ice_err_str(err));
9789 		goto cleanup_free_nvm_buffer;
9790 	}
9791 
9792 	/*
9793 	 * The NVM command structure is immediately followed by data which
9794 	 * varies in size based on the command.
9795 	 */
9796 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9797 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9798 
9799 	/* Handle the NVM access request */
9800 	status = ice_handle_nvm_access(hw, cmd, data);
9801 	if (status)
9802 		ice_debug(hw, ICE_DBG_NVM,
9803 			  "NVM access request failed, err %s\n",
9804 			  ice_status_str(status));
9805 
9806 	/* Copy the possibly modified contents of the handled request out */
9807 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9808 	if (err) {
9809 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9810 			      __func__, ice_err_str(err));
9811 		goto cleanup_free_nvm_buffer;
9812 	}
9813 
9814 	/* Convert private status to an error code for proper ioctl response */
9815 	switch (status) {
9816 	case ICE_SUCCESS:
9817 		err = (0);
9818 		break;
9819 	case ICE_ERR_NO_MEMORY:
9820 		err = (ENOMEM);
9821 		break;
9822 	case ICE_ERR_OUT_OF_RANGE:
9823 		err = (ENOTTY);
9824 		break;
9825 	case ICE_ERR_PARAM:
9826 	default:
9827 		err = (EINVAL);
9828 		break;
9829 	}
9830 
9831 cleanup_free_nvm_buffer:
9832 	free(nvm_buffer, M_ICE);
9833 	return err;
9834 }
9835 
9836 /**
9837  * ice_read_sff_eeprom - Read data from SFF eeprom
9838  * @sc: device softc
9839  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9840  * @offset: offset into the eeprom
9841  * @data: pointer to data buffer to store read data in
9842  * @length: length to read; max length is 16
9843  *
9844  * Read from the SFF eeprom in the module for this PF's port. For more details
9845  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9846  * and SFF-8024 (both).
9847  */
9848 int
9849 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9850 {
9851 	struct ice_hw *hw = &sc->hw;
9852 	int ret = 0, retries = 0;
9853 	enum ice_status status;
9854 
9855 	if (length > 16)
9856 		return (EINVAL);
9857 
9858 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9859 		return (ENOSYS);
9860 
9861 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9862 		return (ENXIO);
9863 
9864 	do {
9865 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
9866 					   offset, 0, 0, data, length,
9867 					   false, NULL);
9868 		if (!status) {
9869 			ret = 0;
9870 			break;
9871 		}
9872 		if (status == ICE_ERR_AQ_ERROR &&
9873 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
9874 			ret = EBUSY;
9875 			continue;
9876 		}
9877 		if (status == ICE_ERR_AQ_ERROR &&
9878 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
9879 			/* FW says I2C access isn't supported */
9880 			ret = EACCES;
9881 			break;
9882 		}
9883 		if (status == ICE_ERR_AQ_ERROR &&
9884 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
9885 			device_printf(sc->dev,
9886 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
9887 				  __func__);
9888 			ret = EPERM;
9889 			break;
9890 		} else {
9891 			device_printf(sc->dev,
9892 				  "%s: Error reading I2C data: err %s aq_err %s\n",
9893 				  __func__, ice_status_str(status),
9894 				  ice_aq_str(hw->adminq.sq_last_status));
9895 			ret = EIO;
9896 			break;
9897 		}
9898 	} while (retries++ < ICE_I2C_MAX_RETRIES);
9899 
9900 	if (ret == EBUSY)
9901 		device_printf(sc->dev,
9902 			  "%s: Error reading I2C data after %d retries\n",
9903 			  __func__, ICE_I2C_MAX_RETRIES);
9904 
9905 	return (ret);
9906 }
9907 
9908 /**
9909  * ice_handle_i2c_req - Driver independent I2C request handler
9910  * @sc: device softc
9911  * @req: The I2C parameters to use
9912  *
9913  * Read from the port's I2C eeprom using the parameters from the ioctl.
9914  */
9915 int
9916 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
9917 {
9918 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
9919 }
9920 
9921 /**
9922  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
9923  * @oidp: sysctl oid structure
9924  * @arg1: pointer to private data structure
9925  * @arg2: unused
9926  * @req: sysctl request pointer
9927  *
9928  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
9929  * inserted into the port.
9930  *
9931  *             | SFP A2  | QSFP Lower Page
9932  * ------------|---------|----------------
9933  * Temperature | 96-97	 | 22-23
9934  * Vcc         | 98-99   | 26-27
9935  * TX power    | 102-103 | 34-35..40-41
9936  * RX power    | 104-105 | 50-51..56-57
9937  */
9938 static int
9939 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
9940 {
9941 	struct ice_softc *sc = (struct ice_softc *)arg1;
9942 	device_t dev = sc->dev;
9943 	struct sbuf *sbuf;
9944 	int ret;
9945 	u8 data[16];
9946 
9947 	UNREFERENCED_PARAMETER(arg2);
9948 	UNREFERENCED_PARAMETER(oidp);
9949 
9950 	if (ice_driver_is_detaching(sc))
9951 		return (ESHUTDOWN);
9952 
9953 	if (req->oldptr == NULL) {
9954 		ret = SYSCTL_OUT(req, 0, 128);
9955 		return (ret);
9956 	}
9957 
9958 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
9959 	if (ret)
9960 		return (ret);
9961 
9962 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
9963 	if (data[0] == 0x3) {
9964 		/*
9965 		 * Check for:
9966 		 * - Internally calibrated data
9967 		 * - Diagnostic monitoring is implemented
9968 		 */
9969 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
9970 		if (!(data[0] & 0x60)) {
9971 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
9972 			return (ENODEV);
9973 		}
9974 
9975 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9976 
9977 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
9978 		for (int i = 0; i < 4; i++)
9979 			sbuf_printf(sbuf, "%02X ", data[i]);
9980 
9981 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
9982 		for (int i = 0; i < 4; i++)
9983 			sbuf_printf(sbuf, "%02X ", data[i]);
9984 	} else if (data[0] == 0xD || data[0] == 0x11) {
9985 		/*
9986 		 * QSFP+ modules are always internally calibrated, and must indicate
9987 		 * what types of diagnostic monitoring are implemented
9988 		 */
9989 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9990 
9991 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
9992 		for (int i = 0; i < 2; i++)
9993 			sbuf_printf(sbuf, "%02X ", data[i]);
9994 
9995 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
9996 		for (int i = 0; i < 2; i++)
9997 			sbuf_printf(sbuf, "%02X ", data[i]);
9998 
9999 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
10000 		for (int i = 0; i < 2; i++)
10001 			sbuf_printf(sbuf, "%02X ", data[i]);
10002 
10003 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
10004 		for (int i = 0; i < 2; i++)
10005 			sbuf_printf(sbuf, "%02X ", data[i]);
10006 	} else {
10007 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
10008 		return (ENODEV);
10009 	}
10010 
10011 	sbuf_finish(sbuf);
10012 	sbuf_delete(sbuf);
10013 
10014 	return (0);
10015 }
10016 
10017 /**
10018  * ice_alloc_intr_tracking - Setup interrupt tracking structures
10019  * @sc: device softc structure
10020  *
10021  * Sets up the resource manager for keeping track of interrupt allocations,
10022  * and initializes the tracking maps for the PF's interrupt allocations.
10023  *
10024  * Unlike the scheme for queues, this is done in one step since both the
10025  * manager and the maps both have the same lifetime.
10026  *
10027  * @returns 0 on success, or an error code on failure.
10028  */
10029 int
10030 ice_alloc_intr_tracking(struct ice_softc *sc)
10031 {
10032 	struct ice_hw *hw = &sc->hw;
10033 	device_t dev = sc->dev;
10034 	int err;
10035 
10036 	/* Initialize the interrupt allocation manager */
10037 	err = ice_resmgr_init_contig_only(&sc->imgr,
10038 	    hw->func_caps.common_cap.num_msix_vectors);
10039 	if (err) {
10040 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
10041 			      ice_err_str(err));
10042 		return (err);
10043 	}
10044 
10045 	/* Allocate PF interrupt mapping storage */
10046 	if (!(sc->pf_imap =
10047 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10048 	      M_ICE, M_NOWAIT))) {
10049 		device_printf(dev, "Unable to allocate PF imap memory\n");
10050 		err = ENOMEM;
10051 		goto free_imgr;
10052 	}
10053 	if (!(sc->rdma_imap =
10054 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10055 	      M_ICE, M_NOWAIT))) {
10056 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
10057 		err = ENOMEM;
10058 		free(sc->pf_imap, M_ICE);
10059 		goto free_imgr;
10060 	}
10061 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
10062 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
10063 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
10064 	}
10065 
10066 	return (0);
10067 
10068 free_imgr:
10069 	ice_resmgr_destroy(&sc->imgr);
10070 	return (err);
10071 }
10072 
10073 /**
10074  * ice_free_intr_tracking - Free PF interrupt tracking structures
10075  * @sc: device softc structure
10076  *
10077  * Frees the interrupt resource allocation manager and the PF's owned maps.
10078  *
10079  * VF maps are released when the owning VF's are destroyed, which should always
10080  * happen before this function is called.
10081  */
10082 void
10083 ice_free_intr_tracking(struct ice_softc *sc)
10084 {
10085 	if (sc->pf_imap) {
10086 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
10087 				       sc->lan_vectors);
10088 		free(sc->pf_imap, M_ICE);
10089 		sc->pf_imap = NULL;
10090 	}
10091 	if (sc->rdma_imap) {
10092 		ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
10093 				       sc->lan_vectors);
10094 		free(sc->rdma_imap, M_ICE);
10095 		sc->rdma_imap = NULL;
10096 	}
10097 
10098 	ice_resmgr_destroy(&sc->imgr);
10099 }
10100 
10101 /**
10102  * ice_apply_supported_speed_filter - Mask off unsupported speeds
10103  * @report_speeds: bit-field for the desired link speeds
10104  * @mod_type: type of module/sgmii connection we have
10105  *
10106  * Given a bitmap of the desired lenient mode link speeds,
10107  * this function will mask off the speeds that are not currently
10108  * supported by the device.
10109  */
10110 static u16
10111 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
10112 {
10113 	u16 speed_mask;
10114 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
10115 
10116 	/*
10117 	 * The SFF specification says 0 is unknown, so we'll
10118 	 * treat it like we're connected through SGMII for now.
10119 	 * This may need revisiting if a new type is supported
10120 	 * in the future.
10121 	 */
10122 	switch (mod_type) {
10123 	case 0:
10124 		module = IS_SGMII;
10125 		break;
10126 	case 3:
10127 		module = IS_SFP;
10128 		break;
10129 	default:
10130 		module = IS_QSFP;
10131 		break;
10132 	}
10133 
10134 	/* We won't offer anything lower than 100M for any part,
10135 	 * but we'll need to mask off other speeds based on the
10136 	 * device and module type.
10137 	 */
10138 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
10139 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
10140 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10141 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
10142 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10143 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
10144 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10145 		if (module == IS_QSFP)
10146 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10147 	}
10148 	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
10149 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10150 	return (report_speeds & speed_mask);
10151 }
10152 
10153 /**
10154  * ice_init_health_events - Enable FW health event reporting
10155  * @sc: device softc
10156  *
10157  * Will try to enable firmware health event reporting, but shouldn't
10158  * cause any grief (to the caller) if this fails.
10159  */
10160 void
10161 ice_init_health_events(struct ice_softc *sc)
10162 {
10163 	enum ice_status status;
10164 	u8 health_mask;
10165 
10166 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10167 		(!sc->enable_health_events))
10168 		return;
10169 
10170 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10171 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10172 
10173 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10174 	if (status)
10175 		device_printf(sc->dev,
10176 		    "Failed to enable firmware health events, err %s aq_err %s\n",
10177 		    ice_status_str(status),
10178 		    ice_aq_str(sc->hw.adminq.sq_last_status));
10179 	else
10180 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10181 }
10182 
10183 /**
10184  * ice_print_health_status_string - Print message for given FW health event
10185  * @dev: the PCIe device
10186  * @elem: health status element containing status code
10187  *
10188  * A rather large list of possible health status codes and their associated
10189  * messages.
10190  */
10191 static void
10192 ice_print_health_status_string(device_t dev,
10193 			       struct ice_aqc_health_status_elem *elem)
10194 {
10195 	u16 status_code = le16toh(elem->health_status_code);
10196 
10197 	switch (status_code) {
10198 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10199 		device_printf(dev, "The device is in firmware recovery mode.\n");
10200 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10201 		break;
10202 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10203 		device_printf(dev, "The flash chip cannot be accessed.\n");
10204 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10205 		break;
10206 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10207 		device_printf(dev, "NVM authentication failed.\n");
10208 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10209 		break;
10210 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10211 		device_printf(dev, "Option ROM authentication failed.\n");
10212 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10213 		break;
10214 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10215 		device_printf(dev, "DDP package failed.\n");
10216 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10217 		break;
10218 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10219 		device_printf(dev, "NVM image is incompatible.\n");
10220 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10221 		break;
10222 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10223 		device_printf(dev, "Option ROM is incompatible.\n");
10224 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10225 		break;
10226 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10227 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10228 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10229 		break;
10230 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10231 		device_printf(dev, "An unsupported module was detected.\n");
10232 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10233 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10234 		break;
10235 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10236 		device_printf(dev, "Module type is not supported.\n");
10237 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10238 		break;
10239 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10240 		device_printf(dev, "Module is not qualified.\n");
10241 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10242 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10243 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10244 		break;
10245 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10246 		device_printf(dev, "Device cannot communicate with the module.\n");
10247 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10248 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10249 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10250 		break;
10251 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10252 		device_printf(dev, "Unresolved module conflict.\n");
10253 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10254 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10255 		break;
10256 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10257 		device_printf(dev, "Module is not present.\n");
10258 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10259 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10260 		break;
10261 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10262 		device_printf(dev, "Underutilized module.\n");
10263 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10264 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10265 		break;
10266 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10267 		device_printf(dev, "An unsupported module was detected.\n");
10268 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10269 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10270 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10271 		break;
10272 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10273 		device_printf(dev, "Invalid link configuration.\n");
10274 		break;
10275 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10276 		device_printf(dev, "Port hardware access error.\n");
10277 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10278 		break;
10279 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10280 		device_printf(dev, "A port is unreachable.\n");
10281 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10282 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10283 		break;
10284 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10285 		device_printf(dev, "Port speed is limited due to module.\n");
10286 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10287 		break;
10288 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10289 		device_printf(dev, "A parallel fault was detected.\n");
10290 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10291 		break;
10292 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10293 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10294 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10295 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10296 		break;
10297 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10298 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10299 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10300 		break;
10301 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10302 		device_printf(dev, "Unrecoverable netlist error.\n");
10303 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10304 		break;
10305 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10306 		device_printf(dev, "Port topology conflict.\n");
10307 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10308 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10309 		break;
10310 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10311 		device_printf(dev, "Unrecoverable hardware access error.\n");
10312 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10313 		break;
10314 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10315 		device_printf(dev, "Unrecoverable runtime error.\n");
10316 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10317 		break;
10318 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10319 		device_printf(dev, "Link management engine failed to initialize.\n");
10320 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10321 		break;
10322 	default:
10323 		break;
10324 	}
10325 }
10326 
10327 /**
10328  * ice_handle_health_status_event - helper function to output health status
10329  * @sc: device softc structure
10330  * @event: event received on a control queue
10331  *
10332  * Prints out the appropriate string based on the given Health Status Event
10333  * code.
10334  */
10335 static void
10336 ice_handle_health_status_event(struct ice_softc *sc,
10337 			       struct ice_rq_event_info *event)
10338 {
10339 	struct ice_aqc_health_status_elem *health_info;
10340 	u16 status_count;
10341 	int i;
10342 
10343 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10344 		return;
10345 
10346 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10347 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10348 
10349 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10350 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10351 		return;
10352 	}
10353 
10354 	for (i = 0; i < status_count; i++) {
10355 		ice_print_health_status_string(sc->dev, health_info);
10356 		health_info++;
10357 	}
10358 }
10359 
10360 /**
10361  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10362  * @sc: device softc structure
10363  *
10364  * This function needs to be called after link up; it makes sure the FW has
10365  * certain PFC/DCB settings. In certain configurations this will re-apply a
10366  * default local LLDP MIB configuration; this is intended to workaround a FW
10367  * behavior where these settings seem to be cleared on link up.
10368  */
10369 void
10370 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10371 {
10372 	struct ice_hw *hw = &sc->hw;
10373 	struct ice_port_info *pi;
10374 	device_t dev = sc->dev;
10375 	enum ice_status status;
10376 
10377 	/* Set Local MIB can disrupt flow control settings for
10378 	 * non-DCB-supported devices.
10379 	 */
10380 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10381 		return;
10382 
10383 	pi = hw->port_info;
10384 
10385 	/* Don't overwrite a custom SW configuration */
10386 	if (!pi->qos_cfg.is_sw_lldp &&
10387 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10388 		ice_set_default_local_mib_settings(sc);
10389 
10390 	status = ice_set_dcb_cfg(pi);
10391 
10392 	if (status)
10393 		device_printf(dev,
10394 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10395 		    ice_status_str(status),
10396 		    ice_aq_str(hw->adminq.sq_last_status));
10397 }
10398 
10399 /**
10400  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10401  * @sbuf: string buffer to print to
10402  * @name: prefix string to use
10403  * @ets: structure to pull values from
10404  *
10405  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10406  * formats the ETS rec and cfg TLVs into text.
10407  */
10408 static void
10409 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10410 {
10411 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10412 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10413 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10414 
10415 	sbuf_printf(sbuf, "%s.prio_table:", name);
10416 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10417 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10418 	sbuf_printf(sbuf, "\n");
10419 
10420 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10421 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10422 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10423 	sbuf_printf(sbuf, "\n");
10424 
10425 	sbuf_printf(sbuf, "%s.tsatable:", name);
10426 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10427 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10428 	sbuf_printf(sbuf, "\n");
10429 }
10430 
10431 /**
10432  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10433  * @oidp: sysctl oid structure
10434  * @arg1: pointer to private data structure
10435  * @arg2: AQ define for either Local or Remote MIB
10436  * @req: sysctl request pointer
10437  *
10438  * Prints out DCB/DCBX configuration, including the contents
10439  * of either the local or remote MIB, depending on the value
10440  * used in arg2.
10441  */
10442 static int
10443 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10444 {
10445 	struct ice_softc *sc = (struct ice_softc *)arg1;
10446 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10447 	struct ice_dcbx_cfg dcb_buf = {};
10448 	struct ice_dcbx_cfg *dcbcfg;
10449 	struct ice_hw *hw = &sc->hw;
10450 	device_t dev = sc->dev;
10451 	struct sbuf *sbuf;
10452 	enum ice_status status;
10453 	u8 maxtcs, dcbx_status, is_sw_lldp;
10454 
10455 	UNREFERENCED_PARAMETER(oidp);
10456 
10457 	if (ice_driver_is_detaching(sc))
10458 		return (ESHUTDOWN);
10459 
10460 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10461 
10462 	/* The driver doesn't receive a Remote MIB via SW */
10463 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10464 		return (ENOENT);
10465 
10466 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10467 	if (!is_sw_lldp) {
10468 		/* Collect information from the FW in FW LLDP mode */
10469 		dcbcfg = &dcb_buf;
10470 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10471 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10472 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10473 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10474 			device_printf(dev,
10475 			    "Unable to query Remote MIB; port has not received one yet\n");
10476 			return (ENOENT);
10477 		}
10478 		if (status) {
10479 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10480 			    ice_status_str(status),
10481 			    ice_aq_str(hw->adminq.sq_last_status));
10482 			return (EIO);
10483 		}
10484 	}
10485 
10486 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10487 	if (status == ICE_SUCCESS)
10488 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10489 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10490 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10491 	else
10492 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10493 		    ice_status_str(status),
10494 		    ice_aq_str(hw->adminq.sq_last_status));
10495 
10496 	maxtcs = hw->func_caps.common_cap.maxtc;
10497 	dcbx_status = ice_get_dcbx_status(hw);
10498 
10499 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10500 
10501 	/* Do the actual printing */
10502 	sbuf_printf(sbuf, "\n");
10503 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10504 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10505 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10506 
10507 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10508 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10509 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10510 	    "DSCP" : "VLAN");
10511 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10512 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10513 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10514 	    "Unknown");
10515 
10516 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10517 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10518 
10519 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10520 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10521 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10522 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10523 
10524 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10525 		sbuf_printf(sbuf, "dscp_map:\n");
10526 		for (int i = 0; i < 8; i++) {
10527 			for (int j = 0; j < 8; j++)
10528 				sbuf_printf(sbuf, " %d",
10529 					    dcbcfg->dscp_map[i * 8 + j]);
10530 			sbuf_printf(sbuf, "\n");
10531 		}
10532 
10533 		sbuf_printf(sbuf, "\nLocal registers:\n");
10534 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10535 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10536 		        >> PRTDCB_GENC_NUMTC_S);
10537 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10538 		    (rd32(hw, PRTDCB_TUP2TC)));
10539 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10540 		    (rd32(hw, PRTDCB_RUP2TC)));
10541 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10542 		    (rd32(hw, GLDCB_TC2PFC)));
10543 	}
10544 
10545 	/* Finish */
10546 	sbuf_finish(sbuf);
10547 	sbuf_delete(sbuf);
10548 
10549 	return (0);
10550 }
10551 
10552 /**
10553  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10554  * @oidp: sysctl oid structure
10555  * @arg1: pointer to private data structure
10556  * @arg2: unused
10557  * @req: sysctl request pointer
10558  *
10559  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10560  * but for simplicity, this only works on the PF's LAN VSI.
10561  */
10562 static int
10563 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10564 {
10565 	struct ice_softc *sc = (struct ice_softc *)arg1;
10566 	struct ice_vsi_ctx ctx = { 0 };
10567 	struct ice_hw *hw = &sc->hw;
10568 	device_t dev = sc->dev;
10569 	struct sbuf *sbuf;
10570 	enum ice_status status;
10571 
10572 	UNREFERENCED_PARAMETER(oidp);
10573 	UNREFERENCED_PARAMETER(arg2);
10574 
10575 	if (ice_driver_is_detaching(sc))
10576 		return (ESHUTDOWN);
10577 
10578 	/* Get HW absolute index of a VSI */
10579 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10580 
10581 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10582 	if (status != ICE_SUCCESS) {
10583 		device_printf(dev,
10584 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10585 		    ice_status_str(status),
10586 		    ice_aq_str(hw->adminq.sq_last_status));
10587 		return (EIO);
10588 	}
10589 
10590 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10591 
10592 	/* Do the actual printing */
10593 	sbuf_printf(sbuf, "\n");
10594 
10595 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10596 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10597 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10598 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10599 
10600 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10601 	    LE16_TO_CPU(ctx.info.mapping_flags));
10602 	/* The PF VSI is always contiguous, so there's no if-statement here */
10603 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10604 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10605 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10606 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10607 
10608 	sbuf_printf(sbuf, "TC qbases  :");
10609 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10610 		sbuf_printf(sbuf, " %4d",
10611 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10612 	}
10613 	sbuf_printf(sbuf, "\n");
10614 
10615 	sbuf_printf(sbuf, "TC qcounts :");
10616 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10617 		sbuf_printf(sbuf, " %4d",
10618 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10619 	}
10620 
10621 	/* Finish */
10622 	sbuf_finish(sbuf);
10623 	sbuf_delete(sbuf);
10624 
10625 	return (0);
10626 }
10627 
10628 /**
10629  * ice_ets_str_to_tbl - Parse string into ETS table
10630  * @str: input string to parse
10631  * @table: output eight values used for ETS values
10632  * @limit: max valid value to accept for ETS values
10633  *
10634  * Parses a string and converts the eight values within
10635  * into a table that can be used in setting ETS settings
10636  * in a MIB.
10637  *
10638  * @return 0 on success, EINVAL if a parsed value is
10639  * not between 0 and limit.
10640  */
10641 static int
10642 ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
10643 {
10644 	const char *str_start = str;
10645 	char *str_end;
10646 	long token;
10647 
10648 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10649 		token = strtol(str_start, &str_end, 0);
10650 		if (token < 0 || token > limit)
10651 			return (EINVAL);
10652 
10653 		table[i] = (u8)token;
10654 		str_start = (str_end + 1);
10655 	}
10656 
10657 	return (0);
10658 }
10659 
10660 /**
10661  * ice_check_ets_bw - Check if ETS bw vals are valid
10662  * @table: eight values used for ETS bandwidth
10663  *
10664  * @return true if the sum of all 8 values in table
10665  * equals 100.
10666  */
10667 static bool
10668 ice_check_ets_bw(u8 *table)
10669 {
10670 	int sum = 0;
10671 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10672 		sum += (int)table[i];
10673 
10674 	return (sum == 100);
10675 }
10676 
10677 /**
10678  * ice_cfg_pba_num - Determine if PBA Number is retrievable
10679  * @sc: the device private softc structure
10680  *
10681  * Sets the feature flag for the existence of a PBA number
10682  * based on the success of the read command.  This does not
10683  * cache the result.
10684  */
10685 void
10686 ice_cfg_pba_num(struct ice_softc *sc)
10687 {
10688 	u8 pba_string[32] = "";
10689 
10690 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10691 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10692 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10693 }
10694 
10695 /**
10696  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10697  * @oidp: sysctl oid structure
10698  * @arg1: pointer to private data structure
10699  * @arg2: unused
10700  * @req: sysctl request pointer
10701  */
10702 static int
10703 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10704 {
10705 	struct ice_softc *sc = (struct ice_softc *)arg1;
10706 	struct ice_aqc_port_ets_elem port_ets = { 0 };
10707 	struct ice_hw *hw = &sc->hw;
10708 	struct ice_port_info *pi;
10709 	device_t dev = sc->dev;
10710 	struct sbuf *sbuf;
10711 	enum ice_status status;
10712 	int i = 0;
10713 
10714 	UNREFERENCED_PARAMETER(oidp);
10715 	UNREFERENCED_PARAMETER(arg2);
10716 
10717 	if (ice_driver_is_detaching(sc))
10718 		return (ESHUTDOWN);
10719 
10720 	pi = hw->port_info;
10721 
10722 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10723 	if (status != ICE_SUCCESS) {
10724 		device_printf(dev,
10725 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10726 		    ice_status_str(status),
10727 		    ice_aq_str(hw->adminq.sq_last_status));
10728 		return (EIO);
10729 	}
10730 
10731 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10732 
10733 	/* Do the actual printing */
10734 	sbuf_printf(sbuf, "\n");
10735 
10736 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10737 
10738 	sbuf_printf(sbuf, "TC BW %%:");
10739 	ice_for_each_traffic_class(i) {
10740 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10741 	}
10742 	sbuf_printf(sbuf, "\n");
10743 
10744 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10745 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10746 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10747 
10748 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10749 	ice_for_each_traffic_class(i) {
10750 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10751 	}
10752 
10753 	/* Finish */
10754 	sbuf_finish(sbuf);
10755 	sbuf_delete(sbuf);
10756 
10757 	return (0);
10758 }
10759 
10760 /**
10761  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10762  * @oidp: sysctl oid structure
10763  * @arg1: pointer to private data structure
10764  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10765  * @req: sysctl request pointer
10766  *
10767  * Gets or sets the current DSCP to UP table cached by the driver. Since there
10768  * are 64 possible DSCP values to configure, this sysctl only configures
10769  * chunks of 8 in that space at a time.
10770  *
10771  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10772  * mode.
10773  */
10774 static int
10775 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10776 {
10777 	struct ice_softc *sc = (struct ice_softc *)arg1;
10778 	struct ice_dcbx_cfg *local_dcbx_cfg;
10779 	struct ice_port_info *pi;
10780 	struct ice_hw *hw = &sc->hw;
10781 	device_t dev = sc->dev;
10782 	enum ice_status status;
10783 	struct sbuf *sbuf;
10784 	int ret;
10785 
10786 	/* Store input rates from user */
10787 	char dscp_user_buf[128] = "";
10788 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10789 
10790 	if (ice_driver_is_detaching(sc))
10791 		return (ESHUTDOWN);
10792 
10793 	if (req->oldptr == NULL && req->newptr == NULL) {
10794 		ret = SYSCTL_OUT(req, 0, 128);
10795 		return (ret);
10796 	}
10797 
10798 	pi = hw->port_info;
10799 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10800 
10801 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10802 
10803 	/* Format DSCP-to-UP data for output */
10804 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10805 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10806 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10807 			sbuf_printf(sbuf, ",");
10808 	}
10809 
10810 	sbuf_finish(sbuf);
10811 	sbuf_delete(sbuf);
10812 
10813 	/* Read in the new DSCP mapping values */
10814 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10815 	if ((ret) || (req->newptr == NULL))
10816 		return (ret);
10817 
10818 	/* Don't allow setting changes in FW DCB mode */
10819 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10820 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10821 		    __func__);
10822 		return (EINVAL);
10823 	}
10824 
10825 	/* Convert 8 values in a string to a table; this is similar to what
10826 	 * needs to be done for ETS settings, so this function can be re-used
10827 	 * for that purpose.
10828 	 */
10829 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10830 	if (ret) {
10831 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10832 		    __func__, dscp_user_buf);
10833 		return (ret);
10834 	}
10835 
10836 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10837 	    sizeof(new_dscp_table_seg));
10838 
10839 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10840 
10841 	status = ice_set_dcb_cfg(pi);
10842 	if (status) {
10843 		device_printf(dev,
10844 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10845 		    __func__, ice_status_str(status),
10846 		    ice_aq_str(hw->adminq.sq_last_status));
10847 		return (EIO);
10848 	}
10849 
10850 	ice_do_dcb_reconfig(sc, false);
10851 
10852 	return (0);
10853 }
10854 
10855 /**
10856  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10857  * @sc: the device private softc
10858  * @ifd: ifdrv ioctl request pointer
10859  */
10860 int
10861 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10862 {
10863 	size_t ifd_len = ifd->ifd_len;
10864 	struct ice_hw *hw = &sc->hw;
10865 	device_t dev = sc->dev;
10866 	struct ice_debug_dump_cmd *ddc;
10867 	enum ice_status status;
10868 	int err = 0;
10869 
10870 	/* Returned arguments from the Admin Queue */
10871 	u16 ret_buf_size = 0;
10872 	u16 ret_next_table = 0;
10873 	u32 ret_next_index = 0;
10874 
10875 	/*
10876 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
10877 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
10878 	 * without performing a privilege check. Perform one here to ensure
10879 	 * that non-privileged threads cannot access this interface.
10880 	 */
10881 	err = priv_check(curthread, PRIV_DRIVER);
10882 	if (err)
10883 		return (err);
10884 
10885 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
10886 		device_printf(dev,
10887 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
10888 		    __func__);
10889 		return (EBUSY);
10890 	}
10891 
10892 	if (ifd_len < sizeof(*ddc)) {
10893 		device_printf(dev,
10894 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
10895 		    __func__, ifd_len, sizeof(*ddc));
10896 		return (EINVAL);
10897 	}
10898 
10899 	if (ifd->ifd_data == NULL) {
10900 		device_printf(dev, "%s: ifd data buffer not present.\n",
10901 		     __func__);
10902 		return (EINVAL);
10903 	}
10904 
10905 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
10906 	if (!ddc)
10907 		return (ENOMEM);
10908 
10909 	/* Copy the NVM access command and data in from user space */
10910 	/* coverity[tainted_data_argument] */
10911 	err = copyin(ifd->ifd_data, ddc, ifd_len);
10912 	if (err) {
10913 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
10914 			      __func__, ice_err_str(err));
10915 		goto out;
10916 	}
10917 
10918 	/* The data_size arg must be at least 1 for the AQ cmd to work */
10919 	if (ddc->data_size == 0) {
10920 		device_printf(dev,
10921 		    "%s: data_size must be greater than 0\n", __func__);
10922 		err = EINVAL;
10923 		goto out;
10924 	}
10925 	/* ...and it can't be too long */
10926 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
10927 		device_printf(dev,
10928 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
10929 		    ddc->data_size, ifd_len - sizeof(*ddc));
10930 		err = EINVAL;
10931 		goto out;
10932 	}
10933 
10934 	/* Make sure any possible data buffer space is zeroed */
10935 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
10936 
10937 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
10938 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size, &ret_next_table, &ret_next_index, NULL);
10939 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
10940 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
10941 	if (status) {
10942 		device_printf(dev,
10943 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
10944 		    __func__,
10945 		    ice_status_str(status),
10946 		    ice_aq_str(hw->adminq.sq_last_status));
10947 		goto aq_error;
10948 	}
10949 
10950 	ddc->table_id = ret_next_table;
10951 	ddc->offset = ret_next_index;
10952 	ddc->data_size = ret_buf_size;
10953 
10954 	/* Copy the possibly modified contents of the handled request out */
10955 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
10956 	if (err) {
10957 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
10958 			      __func__, ice_err_str(err));
10959 		goto out;
10960 	}
10961 
10962 aq_error:
10963 	/* Convert private status to an error code for proper ioctl response */
10964 	switch (status) {
10965 	case ICE_SUCCESS:
10966 		err = (0);
10967 		break;
10968 	case ICE_ERR_NO_MEMORY:
10969 		err = (ENOMEM);
10970 		break;
10971 	case ICE_ERR_OUT_OF_RANGE:
10972 		err = (ENOTTY);
10973 		break;
10974 	case ICE_ERR_AQ_ERROR:
10975 		err = (EIO);
10976 		break;
10977 	case ICE_ERR_PARAM:
10978 	default:
10979 		err = (EINVAL);
10980 		break;
10981 	}
10982 
10983 out:
10984 	free(ddc, M_ICE);
10985 	return (err);
10986 }
10987 
10988 /**
10989  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
10990  * @oidp: sysctl oid structure
10991  * @arg1: pointer to private data structure
10992  * @arg2: unused
10993  * @req: sysctl request pointer
10994  *
10995  * Allows user to let "No FEC" mode to be used in "Auto"
10996  * FEC mode during FEC negotiation. This is only supported
10997  * on newer firmware versions.
10998  */
10999 static int
11000 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
11001 {
11002 	struct ice_softc *sc = (struct ice_softc *)arg1;
11003 	struct ice_hw *hw = &sc->hw;
11004 	device_t dev = sc->dev;
11005 	u8 user_flag;
11006 	int ret;
11007 
11008 	UNREFERENCED_PARAMETER(arg2);
11009 
11010 	ret = priv_check(curthread, PRIV_DRIVER);
11011 	if (ret)
11012 		return (ret);
11013 
11014 	if (ice_driver_is_detaching(sc))
11015 		return (ESHUTDOWN);
11016 
11017 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
11018 
11019 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
11020 	if ((ret) || (req->newptr == NULL))
11021 		return (ret);
11022 
11023 	if (!ice_fw_supports_fec_dis_auto(hw)) {
11024 		log(LOG_INFO,
11025 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
11026 		    device_get_nameunit(dev));
11027 		return (ENODEV);
11028 	}
11029 
11030 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
11031 		return (0);
11032 
11033 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
11034 
11035 	if (sc->allow_no_fec_mod_in_auto)
11036 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
11037 		    device_get_nameunit(dev));
11038 	else
11039 		log(LOG_INFO,
11040 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
11041 		    device_get_nameunit(dev));
11042 
11043 	return (0);
11044 }
11045 
11046 /**
11047  * ice_sysctl_temperature - Retrieve NIC temp via AQ command
11048  * @oidp: sysctl oid structure
11049  * @arg1: pointer to private data structure
11050  * @arg2: unused
11051  * @req: sysctl request pointer
11052  *
11053  * If ICE_DBG_DIAG is set in the debug.debug_mask sysctl, then this will print
11054  * temperature threshold information in the kernel message log, too.
11055  */
11056 static int
11057 ice_sysctl_temperature(SYSCTL_HANDLER_ARGS)
11058 {
11059 	struct ice_aqc_get_sensor_reading_resp resp;
11060 	struct ice_softc *sc = (struct ice_softc *)arg1;
11061 	struct ice_hw *hw = &sc->hw;
11062 	device_t dev = sc->dev;
11063 	enum ice_status status;
11064 
11065 	UNREFERENCED_PARAMETER(oidp);
11066 	UNREFERENCED_PARAMETER(arg2);
11067 
11068 	if (ice_driver_is_detaching(sc))
11069 		return (ESHUTDOWN);
11070 
11071 	status = ice_aq_get_sensor_reading(hw, ICE_AQC_INT_TEMP_SENSOR,
11072 	    ICE_AQC_INT_TEMP_FORMAT, &resp, NULL);
11073 	if (status != ICE_SUCCESS) {
11074 		device_printf(dev,
11075 		    "Get Sensor Reading AQ call failed, err %s aq_err %s\n",
11076 		    ice_status_str(status),
11077 		    ice_aq_str(hw->adminq.sq_last_status));
11078 		return (EIO);
11079 	}
11080 
11081 	ice_debug(hw, ICE_DBG_DIAG, "%s: Warning Temp Threshold: %d\n", __func__,
11082 	    resp.data.s0f0.temp_warning_threshold);
11083 	ice_debug(hw, ICE_DBG_DIAG, "%s: Critical Temp Threshold: %d\n", __func__,
11084 	    resp.data.s0f0.temp_critical_threshold);
11085 	ice_debug(hw, ICE_DBG_DIAG, "%s: Fatal Temp Threshold: %d\n", __func__,
11086 	    resp.data.s0f0.temp_fatal_threshold);
11087 
11088 	return sysctl_handle_8(oidp, &resp.data.s0f0.temp, 0, req);
11089 }
11090