xref: /freebsd/sys/dev/sfxge/sfxge_port.c (revision 069ac184)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/limits.h>
38 #include <net/ethernet.h>
39 #include <net/if_dl.h>
40 
41 #include "common/efx.h"
42 
43 #include "sfxge.h"
44 
/*
 * Driver-wide default for the MAC statistics update period, in milliseconds.
 *
 * The value is exposed as a loader tunable and as a CTLFLAG_RDTUN sysctl
 * under _hw_sfxge. It is read by sfxge_port_stats_update_period_ms(), which
 * clamps it to the range [0, UINT16_MAX] before sfxge_port_init() stores it
 * in the port as the initial per-port period.
 */
#define	SFXGE_PARAM_STATS_UPDATE_PERIOD_MS \
	SFXGE_PARAM(stats_update_period_ms)
static int sfxge_stats_update_period_ms = SFXGE_STATS_UPDATE_PERIOD_MS;
TUNABLE_INT(SFXGE_PARAM_STATS_UPDATE_PERIOD_MS,
	    &sfxge_stats_update_period_ms);
SYSCTL_INT(_hw_sfxge, OID_AUTO, stats_update_period_ms, CTLFLAG_RDTUN,
	   &sfxge_stats_update_period_ms, 0,
	   "netstat interface statistics update period in milliseconds");

/*
 * Forward declaration: sfxge_phy_cap_mask() is defined later in this file
 * but is already needed by sfxge_port_start().
 */
static int sfxge_phy_cap_mask(struct sfxge_softc *, int, uint32_t *);
55 
56 static int
57 sfxge_mac_stat_update(struct sfxge_softc *sc)
58 {
59 	struct sfxge_port *port = &sc->port;
60 	efsys_mem_t *esmp = &(port->mac_stats.dma_buf);
61 	clock_t now;
62 	unsigned int min_ticks;
63 	unsigned int count;
64 	int rc;
65 
66 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
67 
68 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
69 		rc = 0;
70 		goto out;
71 	}
72 
73 	min_ticks = (unsigned int)hz * port->stats_update_period_ms / 1000;
74 
75 	now = ticks;
76 	if ((unsigned int)(now - port->mac_stats.update_time) < min_ticks) {
77 		rc = 0;
78 		goto out;
79 	}
80 
81 	port->mac_stats.update_time = now;
82 
83 	/* If we're unlucky enough to read statistics wduring the DMA, wait
84 	 * up to 10ms for it to finish (typically takes <500us) */
85 	for (count = 0; count < 100; ++count) {
86 		EFSYS_PROBE1(wait, unsigned int, count);
87 
88 		/* Try to update the cached counters */
89 		if ((rc = efx_mac_stats_update(sc->enp, esmp,
90 		    port->mac_stats.decode_buf, NULL)) != EAGAIN)
91 			goto out;
92 
93 		DELAY(100);
94 	}
95 
96 	rc = ETIMEDOUT;
97 out:
98 	return (rc);
99 }
100 
101 uint64_t
102 sfxge_get_counter(if_t ifp, ift_counter c)
103 {
104 	struct sfxge_softc *sc = if_getsoftc(ifp);
105 	uint64_t *mac_stats;
106 	uint64_t val;
107 
108 	SFXGE_PORT_LOCK(&sc->port);
109 
110 	/* Ignore error and use old values */
111 	(void)sfxge_mac_stat_update(sc);
112 
113 	mac_stats = (uint64_t *)sc->port.mac_stats.decode_buf;
114 
115 	switch (c) {
116 	case IFCOUNTER_IPACKETS:
117 		val = mac_stats[EFX_MAC_RX_PKTS];
118 		break;
119 	case IFCOUNTER_IERRORS:
120 		val = mac_stats[EFX_MAC_RX_ERRORS];
121 		break;
122 	case IFCOUNTER_OPACKETS:
123 		val = mac_stats[EFX_MAC_TX_PKTS];
124 		break;
125 	case IFCOUNTER_OERRORS:
126 		val = mac_stats[EFX_MAC_TX_ERRORS];
127 		break;
128 	case IFCOUNTER_COLLISIONS:
129 		val = mac_stats[EFX_MAC_TX_SGL_COL_PKTS] +
130 		      mac_stats[EFX_MAC_TX_MULT_COL_PKTS] +
131 		      mac_stats[EFX_MAC_TX_EX_COL_PKTS] +
132 		      mac_stats[EFX_MAC_TX_LATE_COL_PKTS];
133 		break;
134 	case IFCOUNTER_IBYTES:
135 		val = mac_stats[EFX_MAC_RX_OCTETS];
136 		break;
137 	case IFCOUNTER_OBYTES:
138 		val = mac_stats[EFX_MAC_TX_OCTETS];
139 		break;
140 	case IFCOUNTER_OMCASTS:
141 		val = mac_stats[EFX_MAC_TX_MULTICST_PKTS] +
142 		      mac_stats[EFX_MAC_TX_BRDCST_PKTS];
143 		break;
144 	case IFCOUNTER_OQDROPS:
145 		SFXGE_PORT_UNLOCK(&sc->port);
146 		return (sfxge_tx_get_drops(sc));
147 	case IFCOUNTER_IMCASTS:
148 		/* if_imcasts is maintained in net/if_ethersubr.c */
149 	case IFCOUNTER_IQDROPS:
150 		/* if_iqdrops is maintained in net/if_ethersubr.c */
151 	case IFCOUNTER_NOPROTO:
152 		/* if_noproto is maintained in net/if_ethersubr.c */
153 	default:
154 		SFXGE_PORT_UNLOCK(&sc->port);
155 		return (if_get_counter_default(ifp, c));
156 	}
157 
158 	SFXGE_PORT_UNLOCK(&sc->port);
159 
160 	return (val);
161 }
162 
163 static int
164 sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS)
165 {
166 	struct sfxge_softc *sc = arg1;
167 	unsigned int id = arg2;
168 	int rc;
169 	uint64_t val;
170 
171 	SFXGE_PORT_LOCK(&sc->port);
172 	if ((rc = sfxge_mac_stat_update(sc)) == 0)
173 		val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id];
174 	SFXGE_PORT_UNLOCK(&sc->port);
175 
176 	if (rc == 0)
177 		rc = SYSCTL_OUT(req, &val, sizeof(val));
178 	return (rc);
179 }
180 
181 static void
182 sfxge_mac_stat_init(struct sfxge_softc *sc)
183 {
184 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
185 	struct sysctl_oid_list *stat_list;
186 	unsigned int id;
187 	const char *name;
188 
189 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
190 
191 	/* Initialise the named stats */
192 	for (id = 0; id < EFX_MAC_NSTATS; id++) {
193 		name = efx_mac_stat_name(sc->enp, id);
194 		SYSCTL_ADD_PROC(ctx, stat_list, OID_AUTO, name,
195 		    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
196 		    sc, id, sfxge_mac_stat_handler, "Q", "");
197 	}
198 }
199 
200 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
201 
202 static unsigned int
203 sfxge_port_wanted_fc(struct sfxge_softc *sc)
204 {
205 	struct ifmedia_entry *ifm = sc->media.ifm_cur;
206 
207 	if (ifm->ifm_media == (IFM_ETHER | IFM_AUTO))
208 		return (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE);
209 	return (((ifm->ifm_media & IFM_ETH_RXPAUSE) ? EFX_FCNTL_RESPOND : 0) |
210 		((ifm->ifm_media & IFM_ETH_TXPAUSE) ? EFX_FCNTL_GENERATE : 0));
211 }
212 
213 static unsigned int
214 sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
215 {
216 	unsigned int wanted_fc, link_fc;
217 
218 	efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
219 	return ((link_fc & EFX_FCNTL_RESPOND) ? IFM_ETH_RXPAUSE : 0) |
220 		((link_fc & EFX_FCNTL_GENERATE) ? IFM_ETH_TXPAUSE : 0);
221 }
222 
223 #else /* !SFXGE_HAVE_PAUSE_MEDIAOPTS */
224 
225 static unsigned int
226 sfxge_port_wanted_fc(struct sfxge_softc *sc)
227 {
228 	return (sc->port.wanted_fc);
229 }
230 
/*
 * Without pause media options there are no ifmedia bits to report for the
 * link flow control state, so this always contributes nothing.
 */
static unsigned int
sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
{
	(void)sc;

	return (0);
}
236 
237 static int
238 sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS)
239 {
240 	struct sfxge_softc *sc;
241 	struct sfxge_port *port;
242 	unsigned int fcntl;
243 	int error;
244 
245 	sc = arg1;
246 	port = &sc->port;
247 
248 	if (req->newptr != NULL) {
249 		if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0)
250 			return (error);
251 
252 		SFXGE_PORT_LOCK(port);
253 
254 		if (port->wanted_fc != fcntl) {
255 			if (port->init_state == SFXGE_PORT_STARTED)
256 				error = efx_mac_fcntl_set(sc->enp,
257 							  port->wanted_fc,
258 							  B_TRUE);
259 			if (error == 0)
260 				port->wanted_fc = fcntl;
261 		}
262 
263 		SFXGE_PORT_UNLOCK(port);
264 	} else {
265 		SFXGE_PORT_LOCK(port);
266 		fcntl = port->wanted_fc;
267 		SFXGE_PORT_UNLOCK(port);
268 
269 		error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl));
270 	}
271 
272 	return (error);
273 }
274 
275 static int
276 sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
277 {
278 	struct sfxge_softc *sc;
279 	struct sfxge_port *port;
280 	unsigned int wanted_fc, link_fc;
281 
282 	sc = arg1;
283 	port = &sc->port;
284 
285 	SFXGE_PORT_LOCK(port);
286 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED) &&
287 	    SFXGE_LINK_UP(sc))
288 		efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
289 	else
290 		link_fc = 0;
291 	SFXGE_PORT_UNLOCK(port);
292 
293 	return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc)));
294 }
295 
296 #endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */
297 
/*
 * Interface baudrate for each link mode, indexed by efx_link_mode_t.
 * sfxge_mac_link_update() passes the selected entry to if_setbaudrate().
 * Half- and full-duplex variants of the same speed share one value; link
 * modes without an explicit initializer here default to 0.
 */
static const uint64_t sfxge_link_baudrate[EFX_LINK_NMODES] = {
	[EFX_LINK_10HDX]	= IF_Mbps(10),
	[EFX_LINK_10FDX]	= IF_Mbps(10),
	[EFX_LINK_100HDX]	= IF_Mbps(100),
	[EFX_LINK_100FDX]	= IF_Mbps(100),
	[EFX_LINK_1000HDX]	= IF_Gbps(1),
	[EFX_LINK_1000FDX]	= IF_Gbps(1),
	[EFX_LINK_10000FDX]	= IF_Gbps(10),
	[EFX_LINK_25000FDX]	= IF_Gbps(25),
	[EFX_LINK_40000FDX]	= IF_Gbps(40),
	[EFX_LINK_50000FDX]	= IF_Gbps(50),
	[EFX_LINK_100000FDX]	= IF_Gbps(100),
};
311 
312 void
313 sfxge_mac_link_update(struct sfxge_softc *sc, efx_link_mode_t mode)
314 {
315 	struct sfxge_port *port;
316 	int link_state;
317 
318 	port = &sc->port;
319 
320 	if (port->link_mode == mode)
321 		return;
322 
323 	port->link_mode = mode;
324 
325 	/* Push link state update to the OS */
326 	link_state = (SFXGE_LINK_UP(sc) ? LINK_STATE_UP : LINK_STATE_DOWN);
327 	if_setbaudrate(sc->ifnet, sfxge_link_baudrate[port->link_mode]);
328 	if_link_state_change(sc->ifnet, link_state);
329 }
330 
331 static void
332 sfxge_mac_poll_work(void *arg, int npending)
333 {
334 	struct sfxge_softc *sc;
335 	efx_nic_t *enp;
336 	struct sfxge_port *port;
337 	efx_link_mode_t mode;
338 
339 	sc = (struct sfxge_softc *)arg;
340 	enp = sc->enp;
341 	port = &sc->port;
342 
343 	SFXGE_PORT_LOCK(port);
344 
345 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED))
346 		goto done;
347 
348 	/* This may sleep waiting for MCDI completion */
349 	(void)efx_port_poll(enp, &mode);
350 	sfxge_mac_link_update(sc, mode);
351 
352 done:
353 	SFXGE_PORT_UNLOCK(port);
354 }
355 
356 static u_int
357 sfxge_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
358 {
359 	uint8_t *mcast_addr = arg;
360 
361 	if (cnt == EFX_MAC_MULTICAST_LIST_MAX)
362 		return (0);
363 
364 	memcpy(mcast_addr + (cnt * EFX_MAC_ADDR_LEN), LLADDR(sdl),
365 	    EFX_MAC_ADDR_LEN);
366 
367 	return (1);
368 }
369 
370 static int
371 sfxge_mac_multicast_list_set(struct sfxge_softc *sc)
372 {
373 	if_t ifp = sc->ifnet;
374 	struct sfxge_port *port = &sc->port;
375 	int rc = 0;
376 
377 	mtx_assert(&port->lock, MA_OWNED);
378 
379 	port->mcast_count = if_foreach_llmaddr(ifp, sfxge_copy_maddr,
380 	    port->mcast_addrs);
381 	if (port->mcast_count == EFX_MAC_MULTICAST_LIST_MAX) {
382 		device_printf(sc->dev, "Too many multicast addresses\n");
383 		rc = EINVAL;
384 	}
385 
386 	if (rc == 0) {
387 		rc = efx_mac_multicast_list_set(sc->enp, port->mcast_addrs,
388 						port->mcast_count);
389 		if (rc != 0)
390 			device_printf(sc->dev,
391 			    "Cannot set multicast address list\n");
392 	}
393 
394 	return (rc);
395 }
396 
397 static int
398 sfxge_mac_filter_set_locked(struct sfxge_softc *sc)
399 {
400 	if_t ifp = sc->ifnet;
401 	struct sfxge_port *port = &sc->port;
402 	boolean_t all_mulcst;
403 	int rc;
404 
405 	mtx_assert(&port->lock, MA_OWNED);
406 
407 	all_mulcst = !!(if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI));
408 
409 	rc = sfxge_mac_multicast_list_set(sc);
410 	/* Fallback to all multicast if cannot set multicast list */
411 	if (rc != 0)
412 		all_mulcst = B_TRUE;
413 
414 	rc = efx_mac_filter_set(sc->enp, !!(if_getflags(ifp) & IFF_PROMISC),
415 				(port->mcast_count > 0), all_mulcst, B_TRUE);
416 
417 	return (rc);
418 }
419 
420 int
421 sfxge_mac_filter_set(struct sfxge_softc *sc)
422 {
423 	struct sfxge_port *port = &sc->port;
424 	int rc;
425 
426 	SFXGE_PORT_LOCK(port);
427 	/*
428 	 * The function may be called without softc_lock held in the
429 	 * case of SIOCADDMULTI and SIOCDELMULTI ioctls. ioctl handler
430 	 * checks IFF_DRV_RUNNING flag which implies port started, but
431 	 * it is not guaranteed to remain. softc_lock shared lock can't
432 	 * be held in the case of these ioctls processing, since it
433 	 * results in failure where kernel complains that non-sleepable
434 	 * lock is held in sleeping thread. Both problems are repeatable
435 	 * on LAG with LACP proto bring up.
436 	 */
437 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED))
438 		rc = sfxge_mac_filter_set_locked(sc);
439 	else
440 		rc = 0;
441 	SFXGE_PORT_UNLOCK(port);
442 	return (rc);
443 }
444 
/*
 * Stop a started port.
 *
 * Under the port lock: moves the port from SFXGE_PORT_STARTED back to
 * SFXGE_PORT_INITIALIZED, resets the MAC statistics timestamp, drains the
 * MAC, switches off periodic MAC statistics (period 0), marks the link mode
 * unknown and finally tears down the common-code port and filter objects.
 * Errors from the drain and statistics calls are deliberately ignored.
 *
 * The port must be in the SFXGE_PORT_STARTED state (asserted).
 */
void
sfxge_port_stop(struct sfxge_softc *sc)
{
	struct sfxge_port *port;
	efx_nic_t *enp;

	port = &sc->port;
	enp = sc->enp;

	SFXGE_PORT_LOCK(port);

	KASSERT(port->init_state == SFXGE_PORT_STARTED,
	    ("port not started"));

	/*
	 * Leave the STARTED state first so that lock holders which check
	 * init_state (stat updates, poll work, sysctl handlers) stop
	 * touching the hardware.
	 */
	port->init_state = SFXGE_PORT_INITIALIZED;

	port->mac_stats.update_time = 0;

	/* This may call MCDI */
	(void)efx_mac_drain(enp, B_TRUE);

	/* A period of 0 disables the periodic statistics update. */
	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);

	port->link_mode = EFX_LINK_UNKNOWN;

	/* Destroy the common code port object. */
	efx_port_fini(enp);

	efx_filter_fini(enp);

	SFXGE_PORT_UNLOCK(port);
}
477 
/*
 * Start an initialized port.
 *
 * Under the port lock this initialises the common-code filter and port
 * objects, programs the MAC (PDU, flow control, unicast address, receive
 * filter), enables periodic MAC statistics, takes the MAC out of drain and
 * sets the advertised PHY capabilities for the currently selected media.
 * On success the port enters SFXGE_PORT_STARTED, the lock is released and
 * the link is polled once.
 *
 * The port must be in the SFXGE_PORT_INITIALIZED state (asserted).
 *
 * Returns 0 on success. On failure the steps already performed are undone
 * through the fail* labels, the port lock is released and the error of the
 * failing call is returned.
 */
int
sfxge_port_start(struct sfxge_softc *sc)
{
	uint8_t mac_addr[ETHER_ADDR_LEN];
	struct epoch_tracker et;
	if_t ifp = sc->ifnet;
	struct sfxge_port *port;
	efx_nic_t *enp;
	size_t pdu;
	int rc;
	uint32_t phy_cap_mask;

	port = &sc->port;
	enp = sc->enp;

	SFXGE_PORT_LOCK(port);

	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
	    ("port not initialized"));

	/* Initialise the required filtering */
	if ((rc = efx_filter_init(enp)) != 0)
		goto fail_filter_init;

	/* Initialize the port object in the common code. */
	if ((rc = efx_port_init(sc->enp)) != 0)
		goto fail;

	/* Set the MAC PDU size, derived from the interface MTU */
	pdu = EFX_MAC_PDU(if_getmtu(ifp));
	if ((rc = efx_mac_pdu_set(enp, pdu)) != 0)
		goto fail2;

	if ((rc = efx_mac_fcntl_set(enp, sfxge_port_wanted_fc(sc), B_TRUE))
	    != 0)
		goto fail3;

	/*
	 * Set the unicast address. The link-level address is copied to a
	 * local buffer inside a network epoch section before being used.
	 */
	NET_EPOCH_ENTER(et);
	bcopy(if_getlladdr(ifp), mac_addr, sizeof(mac_addr));
	NET_EPOCH_EXIT(et);
	if ((rc = efx_mac_addr_set(enp, mac_addr)) != 0)
		goto fail4;

	/* NOTE(review): the result of the filter setup is not checked here. */
	sfxge_mac_filter_set_locked(sc);

	/* Update MAC stats by DMA every period */
	if ((rc = efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf,
					 port->stats_update_period_ms,
					 B_FALSE)) != 0)
		goto fail6;

	if ((rc = efx_mac_drain(enp, B_FALSE)) != 0)
		goto fail8;

	if ((rc = sfxge_phy_cap_mask(sc, sc->media.ifm_cur->ifm_media,
				     &phy_cap_mask)) != 0)
		goto fail9;

	if ((rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask)) != 0)
		goto fail10;

	port->init_state = SFXGE_PORT_STARTED;

	/*
	 * Single poll in case there were missing initial events.
	 * sfxge_mac_poll_work() takes the port lock itself, so it must be
	 * released first.
	 */
	SFXGE_PORT_UNLOCK(port);
	sfxge_mac_poll_work(sc, 0);

	return (0);

	/* Unwind in reverse order of setup; labels fall through. */
fail10:
fail9:
	(void)efx_mac_drain(enp, B_TRUE);
fail8:
	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
fail6:
fail4:
fail3:

fail2:
	efx_port_fini(enp);
fail:
	efx_filter_fini(enp);
fail_filter_init:
	SFXGE_PORT_UNLOCK(port);

	return (rc);
}
566 
567 static int
568 sfxge_phy_stat_update(struct sfxge_softc *sc)
569 {
570 	struct sfxge_port *port = &sc->port;
571 	efsys_mem_t *esmp = &port->phy_stats.dma_buf;
572 	clock_t now;
573 	unsigned int count;
574 	int rc;
575 
576 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
577 
578 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
579 		rc = 0;
580 		goto out;
581 	}
582 
583 	now = ticks;
584 	if ((unsigned int)(now - port->phy_stats.update_time) < (unsigned int)hz) {
585 		rc = 0;
586 		goto out;
587 	}
588 
589 	port->phy_stats.update_time = now;
590 
591 	/* If we're unlucky enough to read statistics wduring the DMA, wait
592 	 * up to 10ms for it to finish (typically takes <500us) */
593 	for (count = 0; count < 100; ++count) {
594 		EFSYS_PROBE1(wait, unsigned int, count);
595 
596 		/* Synchronize the DMA memory for reading */
597 		bus_dmamap_sync(esmp->esm_tag, esmp->esm_map,
598 		    BUS_DMASYNC_POSTREAD);
599 
600 		/* Try to update the cached counters */
601 		if ((rc = efx_phy_stats_update(sc->enp, esmp,
602 		    port->phy_stats.decode_buf)) != EAGAIN)
603 			goto out;
604 
605 		DELAY(100);
606 	}
607 
608 	rc = ETIMEDOUT;
609 out:
610 	return (rc);
611 }
612 
613 static int
614 sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS)
615 {
616 	struct sfxge_softc *sc = arg1;
617 	unsigned int id = arg2;
618 	int rc;
619 	uint32_t val;
620 
621 	SFXGE_PORT_LOCK(&sc->port);
622 	if ((rc = sfxge_phy_stat_update(sc)) == 0)
623 		val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id];
624 	SFXGE_PORT_UNLOCK(&sc->port);
625 
626 	if (rc == 0)
627 		rc = SYSCTL_OUT(req, &val, sizeof(val));
628 	return (rc);
629 }
630 
631 static void
632 sfxge_phy_stat_init(struct sfxge_softc *sc)
633 {
634 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
635 	struct sysctl_oid_list *stat_list;
636 	unsigned int id;
637 	const char *name;
638 	uint64_t stat_mask = efx_nic_cfg_get(sc->enp)->enc_phy_stat_mask;
639 
640 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
641 
642 	/* Initialise the named stats */
643 	for (id = 0; id < EFX_PHY_NSTATS; id++) {
644 		if (!(stat_mask & ((uint64_t)1 << id)))
645 			continue;
646 		name = efx_phy_stat_name(sc->enp, id);
647 		SYSCTL_ADD_PROC(ctx, stat_list, OID_AUTO, name,
648 		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE,
649 		    sc, id, sfxge_phy_stat_handler,
650 		    id == EFX_PHY_STAT_OUI ? "IX" : "IU", "");
651 	}
652 }
653 
/*
 * Release everything sfxge_port_init() set up for the port: the PHY and MAC
 * statistics DMA buffers, their decode buffers and the port lock.
 *
 * The port must be in the SFXGE_PORT_INITIALIZED state (asserted), i.e. not
 * started; it is left in SFXGE_PORT_UNINITIALIZED with an unknown link mode
 * and a cleared back-pointer to the softc.
 */
void
sfxge_port_fini(struct sfxge_softc *sc)
{
	struct sfxge_port *port;
	efsys_mem_t *esmp;

	port = &sc->port;
	esmp = &port->mac_stats.dma_buf;

	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
	    ("Port not initialized"));

	port->init_state = SFXGE_PORT_UNINITIALIZED;

	port->link_mode = EFX_LINK_UNKNOWN;

	/* Finish with PHY DMA memory */
	sfxge_dma_free(&port->phy_stats.dma_buf);
	free(port->phy_stats.decode_buf, M_SFXGE);

	/* Finish with MAC DMA memory */
	sfxge_dma_free(esmp);
	free(port->mac_stats.decode_buf, M_SFXGE);

	SFXGE_PORT_LOCK_DESTROY(port);

	port->sc = NULL;
}
681 
682 static uint16_t
683 sfxge_port_stats_update_period_ms(struct sfxge_softc *sc)
684 {
685 	int period_ms = sfxge_stats_update_period_ms;
686 
687 	if (period_ms < 0) {
688 		device_printf(sc->dev,
689 			"treat negative stats update period %d as 0 (disable)\n",
690 			 period_ms);
691 		period_ms = 0;
692 	} else if (period_ms > UINT16_MAX) {
693 		device_printf(sc->dev,
694 			"treat too big stats update period %d as %u\n",
695 			period_ms, UINT16_MAX);
696 		period_ms = UINT16_MAX;
697 	}
698 
699 	return period_ms;
700 }
701 
702 static int
703 sfxge_port_stats_update_period_ms_handler(SYSCTL_HANDLER_ARGS)
704 {
705 	struct sfxge_softc *sc;
706 	struct sfxge_port *port;
707 	unsigned int period_ms;
708 	int error;
709 
710 	sc = arg1;
711 	port = &sc->port;
712 
713 	if (req->newptr != NULL) {
714 		error = SYSCTL_IN(req, &period_ms, sizeof(period_ms));
715 		if (error != 0)
716 			return (error);
717 
718 		if (period_ms > UINT16_MAX)
719 			return (EINVAL);
720 
721 		SFXGE_PORT_LOCK(port);
722 
723 		if (port->stats_update_period_ms != period_ms) {
724 			if (port->init_state == SFXGE_PORT_STARTED)
725 				error = efx_mac_stats_periodic(sc->enp,
726 						&port->mac_stats.dma_buf,
727 						period_ms, B_FALSE);
728 			if (error == 0)
729 				port->stats_update_period_ms = period_ms;
730 		}
731 
732 		SFXGE_PORT_UNLOCK(port);
733 	} else {
734 		SFXGE_PORT_LOCK(port);
735 		period_ms = port->stats_update_period_ms;
736 		SFXGE_PORT_UNLOCK(port);
737 
738 		error = SYSCTL_OUT(req, &period_ms, sizeof(period_ms));
739 	}
740 
741 	return (error);
742 }
743 
/*
 * One-time port initialisation.
 *
 * Creates the port lock, allocates the PHY and MAC statistics decode buffers
 * and DMA buffers, registers the per-statistic sysctls and the
 * "stats_update_period_ms" sysctl (plus "wanted_fc"/"link_fc" when flow
 * control cannot be expressed through ifmedia), and seeds the per-port
 * statistics period from the driver-wide tunable.
 *
 * The port must be in the SFXGE_PORT_UNINITIALIZED state (asserted) and is
 * left in SFXGE_PORT_INITIALIZED on success.
 *
 * Returns 0 on success or the error from sfxge_dma_alloc(). On failure the
 * buffers allocated so far are freed and the lock is destroyed; sysctl nodes
 * already added are not removed here (presumably they go away with the
 * device sysctl context; confirm against the attach failure path).
 */
int
sfxge_port_init(struct sfxge_softc *sc)
{
	struct sfxge_port *port;
	struct sysctl_ctx_list *sysctl_ctx;
	struct sysctl_oid *sysctl_tree;
	efsys_mem_t *mac_stats_buf, *phy_stats_buf;
	uint32_t mac_nstats;
	size_t mac_stats_size;
	int rc;

	port = &sc->port;
	mac_stats_buf = &port->mac_stats.dma_buf;
	phy_stats_buf = &port->phy_stats.dma_buf;

	KASSERT(port->init_state == SFXGE_PORT_UNINITIALIZED,
	    ("Port already initialized"));

	port->sc = sc;

	SFXGE_PORT_LOCK_INIT(port, device_get_nameunit(sc->dev));

	/* PHY statistics: decode buffer of 32-bit counters plus DMA buffer. */
	DBGPRINT(sc->dev, "alloc PHY stats");
	port->phy_stats.decode_buf = malloc(EFX_PHY_NSTATS * sizeof(uint32_t),
					    M_SFXGE, M_WAITOK | M_ZERO);
	if ((rc = sfxge_dma_alloc(sc, EFX_PHY_STATS_SIZE, phy_stats_buf)) != 0)
		goto fail;
	sfxge_phy_stat_init(sc);

	DBGPRINT(sc->dev, "init sysctl");
	sysctl_ctx = device_get_sysctl_ctx(sc->dev);
	sysctl_tree = device_get_sysctl_tree(sc->dev);

#ifndef SFXGE_HAVE_PAUSE_MEDIAOPTS
	/* If flow control cannot be configured or reported through
	 * ifmedia, provide sysctls for it. */
	port->wanted_fc = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
	    "wanted_fc", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    sfxge_port_wanted_fc_handler, "IU", "wanted flow control mode");
	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
	    "link_fc", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    sfxge_port_link_fc_handler, "IU", "link flow control mode");
#endif

	/*
	 * MAC statistics: the decode buffer holds EFX_MAC_NSTATS 64-bit
	 * counters, while the DMA buffer is sized from the number of
	 * statistics the NIC reports, rounded up to EFX_BUF_SIZE.
	 */
	DBGPRINT(sc->dev, "alloc MAC stats");
	port->mac_stats.decode_buf = malloc(EFX_MAC_NSTATS * sizeof(uint64_t),
					    M_SFXGE, M_WAITOK | M_ZERO);
	mac_nstats = efx_nic_cfg_get(sc->enp)->enc_mac_stats_nstats;
	mac_stats_size = EFX_P2ROUNDUP(size_t, mac_nstats * sizeof(uint64_t),
				       EFX_BUF_SIZE);
	if ((rc = sfxge_dma_alloc(sc, mac_stats_size, mac_stats_buf)) != 0)
		goto fail2;
	port->stats_update_period_ms = sfxge_port_stats_update_period_ms(sc);
	sfxge_mac_stat_init(sc);

	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
	    "stats_update_period_ms",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    sfxge_port_stats_update_period_ms_handler, "IU",
	    "interface statistics refresh period");

	port->init_state = SFXGE_PORT_INITIALIZED;

	DBGPRINT(sc->dev, "success");
	return (0);

	/* Unwind in reverse order of allocation; fail2 falls into fail. */
fail2:
	free(port->mac_stats.decode_buf, M_SFXGE);
	sfxge_dma_free(phy_stats_buf);
fail:
	free(port->phy_stats.decode_buf, M_SFXGE);
	SFXGE_PORT_LOCK_DESTROY(port);
	port->sc = NULL;
	DBGPRINT(sc->dev, "failed %d", rc);
	return (rc);
}
821 
/*
 * ifmedia word for each (PHY media type, link mode) combination, indexed as
 * sfxge_link_mode[efx_phy_media_type_t][efx_link_mode_t].
 *
 * Combinations without an initializer are 0; sfxge_port_ifmedia_init() skips
 * such entries when building the media list. sfxge_phy_cap_mask() searches a
 * row to map a requested media word back to a link mode, and
 * sfxge_media_status() uses the table to report the active media.
 */
static const int sfxge_link_mode[EFX_PHY_MEDIA_NTYPES][EFX_LINK_NMODES] = {
	[EFX_PHY_MEDIA_CX4] = {
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_CX4,
	},
	[EFX_PHY_MEDIA_KX4] = {
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_KX4,
	},
	[EFX_PHY_MEDIA_XFP] = {
		/* Don't know the module type, but assume SR for now. */
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
	},
	[EFX_PHY_MEDIA_QSFP_PLUS] = {
		/* Don't know the module type, but assume SR for now. */
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
		[EFX_LINK_40000FDX]	= IFM_ETHER | IFM_FDX | IFM_40G_CR4,
		[EFX_LINK_50000FDX]	= IFM_ETHER | IFM_FDX | IFM_50G_SR,
		[EFX_LINK_100000FDX]	= IFM_ETHER | IFM_FDX | IFM_100G_SR2,
	},
	[EFX_PHY_MEDIA_SFP_PLUS] = {
		/* Don't know the module type, but assume SX/SR for now. */
		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_SX,
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
	},
	[EFX_PHY_MEDIA_BASE_T] = {
		[EFX_LINK_10HDX]	= IFM_ETHER | IFM_HDX | IFM_10_T,
		[EFX_LINK_10FDX]	= IFM_ETHER | IFM_FDX | IFM_10_T,
		[EFX_LINK_100HDX]	= IFM_ETHER | IFM_HDX | IFM_100_TX,
		[EFX_LINK_100FDX]	= IFM_ETHER | IFM_FDX | IFM_100_TX,
		[EFX_LINK_1000HDX]	= IFM_ETHER | IFM_HDX | IFM_1000_T,
		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_T,
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_T,
	},
};
857 
858 static void
859 sfxge_media_status(if_t ifp, struct ifmediareq *ifmr)
860 {
861 	struct sfxge_softc *sc;
862 	efx_phy_media_type_t medium_type;
863 	efx_link_mode_t mode;
864 
865 	sc = if_getsoftc(ifp);
866 	SFXGE_ADAPTER_LOCK(sc);
867 
868 	ifmr->ifm_status = IFM_AVALID;
869 	ifmr->ifm_active = IFM_ETHER;
870 
871 	if (SFXGE_RUNNING(sc) && SFXGE_LINK_UP(sc)) {
872 		ifmr->ifm_status |= IFM_ACTIVE;
873 
874 		efx_phy_media_type_get(sc->enp, &medium_type);
875 		mode = sc->port.link_mode;
876 		ifmr->ifm_active |= sfxge_link_mode[medium_type][mode];
877 		ifmr->ifm_active |= sfxge_port_link_fc_ifm(sc);
878 	}
879 
880 	SFXGE_ADAPTER_UNLOCK(sc);
881 }
882 
883 static efx_phy_cap_type_t
884 sfxge_link_mode_to_phy_cap(efx_link_mode_t mode)
885 {
886 	switch (mode) {
887 	case EFX_LINK_10HDX:
888 		return (EFX_PHY_CAP_10HDX);
889 	case EFX_LINK_10FDX:
890 		return (EFX_PHY_CAP_10FDX);
891 	case EFX_LINK_100HDX:
892 		return (EFX_PHY_CAP_100HDX);
893 	case EFX_LINK_100FDX:
894 		return (EFX_PHY_CAP_100FDX);
895 	case EFX_LINK_1000HDX:
896 		return (EFX_PHY_CAP_1000HDX);
897 	case EFX_LINK_1000FDX:
898 		return (EFX_PHY_CAP_1000FDX);
899 	case EFX_LINK_10000FDX:
900 		return (EFX_PHY_CAP_10000FDX);
901 	case EFX_LINK_25000FDX:
902 		return (EFX_PHY_CAP_25000FDX);
903 	case EFX_LINK_40000FDX:
904 		return (EFX_PHY_CAP_40000FDX);
905 	case EFX_LINK_50000FDX:
906 		return (EFX_PHY_CAP_50000FDX);
907 	case EFX_LINK_100000FDX:
908 		return (EFX_PHY_CAP_100000FDX);
909 	default:
910 		return (EFX_PHY_CAP_INVALID);
911 	}
912 }
913 
914 static int
915 sfxge_phy_cap_mask(struct sfxge_softc *sc, int ifmedia, uint32_t *phy_cap_mask)
916 {
917 	/* Get global options (duplex), type and subtype bits */
918 	int ifmedia_masked = ifmedia & (IFM_GMASK | IFM_NMASK | IFM_TMASK);
919 	efx_phy_media_type_t medium_type;
920 	boolean_t mode_found = B_FALSE;
921 	uint32_t cap_mask, mode_cap_mask;
922 	efx_link_mode_t mode;
923 	efx_phy_cap_type_t phy_cap;
924 
925 	efx_phy_media_type_get(sc->enp, &medium_type);
926 	if (medium_type >= nitems(sfxge_link_mode)) {
927 		if_printf(sc->ifnet, "unexpected media type %d\n", medium_type);
928 		return (EINVAL);
929 	}
930 
931 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
932 
933 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
934 		if (ifmedia_masked == sfxge_link_mode[medium_type][mode]) {
935 			mode_found = B_TRUE;
936 			break;
937 		}
938 	}
939 
940 	if (!mode_found) {
941 		/*
942 		 * If media is not in the table, it must be IFM_AUTO.
943 		 */
944 		KASSERT((cap_mask & (1 << EFX_PHY_CAP_AN)) &&
945 		    ifmedia_masked == (IFM_ETHER | IFM_AUTO),
946 		    ("%s: no mode for media %#x", __func__, ifmedia));
947 		*phy_cap_mask = (cap_mask & ~(1 << EFX_PHY_CAP_ASYM));
948 		return (0);
949 	}
950 
951 	phy_cap = sfxge_link_mode_to_phy_cap(mode);
952 	if (phy_cap == EFX_PHY_CAP_INVALID) {
953 		if_printf(sc->ifnet,
954 			  "cannot map link mode %d to phy capability\n",
955 			  mode);
956 		return (EINVAL);
957 	}
958 
959 	mode_cap_mask = (1 << phy_cap);
960 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_AN);
961 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
962 	if (ifmedia & IFM_ETH_RXPAUSE)
963 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
964 	if (!(ifmedia & IFM_ETH_TXPAUSE))
965 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_ASYM);
966 #else
967 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
968 #endif
969 
970 	*phy_cap_mask = mode_cap_mask;
971 	return (0);
972 }
973 
974 static int
975 sfxge_media_change(if_t ifp)
976 {
977 	struct sfxge_softc *sc;
978 	struct ifmedia_entry *ifm;
979 	int rc;
980 	uint32_t phy_cap_mask;
981 
982 	sc = if_getsoftc(ifp);
983 	ifm = sc->media.ifm_cur;
984 
985 	SFXGE_ADAPTER_LOCK(sc);
986 
987 	if (!SFXGE_RUNNING(sc)) {
988 		rc = 0;
989 		goto out;
990 	}
991 
992 	rc = efx_mac_fcntl_set(sc->enp, sfxge_port_wanted_fc(sc), B_TRUE);
993 	if (rc != 0)
994 		goto out;
995 
996 	if ((rc = sfxge_phy_cap_mask(sc, ifm->ifm_media, &phy_cap_mask)) != 0)
997 		goto out;
998 
999 	rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask);
1000 out:
1001 	SFXGE_ADAPTER_UNLOCK(sc);
1002 
1003 	return (rc);
1004 }
1005 
/*
 * Build the ifmedia list for the adapter.
 *
 * Temporarily initialises the common-code filter and port objects so that
 * the PHY media type and permitted capabilities can be queried, registers
 * the media change/status callbacks, adds one media entry per supported link
 * mode (plus pause variants when pause media options are available) and an
 * autoselect entry when autonegotiation is permitted, selects the best entry
 * as current, and then tears the temporary port state down again.
 *
 * Returns 0 on success or the error from efx_filter_init()/efx_port_init().
 */
int sfxge_port_ifmedia_init(struct sfxge_softc *sc)
{
	efx_phy_media_type_t medium_type;
	uint32_t cap_mask, mode_cap_mask;
	efx_link_mode_t mode;
	efx_phy_cap_type_t phy_cap;
	int mode_ifm, best_mode_ifm = 0;
	int rc;

	/*
	 * We need port state to initialise the ifmedia list.
	 * This requires an initialized NIC, which is already done in
	 * sfxge_create() when resources are estimated.
	 */
	if ((rc = efx_filter_init(sc->enp)) != 0)
		goto out1;
	if ((rc = efx_port_init(sc->enp)) != 0)
		goto out2;

	/*
	 * Register ifconfig callbacks for querying and setting the
	 * link mode and link status.
	 */
	ifmedia_init(&sc->media, IFM_IMASK, sfxge_media_change,
	    sfxge_media_status);

	/*
	 * Map firmware medium type and capabilities to ifmedia types.
	 * ifmedia does not distinguish between forcing the link mode
	 * and disabling auto-negotiation.  1000BASE-T and 10GBASE-T
	 * require AN even if only one link mode is enabled, and for
	 * 100BASE-TX it is useful even if the link mode is forced.
	 * Therefore we never disable auto-negotiation.
	 *
	 * Also enable and advertise flow control by default.
	 */

	efx_phy_media_type_get(sc->enp, &medium_type);
	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);

	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
		phy_cap = sfxge_link_mode_to_phy_cap(mode);
		if (phy_cap == EFX_PHY_CAP_INVALID)
			continue;

		mode_cap_mask = (1 << phy_cap);
		mode_ifm = sfxge_link_mode[medium_type][mode];

		/*
		 * Only offer modes that the PHY permits and that have an
		 * ifmedia word for this medium type (table entry non-zero).
		 */
		if ((cap_mask & mode_cap_mask) && mode_ifm) {
			/* No flow-control */
			ifmedia_add(&sc->media, mode_ifm, 0, NULL);

#ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
			/* Respond-only.  If using AN, we implicitly
			 * offer symmetric as well, but that doesn't
			 * mean we *have* to generate pause frames.
			 */
			mode_ifm |= IFM_ETH_RXPAUSE;
			ifmedia_add(&sc->media, mode_ifm, 0, NULL);

			/* Symmetric */
			mode_ifm |= IFM_ETH_TXPAUSE;
			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
#endif

			/* Link modes are numbered in order of speed,
			 * so assume the last one available is the best.
			 */
			best_mode_ifm = mode_ifm;
		}
	}

	if (cap_mask & (1 << EFX_PHY_CAP_AN)) {
		/* Add autoselect mode; it takes precedence as the default. */
		mode_ifm = IFM_ETHER | IFM_AUTO;
		ifmedia_add(&sc->media, mode_ifm, 0, NULL);
		best_mode_ifm = mode_ifm;
	}

	/* Leave the current media unset if no entry could be added. */
	if (best_mode_ifm != 0)
		ifmedia_set(&sc->media, best_mode_ifm);

	/* Now discard port state until interface is started. */
	efx_port_fini(sc->enp);
out2:
	efx_filter_fini(sc->enp);
out1:
	return (rc);
}
1095