xref: /freebsd/sys/dev/ena/ena.c (revision aa386085)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 #include "opt_rss.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/eventhandler.h>
38 #include <sys/kernel.h>
39 #include <sys/kthread.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/module.h>
43 #include <sys/rman.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
49 #include <sys/time.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 
54 #include <machine/atomic.h>
55 #include <machine/bus.h>
56 #include <machine/in_cksum.h>
57 #include <machine/resource.h>
58 
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/if_types.h>
69 #include <net/if_var.h>
70 #include <net/if_vlan_var.h>
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 
79 #include "ena.h"
80 #include "ena_datapath.h"
81 #include "ena_rss.h"
82 #include "ena_sysctl.h"
83 
84 #ifdef DEV_NETMAP
85 #include "ena_netmap.h"
86 #endif /* DEV_NETMAP */
87 
88 /*********************************************************
89  *  Function prototypes
90  *********************************************************/
91 static int ena_probe(device_t);
92 static void ena_intr_msix_mgmnt(void *);
93 static void ena_free_pci_resources(struct ena_adapter *);
94 static int ena_change_mtu(if_t, int);
95 static inline void ena_alloc_counters(counter_u64_t *, int);
96 static inline void ena_free_counters(counter_u64_t *, int);
97 static inline void ena_reset_counters(counter_u64_t *, int);
98 static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *,
99     uint16_t);
100 static void ena_init_io_rings_basic(struct ena_adapter *);
101 static void ena_init_io_rings_advanced(struct ena_adapter *);
102 static void ena_init_io_rings(struct ena_adapter *);
103 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
104 static void ena_free_all_io_rings_resources(struct ena_adapter *);
105 static int ena_setup_tx_dma_tag(struct ena_adapter *);
106 static int ena_free_tx_dma_tag(struct ena_adapter *);
107 static int ena_setup_rx_dma_tag(struct ena_adapter *);
108 static int ena_free_rx_dma_tag(struct ena_adapter *);
109 static void ena_release_all_tx_dmamap(struct ena_ring *);
110 static int ena_setup_tx_resources(struct ena_adapter *, int);
111 static void ena_free_tx_resources(struct ena_adapter *, int);
112 static int ena_setup_all_tx_resources(struct ena_adapter *);
113 static void ena_free_all_tx_resources(struct ena_adapter *);
114 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
115 static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
116 static int ena_setup_all_rx_resources(struct ena_adapter *);
117 static void ena_free_all_rx_resources(struct ena_adapter *);
118 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
119     struct ena_rx_buffer *);
120 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
121     struct ena_rx_buffer *);
122 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
123 static void ena_refill_all_rx_bufs(struct ena_adapter *);
124 static void ena_free_all_rx_bufs(struct ena_adapter *);
125 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
126 static void ena_free_all_tx_bufs(struct ena_adapter *);
127 static void ena_destroy_all_tx_queues(struct ena_adapter *);
128 static void ena_destroy_all_rx_queues(struct ena_adapter *);
129 static void ena_destroy_all_io_queues(struct ena_adapter *);
130 static int ena_create_io_queues(struct ena_adapter *);
131 static int ena_handle_msix(void *);
132 static int ena_enable_msix(struct ena_adapter *);
133 static void ena_setup_mgmnt_intr(struct ena_adapter *);
134 static int ena_setup_io_intr(struct ena_adapter *);
135 static int ena_request_mgmnt_irq(struct ena_adapter *);
136 static int ena_request_io_irq(struct ena_adapter *);
137 static void ena_free_mgmnt_irq(struct ena_adapter *);
138 static void ena_free_io_irq(struct ena_adapter *);
139 static void ena_free_irqs(struct ena_adapter *);
140 static void ena_disable_msix(struct ena_adapter *);
141 static void ena_unmask_all_io_irqs(struct ena_adapter *);
142 static int ena_up_complete(struct ena_adapter *);
143 static uint64_t ena_get_counter(if_t, ift_counter);
144 static int ena_media_change(if_t);
145 static void ena_media_status(if_t, struct ifmediareq *);
146 static void ena_init(void *);
147 static int ena_ioctl(if_t, u_long, caddr_t);
148 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
149 static void ena_update_host_info(struct ena_admin_host_info *, if_t);
150 static void ena_update_hwassist(struct ena_adapter *);
151 static void ena_setup_ifnet(device_t, struct ena_adapter *,
152     struct ena_com_dev_get_features_ctx *);
153 static int ena_enable_wc(device_t, struct resource *);
154 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
155     struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
156 static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *);
157 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
158     struct ena_com_dev_get_features_ctx *);
159 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
160 static void ena_config_host_info(struct ena_com_dev *, device_t);
161 static int ena_attach(device_t);
162 static int ena_detach(device_t);
163 static int ena_device_init(struct ena_adapter *, device_t,
164     struct ena_com_dev_get_features_ctx *, int *);
165 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
166 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
167 static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *);
168 static int ena_copy_eni_metrics(struct ena_adapter *);
169 static int ena_copy_srd_metrics(struct ena_adapter *);
170 static int ena_copy_customer_metrics(struct ena_adapter *);
171 static void ena_timer_service(void *);
172 
173 static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
174     " v" ENA_DRV_MODULE_VERSION;
175 
176 static ena_vendor_info_t ena_vendor_info_array[] = {
177 	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 },
178 	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 },
179 	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 },
180 	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 },
181 	/* Last entry */
182 	{ 0, 0, 0 }
183 };
184 
185 struct sx ena_global_lock;
186 
187 /*
188  * Contains pointers to event handlers, e.g. link state chage.
189  */
190 static struct ena_aenq_handlers aenq_handlers;
191 
192 void
ena_dmamap_callback(void * arg,bus_dma_segment_t * segs,int nseg,int error)193 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
194 {
195 	if (error != 0)
196 		return;
197 	*(bus_addr_t *)arg = segs[0].ds_addr;
198 }
199 
200 int
ena_dma_alloc(device_t dmadev,bus_size_t size,ena_mem_handle_t * dma,int mapflags,bus_size_t alignment,int domain)201 ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
202     int mapflags, bus_size_t alignment, int domain)
203 {
204 	struct ena_adapter *adapter = device_get_softc(dmadev);
205 	device_t pdev = adapter->pdev;
206 	uint32_t maxsize;
207 	uint64_t dma_space_addr;
208 	int error;
209 
210 	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
211 
212 	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
213 	if (unlikely(dma_space_addr == 0))
214 		dma_space_addr = BUS_SPACE_MAXADDR;
215 
216 	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
217 	    alignment, 0,      /* alignment, bounds 		*/
218 	    dma_space_addr,    /* lowaddr of exclusion window	*/
219 	    BUS_SPACE_MAXADDR, /* highaddr of exclusion window	*/
220 	    NULL, NULL,	       /* filter, filterarg 		*/
221 	    maxsize,	       /* maxsize 			*/
222 	    1,		       /* nsegments 			*/
223 	    maxsize,	       /* maxsegsize 			*/
224 	    BUS_DMA_ALLOCNOW,  /* flags 			*/
225 	    NULL,	       /* lockfunc 			*/
226 	    NULL,	       /* lockarg 			*/
227 	    &dma->tag);
228 	if (unlikely(error != 0)) {
229 		ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error);
230 		goto fail_tag;
231 	}
232 
233 	error = bus_dma_tag_set_domain(dma->tag, domain);
234 	if (unlikely(error != 0)) {
235 		ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n",
236 		    error);
237 		goto fail_map_create;
238 	}
239 
240 	error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr,
241 	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
242 	if (unlikely(error != 0)) {
243 		ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n",
244 		    (uintmax_t)size, error);
245 		goto fail_map_create;
246 	}
247 
248 	dma->paddr = 0;
249 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
250 	    ena_dmamap_callback, &dma->paddr, mapflags);
251 	if (unlikely((error != 0) || (dma->paddr == 0))) {
252 		ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error);
253 		goto fail_map_load;
254 	}
255 
256 	bus_dmamap_sync(dma->tag, dma->map,
257 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
258 
259 	return (0);
260 
261 fail_map_load:
262 	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
263 fail_map_create:
264 	bus_dma_tag_destroy(dma->tag);
265 fail_tag:
266 	dma->tag = NULL;
267 	dma->vaddr = NULL;
268 	dma->paddr = 0;
269 
270 	return (error);
271 }
272 
273 static void
ena_free_pci_resources(struct ena_adapter * adapter)274 ena_free_pci_resources(struct ena_adapter *adapter)
275 {
276 	device_t pdev = adapter->pdev;
277 
278 	if (adapter->memory != NULL) {
279 		bus_release_resource(pdev, SYS_RES_MEMORY,
280 		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
281 	}
282 
283 	if (adapter->registers != NULL) {
284 		bus_release_resource(pdev, SYS_RES_MEMORY,
285 		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
286 	}
287 
288 	if (adapter->msix != NULL) {
289 		bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid,
290 		    adapter->msix);
291 	}
292 }
293 
294 static int
ena_probe(device_t dev)295 ena_probe(device_t dev)
296 {
297 	ena_vendor_info_t *ent;
298 	uint16_t pci_vendor_id = 0;
299 	uint16_t pci_device_id = 0;
300 
301 	pci_vendor_id = pci_get_vendor(dev);
302 	pci_device_id = pci_get_device(dev);
303 
304 	ent = ena_vendor_info_array;
305 	while (ent->vendor_id != 0) {
306 		if ((pci_vendor_id == ent->vendor_id) &&
307 		    (pci_device_id == ent->device_id)) {
308 			ena_log_raw(DBG, "vendor=%x device=%x\n", pci_vendor_id,
309 			    pci_device_id);
310 
311 			device_set_desc(dev, ENA_DEVICE_DESC);
312 			return (BUS_PROBE_DEFAULT);
313 		}
314 
315 		ent++;
316 	}
317 
318 	return (ENXIO);
319 }
320 
321 static int
ena_change_mtu(if_t ifp,int new_mtu)322 ena_change_mtu(if_t ifp, int new_mtu)
323 {
324 	struct ena_adapter *adapter = if_getsoftc(ifp);
325 	device_t pdev = adapter->pdev;
326 	int rc;
327 
328 	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
329 		ena_log(pdev, ERR, "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n",
330 		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
331 		return (EINVAL);
332 	}
333 
334 	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
335 	if (likely(rc == 0)) {
336 		ena_log(pdev, DBG, "set MTU to %d\n", new_mtu);
337 		if_setmtu(ifp, new_mtu);
338 	} else {
339 		ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu);
340 	}
341 
342 	return (rc);
343 }
344 
345 static inline void
ena_alloc_counters(counter_u64_t * begin,int size)346 ena_alloc_counters(counter_u64_t *begin, int size)
347 {
348 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
349 
350 	for (; begin < end; ++begin)
351 		*begin = counter_u64_alloc(M_WAITOK);
352 }
353 
354 static inline void
ena_free_counters(counter_u64_t * begin,int size)355 ena_free_counters(counter_u64_t *begin, int size)
356 {
357 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
358 
359 	for (; begin < end; ++begin)
360 		counter_u64_free(*begin);
361 }
362 
363 static inline void
ena_reset_counters(counter_u64_t * begin,int size)364 ena_reset_counters(counter_u64_t *begin, int size)
365 {
366 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
367 
368 	for (; begin < end; ++begin)
369 		counter_u64_zero(*begin);
370 }
371 
372 static void
ena_init_io_rings_common(struct ena_adapter * adapter,struct ena_ring * ring,uint16_t qid)373 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
374     uint16_t qid)
375 {
376 	ring->qid = qid;
377 	ring->adapter = adapter;
378 	ring->ena_dev = adapter->ena_dev;
379 	atomic_store_8(&ring->first_interrupt, 0);
380 	ring->no_interrupt_event_cnt = 0;
381 }
382 
383 static void
ena_init_io_rings_basic(struct ena_adapter * adapter)384 ena_init_io_rings_basic(struct ena_adapter *adapter)
385 {
386 	struct ena_com_dev *ena_dev;
387 	struct ena_ring *txr, *rxr;
388 	struct ena_que *que;
389 	int i;
390 
391 	ena_dev = adapter->ena_dev;
392 
393 	for (i = 0; i < adapter->num_io_queues; i++) {
394 		txr = &adapter->tx_ring[i];
395 		rxr = &adapter->rx_ring[i];
396 
397 		/* TX/RX common ring state */
398 		ena_init_io_rings_common(adapter, txr, i);
399 		ena_init_io_rings_common(adapter, rxr, i);
400 
401 		/* TX specific ring state */
402 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
403 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
404 
405 		que = &adapter->que[i];
406 		que->adapter = adapter;
407 		que->id = i;
408 		que->tx_ring = txr;
409 		que->rx_ring = rxr;
410 
411 		txr->que = que;
412 		rxr->que = que;
413 
414 		rxr->empty_rx_queue = 0;
415 		rxr->rx_mbuf_sz = ena_mbuf_sz;
416 	}
417 }
418 
419 static void
ena_init_io_rings_advanced(struct ena_adapter * adapter)420 ena_init_io_rings_advanced(struct ena_adapter *adapter)
421 {
422 	struct ena_ring *txr, *rxr;
423 	int i;
424 
425 	for (i = 0; i < adapter->num_io_queues; i++) {
426 		txr = &adapter->tx_ring[i];
427 		rxr = &adapter->rx_ring[i];
428 
429 		/* Allocate a buf ring */
430 		txr->buf_ring_size = adapter->buf_ring_size;
431 		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF, M_WAITOK,
432 		    &txr->ring_mtx);
433 
434 		/* Allocate Tx statistics. */
435 		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
436 		    sizeof(txr->tx_stats));
437 		txr->tx_last_cleanup_ticks = ticks;
438 
439 		/* Allocate Rx statistics. */
440 		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
441 		    sizeof(rxr->rx_stats));
442 
443 		/* Initialize locks */
444 		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
445 		    device_get_nameunit(adapter->pdev), i);
446 		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
447 		    device_get_nameunit(adapter->pdev), i);
448 
449 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
450 	}
451 }
452 
/*
 * Initialize all IO rings in two passes:
 *   - basic:    set fields to their initial values and copy settings from
 *               the adapter/ena_dev;
 *   - advanced: allocate the mutex, counters and buf_ring.
 */
static void
ena_init_io_rings(struct ena_adapter *adapter)
{
	ena_init_io_rings_basic(adapter);
	ena_init_io_rings_advanced(adapter);
}
465 
466 static void
ena_free_io_ring_resources(struct ena_adapter * adapter,unsigned int qid)467 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
468 {
469 	struct ena_ring *txr = &adapter->tx_ring[qid];
470 	struct ena_ring *rxr = &adapter->rx_ring[qid];
471 
472 	ena_free_counters((counter_u64_t *)&txr->tx_stats,
473 	    sizeof(txr->tx_stats));
474 	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
475 	    sizeof(rxr->rx_stats));
476 
477 	ENA_RING_MTX_LOCK(txr);
478 	drbr_free(txr->br, M_DEVBUF);
479 	ENA_RING_MTX_UNLOCK(txr);
480 
481 	mtx_destroy(&txr->ring_mtx);
482 }
483 
484 static void
ena_free_all_io_rings_resources(struct ena_adapter * adapter)485 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
486 {
487 	int i;
488 
489 	for (i = 0; i < adapter->num_io_queues; i++)
490 		ena_free_io_ring_resources(adapter, i);
491 }
492 
493 static int
ena_setup_tx_dma_tag(struct ena_adapter * adapter)494 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
495 {
496 	int ret;
497 
498 	/* Create DMA tag for Tx buffers */
499 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
500 	    1, 0,				  /* alignment, bounds 	     */
501 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
502 	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
503 	    NULL, NULL,				  /* filter, filterarg 	     */
504 	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
505 	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
506 	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
507 	    0,					  /* flags 		     */
508 	    NULL,				  /* lockfunc 		     */
509 	    NULL,				  /* lockfuncarg 	     */
510 	    &adapter->tx_buf_tag);
511 
512 	return (ret);
513 }
514 
515 static int
ena_free_tx_dma_tag(struct ena_adapter * adapter)516 ena_free_tx_dma_tag(struct ena_adapter *adapter)
517 {
518 	int ret;
519 
520 	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
521 
522 	if (likely(ret == 0))
523 		adapter->tx_buf_tag = NULL;
524 
525 	return (ret);
526 }
527 
528 static int
ena_setup_rx_dma_tag(struct ena_adapter * adapter)529 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
530 {
531 	int ret;
532 
533 	/* Create DMA tag for Rx buffers*/
534 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
535 	    1, 0,				  /* alignment, bounds 	     */
536 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
537 	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
538 	    NULL, NULL,				  /* filter, filterarg 	     */
539 	    ena_mbuf_sz,			  /* maxsize 		     */
540 	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
541 	    ena_mbuf_sz,			  /* maxsegsize 	     */
542 	    0,					  /* flags 		     */
543 	    NULL,				  /* lockfunc 		     */
544 	    NULL,				  /* lockarg 		     */
545 	    &adapter->rx_buf_tag);
546 
547 	return (ret);
548 }
549 
550 static int
ena_free_rx_dma_tag(struct ena_adapter * adapter)551 ena_free_rx_dma_tag(struct ena_adapter *adapter)
552 {
553 	int ret;
554 
555 	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
556 
557 	if (likely(ret == 0))
558 		adapter->rx_buf_tag = NULL;
559 
560 	return (ret);
561 }
562 
563 static void
ena_release_all_tx_dmamap(struct ena_ring * tx_ring)564 ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
565 {
566 	struct ena_adapter *adapter = tx_ring->adapter;
567 	struct ena_tx_buffer *tx_info;
568 	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
569 	int i;
570 #ifdef DEV_NETMAP
571 	struct ena_netmap_tx_info *nm_info;
572 	int j;
573 #endif /* DEV_NETMAP */
574 
575 	for (i = 0; i < tx_ring->ring_size; ++i) {
576 		tx_info = &tx_ring->tx_buffer_info[i];
577 #ifdef DEV_NETMAP
578 		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
579 			nm_info = &tx_info->nm_info;
580 			for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
581 				if (nm_info->map_seg[j] != NULL) {
582 					bus_dmamap_destroy(tx_tag,
583 					    nm_info->map_seg[j]);
584 					nm_info->map_seg[j] = NULL;
585 				}
586 			}
587 		}
588 #endif /* DEV_NETMAP */
589 		if (tx_info->dmamap != NULL) {
590 			bus_dmamap_destroy(tx_tag, tx_info->dmamap);
591 			tx_info->dmamap = NULL;
592 		}
593 	}
594 }
595 
596 /**
597  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
598  * @adapter: network interface device structure
599  * @qid: queue index
600  *
601  * Returns 0 on success, otherwise on failure.
602  **/
603 static int
ena_setup_tx_resources(struct ena_adapter * adapter,int qid)604 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
605 {
606 	device_t pdev = adapter->pdev;
607 	char thread_name[MAXCOMLEN + 1];
608 	struct ena_que *que = &adapter->que[qid];
609 	struct ena_ring *tx_ring = que->tx_ring;
610 	cpuset_t *cpu_mask = NULL;
611 	int size, i, err;
612 #ifdef DEV_NETMAP
613 	bus_dmamap_t *map;
614 	int j;
615 
616 	ena_netmap_reset_tx_ring(adapter, qid);
617 #endif /* DEV_NETMAP */
618 
619 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
620 
621 	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
622 	if (unlikely(tx_ring->tx_buffer_info == NULL))
623 		return (ENOMEM);
624 
625 	size = sizeof(uint16_t) * tx_ring->ring_size;
626 	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
627 	if (unlikely(tx_ring->free_tx_ids == NULL))
628 		goto err_buf_info_free;
629 
630 	size = tx_ring->tx_max_header_size;
631 	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
632 	    M_NOWAIT | M_ZERO);
633 	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
634 		goto err_tx_ids_free;
635 
636 	/* Req id stack for TX OOO completions */
637 	for (i = 0; i < tx_ring->ring_size; i++)
638 		tx_ring->free_tx_ids[i] = i;
639 
640 	/* Reset TX statistics. */
641 	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
642 	    sizeof(tx_ring->tx_stats));
643 
644 	tx_ring->next_to_use = 0;
645 	tx_ring->next_to_clean = 0;
646 	tx_ring->acum_pkts = 0;
647 
648 	/* Make sure that drbr is empty */
649 	ENA_RING_MTX_LOCK(tx_ring);
650 	drbr_flush(adapter->ifp, tx_ring->br);
651 	ENA_RING_MTX_UNLOCK(tx_ring);
652 
653 	/* ... and create the buffer DMA maps */
654 	for (i = 0; i < tx_ring->ring_size; i++) {
655 		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
656 		    &tx_ring->tx_buffer_info[i].dmamap);
657 		if (unlikely(err != 0)) {
658 			ena_log(pdev, ERR,
659 			    "Unable to create Tx DMA map for buffer %d\n", i);
660 			goto err_map_release;
661 		}
662 
663 #ifdef DEV_NETMAP
664 		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
665 			map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
666 			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
667 				err = bus_dmamap_create(adapter->tx_buf_tag, 0,
668 				    &map[j]);
669 				if (unlikely(err != 0)) {
670 					ena_log(pdev, ERR,
671 					    "Unable to create Tx DMA for buffer %d %d\n",
672 					    i, j);
673 					goto err_map_release;
674 				}
675 			}
676 		}
677 #endif /* DEV_NETMAP */
678 	}
679 
680 	/* Allocate taskqueues */
681 	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
682 	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
683 	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
684 	if (unlikely(tx_ring->enqueue_tq == NULL)) {
685 		ena_log(pdev, ERR,
686 		    "Unable to create taskqueue for enqueue task\n");
687 		i = tx_ring->ring_size;
688 		goto err_map_release;
689 	}
690 
691 	tx_ring->running = true;
692 
693 #ifdef RSS
694 	cpu_mask = &que->cpu_mask;
695 	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
696 	    device_get_nameunit(adapter->pdev), que->cpu);
697 #else
698 	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
699 	    device_get_nameunit(adapter->pdev), que->id);
700 #endif
701 	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
702 	    cpu_mask, "%s", thread_name);
703 
704 	return (0);
705 
706 err_map_release:
707 	ena_release_all_tx_dmamap(tx_ring);
708 err_tx_ids_free:
709 	free(tx_ring->free_tx_ids, M_DEVBUF);
710 	tx_ring->free_tx_ids = NULL;
711 err_buf_info_free:
712 	free(tx_ring->tx_buffer_info, M_DEVBUF);
713 	tx_ring->tx_buffer_info = NULL;
714 
715 	return (ENOMEM);
716 }
717 
718 /**
719  * ena_free_tx_resources - Free Tx Resources per Queue
720  * @adapter: network interface device structure
721  * @qid: queue index
722  *
723  * Free all transmit software resources
724  **/
725 static void
ena_free_tx_resources(struct ena_adapter * adapter,int qid)726 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
727 {
728 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
729 #ifdef DEV_NETMAP
730 	struct ena_netmap_tx_info *nm_info;
731 	int j;
732 #endif /* DEV_NETMAP */
733 
734 	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL))
735 		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
736 
737 	taskqueue_free(tx_ring->enqueue_tq);
738 
739 	ENA_RING_MTX_LOCK(tx_ring);
740 	/* Flush buffer ring, */
741 	drbr_flush(adapter->ifp, tx_ring->br);
742 
743 	/* Free buffer DMA maps, */
744 	for (int i = 0; i < tx_ring->ring_size; i++) {
745 		bus_dmamap_sync(adapter->tx_buf_tag,
746 		    tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
747 		bus_dmamap_unload(adapter->tx_buf_tag,
748 		    tx_ring->tx_buffer_info[i].dmamap);
749 		bus_dmamap_destroy(adapter->tx_buf_tag,
750 		    tx_ring->tx_buffer_info[i].dmamap);
751 
752 #ifdef DEV_NETMAP
753 		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
754 			nm_info = &tx_ring->tx_buffer_info[i].nm_info;
755 			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
756 				if (nm_info->socket_buf_idx[j] != 0) {
757 					bus_dmamap_sync(adapter->tx_buf_tag,
758 					    nm_info->map_seg[j],
759 					    BUS_DMASYNC_POSTWRITE);
760 					ena_netmap_unload(adapter,
761 					    nm_info->map_seg[j]);
762 				}
763 				bus_dmamap_destroy(adapter->tx_buf_tag,
764 				    nm_info->map_seg[j]);
765 				nm_info->socket_buf_idx[j] = 0;
766 			}
767 		}
768 #endif /* DEV_NETMAP */
769 
770 		m_freem(tx_ring->tx_buffer_info[i].mbuf);
771 		tx_ring->tx_buffer_info[i].mbuf = NULL;
772 	}
773 	ENA_RING_MTX_UNLOCK(tx_ring);
774 
775 	/* And free allocated memory. */
776 	free(tx_ring->tx_buffer_info, M_DEVBUF);
777 	tx_ring->tx_buffer_info = NULL;
778 
779 	free(tx_ring->free_tx_ids, M_DEVBUF);
780 	tx_ring->free_tx_ids = NULL;
781 
782 	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
783 	tx_ring->push_buf_intermediate_buf = NULL;
784 }
785 
786 /**
787  * ena_setup_all_tx_resources - allocate all queues Tx resources
788  * @adapter: network interface device structure
789  *
790  * Returns 0 on success, otherwise on failure.
791  **/
792 static int
ena_setup_all_tx_resources(struct ena_adapter * adapter)793 ena_setup_all_tx_resources(struct ena_adapter *adapter)
794 {
795 	int i, rc;
796 
797 	for (i = 0; i < adapter->num_io_queues; i++) {
798 		rc = ena_setup_tx_resources(adapter, i);
799 		if (rc != 0) {
800 			ena_log(adapter->pdev, ERR,
801 			    "Allocation for Tx Queue %u failed\n", i);
802 			goto err_setup_tx;
803 		}
804 	}
805 
806 	return (0);
807 
808 err_setup_tx:
809 	/* Rewind the index freeing the rings as we go */
810 	while (i--)
811 		ena_free_tx_resources(adapter, i);
812 	return (rc);
813 }
814 
815 /**
816  * ena_free_all_tx_resources - Free Tx Resources for All Queues
817  * @adapter: network interface device structure
818  *
819  * Free all transmit software resources
820  **/
821 static void
ena_free_all_tx_resources(struct ena_adapter * adapter)822 ena_free_all_tx_resources(struct ena_adapter *adapter)
823 {
824 	int i;
825 
826 	for (i = 0; i < adapter->num_io_queues; i++)
827 		ena_free_tx_resources(adapter, i);
828 }
829 
830 /**
831  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
832  * @adapter: network interface device structure
833  * @qid: queue index
834  *
835  * Returns 0 on success, otherwise on failure.
836  **/
837 static int
ena_setup_rx_resources(struct ena_adapter * adapter,unsigned int qid)838 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
839 {
840 	device_t pdev = adapter->pdev;
841 	struct ena_que *que = &adapter->que[qid];
842 	struct ena_ring *rx_ring = que->rx_ring;
843 	int size, err, i;
844 
845 	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
846 
847 #ifdef DEV_NETMAP
848 	ena_netmap_reset_rx_ring(adapter, qid);
849 	rx_ring->initialized = false;
850 #endif /* DEV_NETMAP */
851 
852 	/*
853 	 * Alloc extra element so in rx path
854 	 * we can always prefetch rx_info + 1
855 	 */
856 	size += sizeof(struct ena_rx_buffer);
857 
858 	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
859 
860 	size = sizeof(uint16_t) * rx_ring->ring_size;
861 	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
862 
863 	for (i = 0; i < rx_ring->ring_size; i++)
864 		rx_ring->free_rx_ids[i] = i;
865 
866 	/* Reset RX statistics. */
867 	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
868 	    sizeof(rx_ring->rx_stats));
869 
870 	rx_ring->next_to_clean = 0;
871 	rx_ring->next_to_use = 0;
872 
873 	/* ... and create the buffer DMA maps */
874 	for (i = 0; i < rx_ring->ring_size; i++) {
875 		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
876 		    &(rx_ring->rx_buffer_info[i].map));
877 		if (err != 0) {
878 			ena_log(pdev, ERR,
879 			    "Unable to create Rx DMA map for buffer %d\n", i);
880 			goto err_buf_info_unmap;
881 		}
882 	}
883 
884 	/* Create LRO for the ring */
885 	if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) {
886 		int err = tcp_lro_init(&rx_ring->lro);
887 		if (err != 0) {
888 			ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n",
889 			    qid);
890 		} else {
891 			ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n",
892 			    qid);
893 			rx_ring->lro.ifp = adapter->ifp;
894 		}
895 	}
896 
897 	return (0);
898 
899 err_buf_info_unmap:
900 	while (i--) {
901 		bus_dmamap_destroy(adapter->rx_buf_tag,
902 		    rx_ring->rx_buffer_info[i].map);
903 	}
904 
905 	free(rx_ring->free_rx_ids, M_DEVBUF);
906 	rx_ring->free_rx_ids = NULL;
907 	free(rx_ring->rx_buffer_info, M_DEVBUF);
908 	rx_ring->rx_buffer_info = NULL;
909 	return (ENOMEM);
910 }
911 
912 /**
913  * ena_free_rx_resources - Free Rx Resources
914  * @adapter: network interface device structure
915  * @qid: queue index
916  *
917  * Free all receive software resources
918  **/
919 static void
ena_free_rx_resources(struct ena_adapter * adapter,unsigned int qid)920 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
921 {
922 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
923 
924 	/* Free buffer DMA maps, */
925 	for (int i = 0; i < rx_ring->ring_size; i++) {
926 		bus_dmamap_sync(adapter->rx_buf_tag,
927 		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
928 		m_freem(rx_ring->rx_buffer_info[i].mbuf);
929 		rx_ring->rx_buffer_info[i].mbuf = NULL;
930 		bus_dmamap_unload(adapter->rx_buf_tag,
931 		    rx_ring->rx_buffer_info[i].map);
932 		bus_dmamap_destroy(adapter->rx_buf_tag,
933 		    rx_ring->rx_buffer_info[i].map);
934 	}
935 
936 	/* free LRO resources, */
937 	tcp_lro_free(&rx_ring->lro);
938 
939 	/* free allocated memory */
940 	free(rx_ring->rx_buffer_info, M_DEVBUF);
941 	rx_ring->rx_buffer_info = NULL;
942 
943 	free(rx_ring->free_rx_ids, M_DEVBUF);
944 	rx_ring->free_rx_ids = NULL;
945 }
946 
947 /**
948  * ena_setup_all_rx_resources - allocate all queues Rx resources
949  * @adapter: network interface device structure
950  *
951  * Returns 0 on success, otherwise on failure.
952  **/
953 static int
ena_setup_all_rx_resources(struct ena_adapter * adapter)954 ena_setup_all_rx_resources(struct ena_adapter *adapter)
955 {
956 	int i, rc = 0;
957 
958 	for (i = 0; i < adapter->num_io_queues; i++) {
959 		rc = ena_setup_rx_resources(adapter, i);
960 		if (rc != 0) {
961 			ena_log(adapter->pdev, ERR,
962 			    "Allocation for Rx Queue %u failed\n", i);
963 			goto err_setup_rx;
964 		}
965 	}
966 	return (0);
967 
968 err_setup_rx:
969 	/* rewind the index freeing the rings as we go */
970 	while (i--)
971 		ena_free_rx_resources(adapter, i);
972 	return (rc);
973 }
974 
975 /**
976  * ena_free_all_rx_resources - Free Rx resources for all queues
977  * @adapter: network interface device structure
978  *
979  * Free all receive software resources
980  **/
981 static void
ena_free_all_rx_resources(struct ena_adapter * adapter)982 ena_free_all_rx_resources(struct ena_adapter *adapter)
983 {
984 	int i;
985 
986 	for (i = 0; i < adapter->num_io_queues; i++)
987 		ena_free_rx_resources(adapter, i);
988 }
989 
990 static inline int
ena_alloc_rx_mbuf(struct ena_adapter * adapter,struct ena_ring * rx_ring,struct ena_rx_buffer * rx_info)991 ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
992     struct ena_rx_buffer *rx_info)
993 {
994 	device_t pdev = adapter->pdev;
995 	struct ena_com_buf *ena_buf;
996 	bus_dma_segment_t segs[1];
997 	int nsegs, error;
998 	int mlen;
999 
1000 	/* if previous allocated frag is not used */
1001 	if (unlikely(rx_info->mbuf != NULL))
1002 		return (0);
1003 
1004 	/* Get mbuf using UMA allocator */
1005 	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1006 	    rx_ring->rx_mbuf_sz);
1007 
1008 	if (unlikely(rx_info->mbuf == NULL)) {
1009 		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1010 		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1011 		if (unlikely(rx_info->mbuf == NULL)) {
1012 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1013 			return (ENOMEM);
1014 		}
1015 		mlen = MCLBYTES;
1016 	} else {
1017 		mlen = rx_ring->rx_mbuf_sz;
1018 	}
1019 	/* Set mbuf length*/
1020 	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1021 
1022 	/* Map packets for DMA */
1023 	ena_log(pdev, DBG,
1024 	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1025 	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
1026 	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1027 	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1028 	if (unlikely((error != 0) || (nsegs != 1))) {
1029 		ena_log(pdev, WARN,
1030 		    "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs);
1031 		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1032 		goto exit;
1033 	}
1034 
1035 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1036 
1037 	ena_buf = &rx_info->ena_buf;
1038 	ena_buf->paddr = segs[0].ds_addr;
1039 	ena_buf->len = mlen;
1040 
1041 	ena_log(pdev, DBG,
1042 	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1043 	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1044 
1045 	return (0);
1046 
1047 exit:
1048 	m_freem(rx_info->mbuf);
1049 	rx_info->mbuf = NULL;
1050 	return (EFAULT);
1051 }
1052 
1053 static void
ena_free_rx_mbuf(struct ena_adapter * adapter,struct ena_ring * rx_ring,struct ena_rx_buffer * rx_info)1054 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1055     struct ena_rx_buffer *rx_info)
1056 {
1057 	if (rx_info->mbuf == NULL) {
1058 		ena_log(adapter->pdev, WARN,
1059 		    "Trying to free unallocated buffer\n");
1060 		return;
1061 	}
1062 
1063 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1064 	    BUS_DMASYNC_POSTREAD);
1065 	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1066 	m_freem(rx_info->mbuf);
1067 	rx_info->mbuf = NULL;
1068 }
1069 
1070 /**
1071  * ena_refill_rx_bufs - Refills ring with descriptors
1072  * @rx_ring: the ring which we want to feed with free descriptors
1073  * @num: number of descriptors to refill
1074  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1075  **/
1076 int
ena_refill_rx_bufs(struct ena_ring * rx_ring,uint32_t num)1077 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1078 {
1079 	struct ena_adapter *adapter = rx_ring->adapter;
1080 	device_t pdev = adapter->pdev;
1081 	uint16_t next_to_use, req_id;
1082 	uint32_t i;
1083 	int rc;
1084 
1085 	ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid);
1086 
1087 	next_to_use = rx_ring->next_to_use;
1088 
1089 	for (i = 0; i < num; i++) {
1090 		struct ena_rx_buffer *rx_info;
1091 
1092 		ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n",
1093 		    next_to_use);
1094 
1095 		req_id = rx_ring->free_rx_ids[next_to_use];
1096 		rx_info = &rx_ring->rx_buffer_info[req_id];
1097 #ifdef DEV_NETMAP
1098 		if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1099 			rc = ena_netmap_alloc_rx_slot(adapter, rx_ring,
1100 			    rx_info);
1101 		else
1102 #endif /* DEV_NETMAP */
1103 			rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1104 		if (unlikely(rc != 0)) {
1105 			ena_log_io(pdev, WARN,
1106 			    "failed to alloc buffer for rx queue %d\n",
1107 			    rx_ring->qid);
1108 			break;
1109 		}
1110 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1111 		    &rx_info->ena_buf, req_id);
1112 		if (unlikely(rc != 0)) {
1113 			ena_log_io(pdev, WARN,
1114 			    "failed to add buffer for rx queue %d\n",
1115 			    rx_ring->qid);
1116 			break;
1117 		}
1118 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1119 		    rx_ring->ring_size);
1120 	}
1121 
1122 	if (unlikely(i < num)) {
1123 		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1124 		ena_log_io(pdev, WARN,
1125 		    "refilled rx qid %d with only %d mbufs (from %d)\n",
1126 		    rx_ring->qid, i, num);
1127 	}
1128 
1129 	if (likely(i != 0))
1130 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1131 
1132 	rx_ring->next_to_use = next_to_use;
1133 	return (i);
1134 }
1135 
1136 int
ena_update_buf_ring_size(struct ena_adapter * adapter,uint32_t new_buf_ring_size)1137 ena_update_buf_ring_size(struct ena_adapter *adapter,
1138     uint32_t new_buf_ring_size)
1139 {
1140 	uint32_t old_buf_ring_size;
1141 	int rc = 0;
1142 	bool dev_was_up;
1143 
1144 	old_buf_ring_size = adapter->buf_ring_size;
1145 	adapter->buf_ring_size = new_buf_ring_size;
1146 
1147 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1148 	ena_down(adapter);
1149 
1150 	/* Reconfigure buf ring for all Tx rings. */
1151 	ena_free_all_io_rings_resources(adapter);
1152 	ena_init_io_rings_advanced(adapter);
1153 	if (dev_was_up) {
1154 		/*
1155 		 * If ena_up() fails, it's not because of recent buf_ring size
1156 		 * changes. Because of that, we just want to revert old drbr
1157 		 * value and trigger the reset because something else had to
1158 		 * go wrong.
1159 		 */
1160 		rc = ena_up(adapter);
1161 		if (unlikely(rc != 0)) {
1162 			ena_log(adapter->pdev, ERR,
1163 			    "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
1164 			    new_buf_ring_size, old_buf_ring_size);
1165 
1166 			/* Revert old size and trigger the reset */
1167 			adapter->buf_ring_size = old_buf_ring_size;
1168 			ena_free_all_io_rings_resources(adapter);
1169 			ena_init_io_rings_advanced(adapter);
1170 
1171 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1172 			    adapter);
1173 			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1174 		}
1175 	}
1176 
1177 	return (rc);
1178 }
1179 
1180 int
ena_update_queue_size(struct ena_adapter * adapter,uint32_t new_tx_size,uint32_t new_rx_size)1181 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1182     uint32_t new_rx_size)
1183 {
1184 	uint32_t old_tx_size, old_rx_size;
1185 	int rc = 0;
1186 	bool dev_was_up;
1187 
1188 	old_tx_size = adapter->requested_tx_ring_size;
1189 	old_rx_size = adapter->requested_rx_ring_size;
1190 	adapter->requested_tx_ring_size = new_tx_size;
1191 	adapter->requested_rx_ring_size = new_rx_size;
1192 
1193 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1194 	ena_down(adapter);
1195 
1196 	/* Configure queues with new size. */
1197 	ena_init_io_rings_basic(adapter);
1198 	if (dev_was_up) {
1199 		rc = ena_up(adapter);
1200 		if (unlikely(rc != 0)) {
1201 			ena_log(adapter->pdev, ERR,
1202 			    "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1203 			    new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1204 
1205 			/* Revert old size. */
1206 			adapter->requested_tx_ring_size = old_tx_size;
1207 			adapter->requested_rx_ring_size = old_rx_size;
1208 			ena_init_io_rings_basic(adapter);
1209 
1210 			/* And try again. */
1211 			rc = ena_up(adapter);
1212 			if (unlikely(rc != 0)) {
1213 				ena_log(adapter->pdev, ERR,
1214 				    "Failed to revert old queue sizes. Triggering device reset.\n");
1215 				/*
1216 				 * If we've failed again, something had to go
1217 				 * wrong. After reset, the device should try to
1218 				 * go up
1219 				 */
1220 				ENA_FLAG_SET_ATOMIC(
1221 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1222 				ena_trigger_reset(adapter,
1223 				    ENA_REGS_RESET_OS_TRIGGER);
1224 			}
1225 		}
1226 	}
1227 
1228 	return (rc);
1229 }
1230 
1231 static void
ena_update_io_rings(struct ena_adapter * adapter,uint32_t num)1232 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
1233 {
1234 	ena_free_all_io_rings_resources(adapter);
1235 	/* Force indirection table to be reinitialized */
1236 	ena_com_rss_destroy(adapter->ena_dev);
1237 
1238 	adapter->num_io_queues = num;
1239 	ena_init_io_rings(adapter);
1240 }
1241 
1242 int
ena_update_base_cpu(struct ena_adapter * adapter,int new_num)1243 ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
1244 {
1245 	int old_num;
1246 	int rc = 0;
1247 	bool dev_was_up;
1248 
1249 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1250 	old_num = adapter->irq_cpu_base;
1251 
1252 	ena_down(adapter);
1253 
1254 	adapter->irq_cpu_base = new_num;
1255 
1256 	if (dev_was_up) {
1257 		rc = ena_up(adapter);
1258 		if (unlikely(rc != 0)) {
1259 			ena_log(adapter->pdev, ERR,
1260 			    "Failed to configure device %d IRQ base CPU. "
1261 			    "Reverting to previous value: %d\n",
1262 			    new_num, old_num);
1263 
1264 			adapter->irq_cpu_base = old_num;
1265 
1266 			rc = ena_up(adapter);
1267 			if (unlikely(rc != 0)) {
1268 				ena_log(adapter->pdev, ERR,
1269 				    "Failed to revert to previous setup."
1270 				    "Triggering device reset.\n");
1271 				ENA_FLAG_SET_ATOMIC(
1272 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1273 				ena_trigger_reset(adapter,
1274 				    ENA_REGS_RESET_OS_TRIGGER);
1275 			}
1276 		}
1277 	}
1278 	return (rc);
1279 }
1280 
1281 int
ena_update_cpu_stride(struct ena_adapter * adapter,uint32_t new_num)1282 ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
1283 {
1284 	uint32_t old_num;
1285 	int rc = 0;
1286 	bool dev_was_up;
1287 
1288 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1289 	old_num = adapter->irq_cpu_stride;
1290 
1291 	ena_down(adapter);
1292 
1293 	adapter->irq_cpu_stride = new_num;
1294 
1295 	if (dev_was_up) {
1296 		rc = ena_up(adapter);
1297 		if (unlikely(rc != 0)) {
1298 			ena_log(adapter->pdev, ERR,
1299 			    "Failed to configure device %d IRQ CPU stride. "
1300 			    "Reverting to previous value: %d\n",
1301 			    new_num, old_num);
1302 
1303 			adapter->irq_cpu_stride = old_num;
1304 
1305 			rc = ena_up(adapter);
1306 			if (unlikely(rc != 0)) {
1307 				ena_log(adapter->pdev, ERR,
1308 				    "Failed to revert to previous setup."
1309 				    "Triggering device reset.\n");
1310 				ENA_FLAG_SET_ATOMIC(
1311 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1312 				ena_trigger_reset(adapter,
1313 				    ENA_REGS_RESET_OS_TRIGGER);
1314 			}
1315 		}
1316 	}
1317 	return (rc);
1318 }
1319 
1320 /* Caller should sanitize new_num */
1321 int
ena_update_io_queue_nb(struct ena_adapter * adapter,uint32_t new_num)1322 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1323 {
1324 	uint32_t old_num;
1325 	int rc = 0;
1326 	bool dev_was_up;
1327 
1328 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1329 	old_num = adapter->num_io_queues;
1330 	ena_down(adapter);
1331 
1332 	ena_update_io_rings(adapter, new_num);
1333 
1334 	if (dev_was_up) {
1335 		rc = ena_up(adapter);
1336 		if (unlikely(rc != 0)) {
1337 			ena_log(adapter->pdev, ERR,
1338 			    "Failed to configure device with %u IO queues. "
1339 			    "Reverting to previous value: %u\n",
1340 			    new_num, old_num);
1341 
1342 			ena_update_io_rings(adapter, old_num);
1343 
1344 			rc = ena_up(adapter);
1345 			if (unlikely(rc != 0)) {
1346 				ena_log(adapter->pdev, ERR,
1347 				    "Failed to revert to previous setup IO "
1348 				    "queues. Triggering device reset.\n");
1349 				ENA_FLAG_SET_ATOMIC(
1350 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1351 				ena_trigger_reset(adapter,
1352 				    ENA_REGS_RESET_OS_TRIGGER);
1353 			}
1354 		}
1355 	}
1356 
1357 	return (rc);
1358 }
1359 
1360 static void
ena_free_rx_bufs(struct ena_adapter * adapter,unsigned int qid)1361 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1362 {
1363 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1364 	unsigned int i;
1365 
1366 	for (i = 0; i < rx_ring->ring_size; i++) {
1367 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1368 
1369 		if (rx_info->mbuf != NULL)
1370 			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1371 #ifdef DEV_NETMAP
1372 		if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1373 		    (if_getcapenable(adapter->ifp) & IFCAP_NETMAP)) {
1374 			if (rx_info->netmap_buf_idx != 0)
1375 				ena_netmap_free_rx_slot(adapter, rx_ring,
1376 				    rx_info);
1377 		}
1378 #endif /* DEV_NETMAP */
1379 	}
1380 }
1381 
1382 /**
1383  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1384  * @adapter: network interface device structure
1385  *
1386  */
1387 static void
ena_refill_all_rx_bufs(struct ena_adapter * adapter)1388 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1389 {
1390 	struct ena_ring *rx_ring;
1391 	int i, rc, bufs_num;
1392 
1393 	for (i = 0; i < adapter->num_io_queues; i++) {
1394 		rx_ring = &adapter->rx_ring[i];
1395 		bufs_num = rx_ring->ring_size - 1;
1396 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1397 		if (unlikely(rc != bufs_num))
1398 			ena_log_io(adapter->pdev, WARN,
1399 			    "refilling Queue %d failed. "
1400 			    "Allocated %d buffers from: %d\n",
1401 			    i, rc, bufs_num);
1402 #ifdef DEV_NETMAP
1403 		rx_ring->initialized = true;
1404 #endif /* DEV_NETMAP */
1405 	}
1406 }
1407 
1408 static void
ena_free_all_rx_bufs(struct ena_adapter * adapter)1409 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1410 {
1411 	int i;
1412 
1413 	for (i = 0; i < adapter->num_io_queues; i++)
1414 		ena_free_rx_bufs(adapter, i);
1415 }
1416 
1417 /**
1418  * ena_free_tx_bufs - Free Tx Buffers per Queue
1419  * @adapter: network interface device structure
1420  * @qid: queue index
1421  **/
1422 static void
ena_free_tx_bufs(struct ena_adapter * adapter,unsigned int qid)1423 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1424 {
1425 	bool print_once = true;
1426 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1427 
1428 	ENA_RING_MTX_LOCK(tx_ring);
1429 	for (int i = 0; i < tx_ring->ring_size; i++) {
1430 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1431 
1432 		if (tx_info->mbuf == NULL)
1433 			continue;
1434 
1435 		if (print_once) {
1436 			ena_log(adapter->pdev, WARN,
1437 			    "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1438 			    i);
1439 			print_once = false;
1440 		} else {
1441 			ena_log(adapter->pdev, DBG,
1442 			    "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1443 			    i);
1444 		}
1445 
1446 		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1447 		    BUS_DMASYNC_POSTWRITE);
1448 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1449 
1450 		m_free(tx_info->mbuf);
1451 		tx_info->mbuf = NULL;
1452 	}
1453 	ENA_RING_MTX_UNLOCK(tx_ring);
1454 }
1455 
1456 static void
ena_free_all_tx_bufs(struct ena_adapter * adapter)1457 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1458 {
1459 	for (int i = 0; i < adapter->num_io_queues; i++)
1460 		ena_free_tx_bufs(adapter, i);
1461 }
1462 
1463 static void
ena_destroy_all_tx_queues(struct ena_adapter * adapter)1464 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1465 {
1466 	uint16_t ena_qid;
1467 	int i;
1468 
1469 	for (i = 0; i < adapter->num_io_queues; i++) {
1470 		ena_qid = ENA_IO_TXQ_IDX(i);
1471 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1472 	}
1473 }
1474 
1475 static void
ena_destroy_all_rx_queues(struct ena_adapter * adapter)1476 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1477 {
1478 	uint16_t ena_qid;
1479 	int i;
1480 
1481 	for (i = 0; i < adapter->num_io_queues; i++) {
1482 		ena_qid = ENA_IO_RXQ_IDX(i);
1483 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1484 	}
1485 }
1486 
1487 static void
ena_destroy_all_io_queues(struct ena_adapter * adapter)1488 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1489 {
1490 	struct ena_que *queue;
1491 	int i;
1492 
1493 	for (i = 0; i < adapter->num_io_queues; i++) {
1494 		queue = &adapter->que[i];
1495 		while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL))
1496 			taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task);
1497 		taskqueue_free(queue->cleanup_tq);
1498 	}
1499 
1500 	ena_destroy_all_tx_queues(adapter);
1501 	ena_destroy_all_rx_queues(adapter);
1502 }
1503 
1504 static int
ena_create_io_queues(struct ena_adapter * adapter)1505 ena_create_io_queues(struct ena_adapter *adapter)
1506 {
1507 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1508 	struct ena_com_create_io_ctx ctx;
1509 	struct ena_ring *ring;
1510 	struct ena_que *queue;
1511 	uint16_t ena_qid;
1512 	uint32_t msix_vector;
1513 	cpuset_t *cpu_mask = NULL;
1514 	int rc, i;
1515 
1516 	/* Create TX queues */
1517 	for (i = 0; i < adapter->num_io_queues; i++) {
1518 		msix_vector = ENA_IO_IRQ_IDX(i);
1519 		ena_qid = ENA_IO_TXQ_IDX(i);
1520 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1521 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1522 		ctx.queue_size = adapter->requested_tx_ring_size;
1523 		ctx.msix_vector = msix_vector;
1524 		ctx.qid = ena_qid;
1525 		ctx.numa_node = adapter->que[i].domain;
1526 
1527 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1528 		if (rc != 0) {
1529 			ena_log(adapter->pdev, ERR,
1530 			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
1531 			goto err_tx;
1532 		}
1533 		ring = &adapter->tx_ring[i];
1534 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1535 		    &ring->ena_com_io_sq, &ring->ena_com_io_cq);
1536 		if (rc != 0) {
1537 			ena_log(adapter->pdev, ERR,
1538 			    "Failed to get TX queue handlers. TX queue num"
1539 			    " %d rc: %d\n",
1540 			    i, rc);
1541 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1542 			goto err_tx;
1543 		}
1544 
1545 		if (ctx.numa_node >= 0) {
1546 			ena_com_update_numa_node(ring->ena_com_io_cq,
1547 			    ctx.numa_node);
1548 		}
1549 	}
1550 
1551 	/* Create RX queues */
1552 	for (i = 0; i < adapter->num_io_queues; i++) {
1553 		msix_vector = ENA_IO_IRQ_IDX(i);
1554 		ena_qid = ENA_IO_RXQ_IDX(i);
1555 		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1556 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1557 		ctx.queue_size = adapter->requested_rx_ring_size;
1558 		ctx.msix_vector = msix_vector;
1559 		ctx.qid = ena_qid;
1560 		ctx.numa_node = adapter->que[i].domain;
1561 
1562 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1563 		if (unlikely(rc != 0)) {
1564 			ena_log(adapter->pdev, ERR,
1565 			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1566 			goto err_rx;
1567 		}
1568 
1569 		ring = &adapter->rx_ring[i];
1570 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1571 		    &ring->ena_com_io_sq, &ring->ena_com_io_cq);
1572 		if (unlikely(rc != 0)) {
1573 			ena_log(adapter->pdev, ERR,
1574 			    "Failed to get RX queue handlers. RX queue num"
1575 			    " %d rc: %d\n",
1576 			    i, rc);
1577 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1578 			goto err_rx;
1579 		}
1580 
1581 		if (ctx.numa_node >= 0) {
1582 			ena_com_update_numa_node(ring->ena_com_io_cq,
1583 			    ctx.numa_node);
1584 		}
1585 	}
1586 
1587 	for (i = 0; i < adapter->num_io_queues; i++) {
1588 		queue = &adapter->que[i];
1589 
1590 		NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1591 		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1592 		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1593 
1594 #ifdef RSS
1595 		cpu_mask = &queue->cpu_mask;
1596 #endif
1597 		taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET,
1598 		    cpu_mask, "%s queue %d cleanup",
1599 		    device_get_nameunit(adapter->pdev), i);
1600 	}
1601 
1602 	return (0);
1603 
1604 err_rx:
1605 	while (i--)
1606 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1607 	i = adapter->num_io_queues;
1608 err_tx:
1609 	while (i--)
1610 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1611 
1612 	return (ENXIO);
1613 }
1614 
1615 /*********************************************************************
1616  *
1617  *  MSIX & Interrupt Service routine
1618  *
1619  **********************************************************************/
1620 
1621 /**
1622  * ena_handle_msix - MSIX Interrupt Handler for admin/async queue
1623  * @arg: interrupt number
1624  **/
1625 static void
ena_intr_msix_mgmnt(void * arg)1626 ena_intr_msix_mgmnt(void *arg)
1627 {
1628 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1629 
1630 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1631 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1632 		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1633 }
1634 
1635 /**
1636  * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1637  * @arg: queue
1638  **/
1639 static int
ena_handle_msix(void * arg)1640 ena_handle_msix(void *arg)
1641 {
1642 	struct ena_que *queue = arg;
1643 	struct ena_adapter *adapter = queue->adapter;
1644 	if_t ifp = adapter->ifp;
1645 
1646 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1647 		return (FILTER_STRAY);
1648 
1649 	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1650 
1651 	return (FILTER_HANDLED);
1652 }
1653 
1654 static int
ena_enable_msix(struct ena_adapter * adapter)1655 ena_enable_msix(struct ena_adapter *adapter)
1656 {
1657 	device_t dev = adapter->pdev;
1658 	int msix_vecs, msix_req;
1659 	int i, rc = 0;
1660 
1661 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1662 		ena_log(dev, ERR, "Error, MSI-X is already enabled\n");
1663 		return (EINVAL);
1664 	}
1665 
1666 	/* Reserved the max msix vectors we might need */
1667 	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1668 
1669 	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1670 	    M_DEVBUF, M_WAITOK | M_ZERO);
1671 
1672 	ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1673 
1674 	for (i = 0; i < msix_vecs; i++) {
1675 		adapter->msix_entries[i].entry = i;
1676 		/* Vectors must start from 1 */
1677 		adapter->msix_entries[i].vector = i + 1;
1678 	}
1679 
1680 	msix_req = msix_vecs;
1681 	rc = pci_alloc_msix(dev, &msix_vecs);
1682 	if (unlikely(rc != 0)) {
1683 		ena_log(dev, ERR, "Failed to enable MSIX, vectors %d rc %d\n",
1684 		    msix_vecs, rc);
1685 
1686 		rc = ENOSPC;
1687 		goto err_msix_free;
1688 	}
1689 
1690 	if (msix_vecs != msix_req) {
1691 		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1692 			ena_log(dev, ERR,
1693 			    "Not enough number of MSI-x allocated: %d\n",
1694 			    msix_vecs);
1695 			pci_release_msi(dev);
1696 			rc = ENOSPC;
1697 			goto err_msix_free;
1698 		}
1699 		ena_log(dev, ERR,
1700 		    "Enable only %d MSI-x (out of %d), reduce "
1701 		    "the number of queues\n",
1702 		    msix_vecs, msix_req);
1703 	}
1704 
1705 	adapter->msix_vecs = msix_vecs;
1706 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1707 
1708 	return (0);
1709 
1710 err_msix_free:
1711 	free(adapter->msix_entries, M_DEVBUF);
1712 	adapter->msix_entries = NULL;
1713 
1714 	return (rc);
1715 }
1716 
1717 static void
ena_setup_mgmnt_intr(struct ena_adapter * adapter)1718 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1719 {
1720 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE,
1721 	    "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev));
1722 	/*
1723 	 * Handler is NULL on purpose, it will be set
1724 	 * when mgmnt interrupt is acquired
1725 	 */
1726 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1727 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1728 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1729 	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1730 }
1731 
1732 static int
ena_setup_io_intr(struct ena_adapter * adapter)1733 ena_setup_io_intr(struct ena_adapter *adapter)
1734 {
1735 #ifdef RSS
1736 	int num_buckets = rss_getnumbuckets();
1737 	static int last_bind = 0;
1738 	int cur_bind;
1739 	int idx;
1740 #endif
1741 	int irq_idx;
1742 
1743 	if (adapter->msix_entries == NULL)
1744 		return (EINVAL);
1745 
1746 #ifdef RSS
1747 	if (adapter->first_bind < 0) {
1748 		adapter->first_bind = last_bind;
1749 		last_bind = (last_bind + adapter->num_io_queues) % num_buckets;
1750 	}
1751 	cur_bind = adapter->first_bind;
1752 #endif
1753 
1754 	for (int i = 0; i < adapter->num_io_queues; i++) {
1755 		irq_idx = ENA_IO_IRQ_IDX(i);
1756 
1757 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1758 		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1759 		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1760 		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1761 		adapter->irq_tbl[irq_idx].vector =
1762 		    adapter->msix_entries[irq_idx].vector;
1763 		ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
1764 		    adapter->msix_entries[irq_idx].vector);
1765 
1766 		if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
1767 			adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1768 			    (unsigned)(adapter->irq_cpu_base +
1769 			    i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
1770 			CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
1771 		}
1772 
1773 #ifdef RSS
1774 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1775 		    rss_getcpu(cur_bind);
1776 		cur_bind = (cur_bind + 1) % num_buckets;
1777 		CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
1778 
1779 		for (idx = 0; idx < MAXMEMDOM; ++idx) {
1780 			if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx]))
1781 				break;
1782 		}
1783 		adapter->que[i].domain = idx;
1784 #else
1785 		adapter->que[i].domain = -1;
1786 #endif
1787 	}
1788 
1789 	return (0);
1790 }
1791 
1792 static int
ena_request_mgmnt_irq(struct ena_adapter * adapter)1793 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1794 {
1795 	device_t pdev = adapter->pdev;
1796 	struct ena_irq *irq;
1797 	unsigned long flags;
1798 	int rc, rcc;
1799 
1800 	flags = RF_ACTIVE | RF_SHAREABLE;
1801 
1802 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1803 	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1804 	    &irq->vector, flags);
1805 
1806 	if (unlikely(irq->res == NULL)) {
1807 		ena_log(pdev, ERR, "could not allocate irq vector: %d\n",
1808 		    irq->vector);
1809 		return (ENXIO);
1810 	}
1811 
1812 	rc = bus_setup_intr(adapter->pdev, irq->res,
1813 	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data,
1814 	    &irq->cookie);
1815 	if (unlikely(rc != 0)) {
1816 		ena_log(pdev, ERR,
1817 		    "failed to register interrupt handler for irq %ju: %d\n",
1818 		    rman_get_start(irq->res), rc);
1819 		goto err_res_free;
1820 	}
1821 	irq->requested = true;
1822 
1823 	return (rc);
1824 
1825 err_res_free:
1826 	ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector);
1827 	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector,
1828 	    irq->res);
1829 	if (unlikely(rcc != 0))
1830 		ena_log(pdev, ERR,
1831 		    "dev has no parent while releasing res for irq: %d\n",
1832 		    irq->vector);
1833 	irq->res = NULL;
1834 
1835 	return (rc);
1836 }
1837 
1838 static int
ena_request_io_irq(struct ena_adapter * adapter)1839 ena_request_io_irq(struct ena_adapter *adapter)
1840 {
1841 	device_t pdev = adapter->pdev;
1842 	struct ena_irq *irq;
1843 	unsigned long flags = 0;
1844 	int rc = 0, i, rcc;
1845 
1846 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1847 		ena_log(pdev, ERR,
1848 		    "failed to request I/O IRQ: MSI-X is not enabled\n");
1849 		return (EINVAL);
1850 	} else {
1851 		flags = RF_ACTIVE | RF_SHAREABLE;
1852 	}
1853 
1854 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1855 		irq = &adapter->irq_tbl[i];
1856 
1857 		if (unlikely(irq->requested))
1858 			continue;
1859 
1860 		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1861 		    &irq->vector, flags);
1862 		if (unlikely(irq->res == NULL)) {
1863 			rc = ENOMEM;
1864 			ena_log(pdev, ERR,
1865 			    "could not allocate irq vector: %d\n", irq->vector);
1866 			goto err;
1867 		}
1868 
1869 		rc = bus_setup_intr(adapter->pdev, irq->res,
1870 		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL, irq->data,
1871 		    &irq->cookie);
1872 		if (unlikely(rc != 0)) {
1873 			ena_log(pdev, ERR,
1874 			    "failed to register interrupt handler for irq %ju: %d\n",
1875 			    rman_get_start(irq->res), rc);
1876 			goto err;
1877 		}
1878 		irq->requested = true;
1879 
1880 		if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
1881 			rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
1882 			if (unlikely(rc != 0)) {
1883 				ena_log(pdev, ERR,
1884 				    "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
1885 				    rman_get_start(irq->res), irq->cpu, rc);
1886 				goto err;
1887 			}
1888 
1889 			ena_log(pdev, INFO, "queue %d - cpu %d\n",
1890 			    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1891 		}
1892 	}
1893 	return (rc);
1894 
1895 err:
1896 
1897 	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1898 		irq = &adapter->irq_tbl[i];
1899 		rcc = 0;
1900 
1901 		/* Once we entered err: section and irq->requested is true we
1902 		   free both intr and resources */
1903 		if (irq->requested) {
1904 			rcc = bus_teardown_intr(adapter->pdev, irq->res,
1905 			    irq->cookie);
1906 			if (unlikely(rcc != 0))
1907 				ena_log(pdev, ERR,
1908 				    "could not release irq: %d, error: %d\n",
1909 				    irq->vector, rcc);
1910 		}
1911 
1912 		/* If we entered err: section without irq->requested set we know
1913 		   it was bus_alloc_resource_any() that needs cleanup, provided
1914 		   res is not NULL. In case res is NULL no work in needed in
1915 		   this iteration */
1916 		rcc = 0;
1917 		if (irq->res != NULL) {
1918 			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1919 			    irq->vector, irq->res);
1920 		}
1921 		if (unlikely(rcc != 0))
1922 			ena_log(pdev, ERR,
1923 			    "dev has no parent while releasing res for irq: %d\n",
1924 			    irq->vector);
1925 		irq->requested = false;
1926 		irq->res = NULL;
1927 	}
1928 
1929 	return (rc);
1930 }
1931 
1932 static void
ena_free_mgmnt_irq(struct ena_adapter * adapter)1933 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1934 {
1935 	device_t pdev = adapter->pdev;
1936 	struct ena_irq *irq;
1937 	int rc;
1938 
1939 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1940 	if (irq->requested) {
1941 		ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1942 		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1943 		if (unlikely(rc != 0))
1944 			ena_log(pdev, ERR, "failed to tear down irq: %d\n",
1945 			    irq->vector);
1946 		irq->requested = 0;
1947 	}
1948 
1949 	if (irq->res != NULL) {
1950 		ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector);
1951 		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1952 		    irq->vector, irq->res);
1953 		irq->res = NULL;
1954 		if (unlikely(rc != 0))
1955 			ena_log(pdev, ERR,
1956 			    "dev has no parent while releasing res for irq: %d\n",
1957 			    irq->vector);
1958 	}
1959 }
1960 
1961 static void
ena_free_io_irq(struct ena_adapter * adapter)1962 ena_free_io_irq(struct ena_adapter *adapter)
1963 {
1964 	device_t pdev = adapter->pdev;
1965 	struct ena_irq *irq;
1966 	int rc;
1967 
1968 	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1969 		irq = &adapter->irq_tbl[i];
1970 		if (irq->requested) {
1971 			ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1972 			rc = bus_teardown_intr(adapter->pdev, irq->res,
1973 			    irq->cookie);
1974 			if (unlikely(rc != 0)) {
1975 				ena_log(pdev, ERR,
1976 				    "failed to tear down irq: %d\n",
1977 				    irq->vector);
1978 			}
1979 			irq->requested = 0;
1980 		}
1981 
1982 		if (irq->res != NULL) {
1983 			ena_log(pdev, DBG, "release resource irq: %d\n",
1984 			    irq->vector);
1985 			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1986 			    irq->vector, irq->res);
1987 			irq->res = NULL;
1988 			if (unlikely(rc != 0)) {
1989 				ena_log(pdev, ERR,
1990 				    "dev has no parent while releasing res for irq: %d\n",
1991 				    irq->vector);
1992 			}
1993 		}
1994 	}
1995 }
1996 
/*
 * Release all interrupts owned by the adapter: the IO interrupts, then the
 * management interrupt, and finally the MSI-X state itself.
 */
static void
ena_free_irqs(struct ena_adapter *adapter)
{
	/* IO vectors first; ena_free_io_irq() iterates up to msix_vecs. */
	ena_free_io_irq(adapter);
	ena_free_mgmnt_irq(adapter);
	/* Resets msix_vecs and frees msix_entries, so it must come last. */
	ena_disable_msix(adapter);
}
2004 
2005 static void
ena_disable_msix(struct ena_adapter * adapter)2006 ena_disable_msix(struct ena_adapter *adapter)
2007 {
2008 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
2009 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
2010 		pci_release_msi(adapter->pdev);
2011 	}
2012 
2013 	adapter->msix_vecs = 0;
2014 	free(adapter->msix_entries, M_DEVBUF);
2015 	adapter->msix_entries = NULL;
2016 }
2017 
2018 static void
ena_unmask_all_io_irqs(struct ena_adapter * adapter)2019 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2020 {
2021 	struct ena_com_io_cq *io_cq;
2022 	struct ena_eth_io_intr_reg intr_reg;
2023 	struct ena_ring *tx_ring;
2024 	uint16_t ena_qid;
2025 	int i;
2026 
2027 	/* Unmask interrupts for all queues */
2028 	for (i = 0; i < adapter->num_io_queues; i++) {
2029 		ena_qid = ENA_IO_TXQ_IDX(i);
2030 		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2031 		ena_com_update_intr_reg(&intr_reg, 0, 0, true, false);
2032 		tx_ring = &adapter->tx_ring[i];
2033 		counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
2034 		ena_com_unmask_intr(io_cq, &intr_reg);
2035 	}
2036 }
2037 
2038 static int
ena_up_complete(struct ena_adapter * adapter)2039 ena_up_complete(struct ena_adapter *adapter)
2040 {
2041 	int rc;
2042 
2043 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
2044 		rc = ena_rss_configure(adapter);
2045 		if (rc != 0) {
2046 			ena_log(adapter->pdev, ERR,
2047 			    "Failed to configure RSS\n");
2048 			return (rc);
2049 		}
2050 	}
2051 
2052 	rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp));
2053 	if (unlikely(rc != 0))
2054 		return (rc);
2055 
2056 	ena_refill_all_rx_bufs(adapter);
2057 	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2058 	    sizeof(adapter->hw_stats));
2059 
2060 	return (0);
2061 }
2062 
2063 static void
set_io_rings_size(struct ena_adapter * adapter,int new_tx_size,int new_rx_size)2064 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size)
2065 {
2066 	int i;
2067 
2068 	for (i = 0; i < adapter->num_io_queues; i++) {
2069 		adapter->tx_ring[i].ring_size = new_tx_size;
2070 		adapter->rx_ring[i].ring_size = new_rx_size;
2071 	}
2072 }
2073 
2074 static int
create_queues_with_size_backoff(struct ena_adapter * adapter)2075 create_queues_with_size_backoff(struct ena_adapter *adapter)
2076 {
2077 	device_t pdev = adapter->pdev;
2078 	int rc;
2079 	uint32_t cur_rx_ring_size, cur_tx_ring_size;
2080 	uint32_t new_rx_ring_size, new_tx_ring_size;
2081 
2082 	/*
2083 	 * Current queue sizes might be set to smaller than the requested
2084 	 * ones due to past queue allocation failures.
2085 	 */
2086 	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2087 	    adapter->requested_rx_ring_size);
2088 
2089 	while (1) {
2090 		/* Allocate transmit descriptors */
2091 		rc = ena_setup_all_tx_resources(adapter);
2092 		if (unlikely(rc != 0)) {
2093 			ena_log(pdev, ERR, "err_setup_tx\n");
2094 			goto err_setup_tx;
2095 		}
2096 
2097 		/* Allocate receive descriptors */
2098 		rc = ena_setup_all_rx_resources(adapter);
2099 		if (unlikely(rc != 0)) {
2100 			ena_log(pdev, ERR, "err_setup_rx\n");
2101 			goto err_setup_rx;
2102 		}
2103 
2104 		/* Create IO queues for Rx & Tx */
2105 		rc = ena_create_io_queues(adapter);
2106 		if (unlikely(rc != 0)) {
2107 			ena_log(pdev, ERR, "create IO queues failed\n");
2108 			goto err_io_que;
2109 		}
2110 
2111 		return (0);
2112 
2113 err_io_que:
2114 		ena_free_all_rx_resources(adapter);
2115 err_setup_rx:
2116 		ena_free_all_tx_resources(adapter);
2117 err_setup_tx:
2118 		/*
2119 		 * Lower the ring size if ENOMEM. Otherwise, return the
2120 		 * error straightaway.
2121 		 */
2122 		if (unlikely(rc != ENOMEM)) {
2123 			ena_log(pdev, ERR,
2124 			    "Queue creation failed with error code: %d\n", rc);
2125 			return (rc);
2126 		}
2127 
2128 		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2129 		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2130 
2131 		ena_log(pdev, ERR,
2132 		    "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2133 		    cur_tx_ring_size, cur_rx_ring_size);
2134 
2135 		new_tx_ring_size = cur_tx_ring_size;
2136 		new_rx_ring_size = cur_rx_ring_size;
2137 
2138 		/*
2139 		 * Decrease the size of a larger queue, or decrease both if they
2140 		 * are the same size.
2141 		 */
2142 		if (cur_rx_ring_size <= cur_tx_ring_size)
2143 			new_tx_ring_size = cur_tx_ring_size / 2;
2144 		if (cur_rx_ring_size >= cur_tx_ring_size)
2145 			new_rx_ring_size = cur_rx_ring_size / 2;
2146 
2147 		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2148 		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2149 			ena_log(pdev, ERR,
2150 			    "Queue creation failed with the smallest possible queue size"
2151 			    "of %d for both queues. Not retrying with smaller queues\n",
2152 			    ENA_MIN_RING_SIZE);
2153 			return (rc);
2154 		}
2155 
2156 		ena_log(pdev, INFO,
2157 		    "Retrying queue creation with sizes TX=%d, RX=%d\n",
2158 		    new_tx_ring_size, new_rx_ring_size);
2159 
2160 		set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2161 	}
2162 }
2163 
2164 int
ena_up(struct ena_adapter * adapter)2165 ena_up(struct ena_adapter *adapter)
2166 {
2167 	int rc = 0;
2168 
2169 	ENA_LOCK_ASSERT();
2170 
2171 	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2172 		ena_log(adapter->pdev, ERR, "device is not attached!\n");
2173 		return (ENXIO);
2174 	}
2175 
2176 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2177 		return (0);
2178 
2179 	ena_log(adapter->pdev, INFO, "device is going UP\n");
2180 
2181 	/* setup interrupts for IO queues */
2182 	rc = ena_setup_io_intr(adapter);
2183 	if (unlikely(rc != 0)) {
2184 		ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n");
2185 		goto error;
2186 	}
2187 	rc = ena_request_io_irq(adapter);
2188 	if (unlikely(rc != 0)) {
2189 		ena_log(adapter->pdev, ERR, "err_req_irq\n");
2190 		goto error;
2191 	}
2192 
2193 	ena_log(adapter->pdev, INFO,
2194 	    "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n",
2195 	    adapter->num_io_queues,
2196 	    adapter->requested_rx_ring_size,
2197 	    adapter->requested_tx_ring_size,
2198 	    (adapter->ena_dev->tx_mem_queue_type ==
2199 		ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
2200 
2201 	rc = create_queues_with_size_backoff(adapter);
2202 	if (unlikely(rc != 0)) {
2203 		ena_log(adapter->pdev, ERR,
2204 		    "error creating queues with size backoff\n");
2205 		goto err_create_queues_with_backoff;
2206 	}
2207 
2208 	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2209 		if_link_state_change(adapter->ifp, LINK_STATE_UP);
2210 
2211 	rc = ena_up_complete(adapter);
2212 	if (unlikely(rc != 0))
2213 		goto err_up_complete;
2214 
2215 	counter_u64_add(adapter->dev_stats.interface_up, 1);
2216 
2217 	ena_update_hwassist(adapter);
2218 
2219 	if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2220 
2221 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2222 
2223 	ena_unmask_all_io_irqs(adapter);
2224 
2225 	return (0);
2226 
2227 err_up_complete:
2228 	ena_destroy_all_io_queues(adapter);
2229 	ena_free_all_rx_resources(adapter);
2230 	ena_free_all_tx_resources(adapter);
2231 err_create_queues_with_backoff:
2232 	ena_free_io_irq(adapter);
2233 error:
2234 	return (rc);
2235 }
2236 
2237 static uint64_t
ena_get_counter(if_t ifp,ift_counter cnt)2238 ena_get_counter(if_t ifp, ift_counter cnt)
2239 {
2240 	struct ena_adapter *adapter;
2241 	struct ena_hw_stats *stats;
2242 
2243 	adapter = if_getsoftc(ifp);
2244 	stats = &adapter->hw_stats;
2245 
2246 	switch (cnt) {
2247 	case IFCOUNTER_IPACKETS:
2248 		return (counter_u64_fetch(stats->rx_packets));
2249 	case IFCOUNTER_OPACKETS:
2250 		return (counter_u64_fetch(stats->tx_packets));
2251 	case IFCOUNTER_IBYTES:
2252 		return (counter_u64_fetch(stats->rx_bytes));
2253 	case IFCOUNTER_OBYTES:
2254 		return (counter_u64_fetch(stats->tx_bytes));
2255 	case IFCOUNTER_IQDROPS:
2256 		return (counter_u64_fetch(stats->rx_drops));
2257 	case IFCOUNTER_OQDROPS:
2258 		return (counter_u64_fetch(stats->tx_drops));
2259 	default:
2260 		return (if_get_counter_default(ifp, cnt));
2261 	}
2262 }
2263 
2264 static int
ena_media_change(if_t ifp)2265 ena_media_change(if_t ifp)
2266 {
2267 	/* Media Change is not supported by firmware */
2268 	return (0);
2269 }
2270 
2271 static void
ena_media_status(if_t ifp,struct ifmediareq * ifmr)2272 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2273 {
2274 	struct ena_adapter *adapter = if_getsoftc(ifp);
2275 	ena_log(adapter->pdev, DBG, "Media status update\n");
2276 
2277 	ENA_LOCK_LOCK();
2278 
2279 	ifmr->ifm_status = IFM_AVALID;
2280 	ifmr->ifm_active = IFM_ETHER;
2281 
2282 	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2283 		ENA_LOCK_UNLOCK();
2284 		ena_log(adapter->pdev, INFO, "Link is down\n");
2285 		return;
2286 	}
2287 
2288 	ifmr->ifm_status |= IFM_ACTIVE;
2289 	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2290 
2291 	ENA_LOCK_UNLOCK();
2292 }
2293 
2294 static void
ena_init(void * arg)2295 ena_init(void *arg)
2296 {
2297 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2298 
2299 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2300 		ENA_LOCK_LOCK();
2301 		ena_up(adapter);
2302 		ENA_LOCK_UNLOCK();
2303 	}
2304 }
2305 
2306 static int
ena_ioctl(if_t ifp,u_long command,caddr_t data)2307 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2308 {
2309 	struct ena_adapter *adapter;
2310 	struct ifreq *ifr;
2311 	int rc;
2312 
2313 	adapter = if_getsoftc(ifp);
2314 	ifr = (struct ifreq *)data;
2315 
2316 	/*
2317 	 * Acquiring lock to prevent from running up and down routines parallel.
2318 	 */
2319 	rc = 0;
2320 	switch (command) {
2321 	case SIOCSIFMTU:
2322 		if (if_getmtu(ifp) == ifr->ifr_mtu)
2323 			break;
2324 		ENA_LOCK_LOCK();
2325 		ena_down(adapter);
2326 
2327 		ena_change_mtu(ifp, ifr->ifr_mtu);
2328 
2329 		rc = ena_up(adapter);
2330 		ENA_LOCK_UNLOCK();
2331 		break;
2332 
2333 	case SIOCSIFFLAGS:
2334 		if ((if_getflags(ifp) & IFF_UP) != 0) {
2335 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2336 				if ((if_getflags(ifp) & (IFF_PROMISC |
2337 				    IFF_ALLMULTI)) != 0) {
2338 					ena_log(adapter->pdev, INFO,
2339 					    "ioctl promisc/allmulti\n");
2340 				}
2341 			} else {
2342 				ENA_LOCK_LOCK();
2343 				rc = ena_up(adapter);
2344 				ENA_LOCK_UNLOCK();
2345 			}
2346 		} else {
2347 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2348 				ENA_LOCK_LOCK();
2349 				ena_down(adapter);
2350 				ENA_LOCK_UNLOCK();
2351 			}
2352 		}
2353 		break;
2354 
2355 	case SIOCADDMULTI:
2356 	case SIOCDELMULTI:
2357 		break;
2358 
2359 	case SIOCSIFMEDIA:
2360 	case SIOCGIFMEDIA:
2361 		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2362 		break;
2363 
2364 	case SIOCSIFCAP:
2365 		{
2366 			int reinit = 0;
2367 
2368 			if (ifr->ifr_reqcap != if_getcapenable(ifp)) {
2369 				if_setcapenable(ifp, ifr->ifr_reqcap);
2370 				reinit = 1;
2371 			}
2372 
2373 			if ((reinit != 0) &&
2374 			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2375 				ENA_LOCK_LOCK();
2376 				ena_down(adapter);
2377 				rc = ena_up(adapter);
2378 				ENA_LOCK_UNLOCK();
2379 			}
2380 		}
2381 
2382 		break;
2383 	default:
2384 		rc = ether_ioctl(ifp, command, data);
2385 		break;
2386 	}
2387 
2388 	return (rc);
2389 }
2390 
2391 static int
ena_get_dev_offloads(struct ena_com_dev_get_features_ctx * feat)2392 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2393 {
2394 	int caps = 0;
2395 
2396 	if ((feat->offload.tx &
2397 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2398 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2399 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2400 		caps |= IFCAP_TXCSUM;
2401 
2402 	if ((feat->offload.tx &
2403 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2404 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2405 		caps |= IFCAP_TXCSUM_IPV6;
2406 
2407 	if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2408 		caps |= IFCAP_TSO4;
2409 
2410 	if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2411 		caps |= IFCAP_TSO6;
2412 
2413 	if ((feat->offload.rx_supported &
2414 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2415 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2416 		caps |= IFCAP_RXCSUM;
2417 
2418 	if ((feat->offload.rx_supported &
2419 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2420 		caps |= IFCAP_RXCSUM_IPV6;
2421 
2422 	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2423 
2424 	return (caps);
2425 }
2426 
2427 static void
ena_update_host_info(struct ena_admin_host_info * host_info,if_t ifp)2428 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2429 {
2430 	host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp);
2431 }
2432 
2433 static void
ena_update_hwassist(struct ena_adapter * adapter)2434 ena_update_hwassist(struct ena_adapter *adapter)
2435 {
2436 	if_t ifp = adapter->ifp;
2437 	uint32_t feat = adapter->tx_offload_cap;
2438 	int cap = if_getcapenable(ifp);
2439 	int flags = 0;
2440 
2441 	if_clearhwassist(ifp);
2442 
2443 	if ((cap & IFCAP_TXCSUM) != 0) {
2444 		if ((feat &
2445 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2446 			flags |= CSUM_IP;
2447 		if ((feat &
2448 		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2449 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2450 			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2451 	}
2452 
2453 	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2454 		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2455 
2456 	if ((cap & IFCAP_TSO4) != 0)
2457 		flags |= CSUM_IP_TSO;
2458 
2459 	if ((cap & IFCAP_TSO6) != 0)
2460 		flags |= CSUM_IP6_TSO;
2461 
2462 	if_sethwassistbits(ifp, flags, 0);
2463 }
2464 
2465 static void
ena_setup_ifnet(device_t pdev,struct ena_adapter * adapter,struct ena_com_dev_get_features_ctx * feat)2466 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2467     struct ena_com_dev_get_features_ctx *feat)
2468 {
2469 	if_t ifp;
2470 	int caps = 0;
2471 
2472 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2473 	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2474 	if_setdev(ifp, pdev);
2475 	if_setsoftc(ifp, adapter);
2476 
2477 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2478 	if_setinitfn(ifp, ena_init);
2479 	if_settransmitfn(ifp, ena_mq_start);
2480 	if_setqflushfn(ifp, ena_qflush);
2481 	if_setioctlfn(ifp, ena_ioctl);
2482 	if_setgetcounterfn(ifp, ena_get_counter);
2483 
2484 	if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2485 	if_setsendqready(ifp);
2486 	if_setmtu(ifp, ETHERMTU);
2487 	if_setbaudrate(ifp, 0);
2488 	/* Zeroize capabilities... */
2489 	if_setcapabilities(ifp, 0);
2490 	if_setcapenable(ifp, 0);
2491 	/* check hardware support */
2492 	caps = ena_get_dev_offloads(feat);
2493 	/* ... and set them */
2494 	if_setcapabilitiesbit(ifp, caps, 0);
2495 
2496 	/* TSO parameters */
2497 	if_sethwtsomax(ifp, ENA_TSO_MAXSIZE -
2498 	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
2499 	if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1);
2500 	if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE);
2501 
2502 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2503 	if_setcapenable(ifp, if_getcapabilities(ifp));
2504 
2505 	/*
2506 	 * Specify the media types supported by this adapter and register
2507 	 * callbacks to update media and link information
2508 	 */
2509 	ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change,
2510 	    ena_media_status);
2511 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2512 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2513 
2514 	ether_ifattach(ifp, adapter->mac_addr);
2515 }
2516 
2517 void
ena_down(struct ena_adapter * adapter)2518 ena_down(struct ena_adapter *adapter)
2519 {
2520 	int rc;
2521 
2522 	ENA_LOCK_ASSERT();
2523 
2524 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2525 		return;
2526 
2527 	ena_log(adapter->pdev, INFO, "device is going DOWN\n");
2528 
2529 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2530 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2531 
2532 	ena_free_io_irq(adapter);
2533 
2534 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2535 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2536 		if (unlikely(rc != 0))
2537 			ena_log(adapter->pdev, ERR, "Device reset failed\n");
2538 	}
2539 
2540 	ena_destroy_all_io_queues(adapter);
2541 
2542 	ena_free_all_tx_bufs(adapter);
2543 	ena_free_all_rx_bufs(adapter);
2544 	ena_free_all_tx_resources(adapter);
2545 	ena_free_all_rx_resources(adapter);
2546 
2547 	counter_u64_add(adapter->dev_stats.interface_down, 1);
2548 }
2549 
2550 static uint32_t
ena_calc_max_io_queue_num(device_t pdev,struct ena_com_dev * ena_dev,struct ena_com_dev_get_features_ctx * get_feat_ctx)2551 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2552     struct ena_com_dev_get_features_ctx *get_feat_ctx)
2553 {
2554 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2555 
2556 	/* Regular queues capabilities */
2557 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2558 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2559 		    &get_feat_ctx->max_queue_ext.max_queue_ext;
2560 		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2561 		    max_queue_ext->max_rx_cq_num);
2562 
2563 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2564 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2565 	} else {
2566 		struct ena_admin_queue_feature_desc *max_queues =
2567 		    &get_feat_ctx->max_queues;
2568 		io_tx_sq_num = max_queues->max_sq_num;
2569 		io_tx_cq_num = max_queues->max_cq_num;
2570 		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2571 	}
2572 
2573 	/* In case of LLQ use the llq fields for the tx SQ/CQ */
2574 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2575 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2576 
2577 	max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2578 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2579 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2580 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2581 	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2582 	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2583 	    pci_msix_count(pdev) - 1);
2584 #ifdef RSS
2585 	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2586 	    rss_getnumbuckets());
2587 #endif
2588 
2589 	return (max_num_io_queues);
2590 }
2591 
2592 static int
ena_enable_wc(device_t pdev,struct resource * res)2593 ena_enable_wc(device_t pdev, struct resource *res)
2594 {
2595 #if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2596 	vm_offset_t va;
2597 	vm_size_t len;
2598 	int rc;
2599 
2600 	va = (vm_offset_t)rman_get_virtual(res);
2601 	len = rman_get_size(res);
2602 	/* Enable write combining */
2603 	rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2604 	if (unlikely(rc != 0)) {
2605 		ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc);
2606 		return (rc);
2607 	}
2608 
2609 	return (0);
2610 #endif
2611 	return (EOPNOTSUPP);
2612 }
2613 
2614 static int
ena_set_queues_placement_policy(device_t pdev,struct ena_com_dev * ena_dev,struct ena_admin_feature_llq_desc * llq,struct ena_llq_configurations * llq_default_configurations)2615 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2616     struct ena_admin_feature_llq_desc *llq,
2617     struct ena_llq_configurations *llq_default_configurations)
2618 {
2619 	int rc;
2620 	uint32_t llq_feature_mask;
2621 
2622 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2623 	if (!(ena_dev->supported_features & llq_feature_mask)) {
2624 		ena_log(pdev, WARN,
2625 		    "LLQ is not supported. Fallback to host mode policy.\n");
2626 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2627 		return (0);
2628 	}
2629 
2630 	if (ena_dev->mem_bar == NULL) {
2631 		ena_log(pdev, WARN,
2632 		    "LLQ is advertised as supported but device doesn't expose mem bar.\n");
2633 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2634 		return (0);
2635 	}
2636 
2637 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2638 	if (unlikely(rc != 0)) {
2639 		ena_log(pdev, WARN,
2640 		    "Failed to configure the device mode. "
2641 		    "Fallback to host mode policy.\n");
2642 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2643 	}
2644 
2645 	return (0);
2646 }
2647 
2648 static int
ena_map_llq_mem_bar(device_t pdev,struct ena_com_dev * ena_dev)2649 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev)
2650 {
2651 	struct ena_adapter *adapter = device_get_softc(pdev);
2652 	int rc, rid;
2653 
2654 	/* Try to allocate resources for LLQ bar */
2655 	rid = PCIR_BAR(ENA_MEM_BAR);
2656 	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
2657 	    RF_ACTIVE);
2658 	if (unlikely(adapter->memory == NULL)) {
2659 		ena_log(pdev, WARN,
2660 		    "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n");
2661 		return (0);
2662 	}
2663 
2664 	/* Enable write combining for better LLQ performance */
2665 	rc = ena_enable_wc(adapter->pdev, adapter->memory);
2666 	if (unlikely(rc != 0)) {
2667 		ena_log(pdev, ERR, "failed to enable write combining.\n");
2668 		return (rc);
2669 	}
2670 
2671 	/*
2672 	 * Save virtual address of the device's memory region
2673 	 * for the ena_com layer.
2674 	 */
2675 	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2676 
2677 	return (0);
2678 }
2679 
2680 static inline void
set_default_llq_configurations(struct ena_llq_configurations * llq_config,struct ena_admin_feature_llq_desc * llq)2681 set_default_llq_configurations(struct ena_llq_configurations *llq_config,
2682     struct ena_admin_feature_llq_desc *llq)
2683 {
2684 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2685 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2686 	llq_config->llq_num_decs_before_header =
2687 	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2688 	if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) !=
2689 	    0 && ena_force_large_llq_header) {
2690 		llq_config->llq_ring_entry_size =
2691 		    ENA_ADMIN_LIST_ENTRY_SIZE_256B;
2692 		llq_config->llq_ring_entry_size_value = 256;
2693 	} else {
2694 		llq_config->llq_ring_entry_size =
2695 		    ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2696 		llq_config->llq_ring_entry_size_value = 128;
2697 	}
2698 }
2699 
2700 static int
ena_calc_io_queue_size(struct ena_calc_queue_size_ctx * ctx)2701 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2702 {
2703 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2704 	struct ena_com_dev *ena_dev = ctx->ena_dev;
2705 	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2706 	uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2707 	uint32_t max_tx_queue_size;
2708 	uint32_t max_rx_queue_size;
2709 
2710 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2711 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2712 		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2713 		max_rx_queue_size = min_t(uint32_t,
2714 		    max_queue_ext->max_rx_cq_depth,
2715 		    max_queue_ext->max_rx_sq_depth);
2716 		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2717 
2718 		if (ena_dev->tx_mem_queue_type ==
2719 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2720 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2721 			    llq->max_llq_depth);
2722 		else
2723 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2724 			    max_queue_ext->max_tx_sq_depth);
2725 
2726 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2727 		    max_queue_ext->max_per_packet_tx_descs);
2728 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2729 		    max_queue_ext->max_per_packet_rx_descs);
2730 	} else {
2731 		struct ena_admin_queue_feature_desc *max_queues =
2732 		    &ctx->get_feat_ctx->max_queues;
2733 		max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth,
2734 		    max_queues->max_sq_depth);
2735 		max_tx_queue_size = max_queues->max_cq_depth;
2736 
2737 		if (ena_dev->tx_mem_queue_type ==
2738 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2739 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2740 			    llq->max_llq_depth);
2741 		else
2742 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2743 			    max_queues->max_sq_depth);
2744 
2745 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2746 		    max_queues->max_packet_tx_descs);
2747 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2748 		    max_queues->max_packet_rx_descs);
2749 	}
2750 
2751 	/* round down to the nearest power of 2 */
2752 	max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2753 	max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
2754 
2755 	/*
2756 	 * When forcing large headers, we multiply the entry size by 2,
2757 	 * and therefore divide the queue size by 2, leaving the amount
2758 	 * of memory used by the queues unchanged.
2759 	 */
2760 	if (ena_force_large_llq_header) {
2761 		if ((llq->entry_size_ctrl_supported &
2762 		    ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 &&
2763 		    ena_dev->tx_mem_queue_type ==
2764 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2765 			max_tx_queue_size /= 2;
2766 			ena_log(ctx->pdev, INFO,
2767 			    "Forcing large headers and decreasing maximum Tx queue size to %d\n",
2768 			    max_tx_queue_size);
2769 		} else {
2770 			ena_log(ctx->pdev, WARN,
2771 			    "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2772 		}
2773 	}
2774 
2775 	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2776 	    max_tx_queue_size);
2777 	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2778 	    max_rx_queue_size);
2779 
2780 	tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2781 	rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2782 
2783 	ctx->max_tx_queue_size = max_tx_queue_size;
2784 	ctx->max_rx_queue_size = max_rx_queue_size;
2785 	ctx->tx_queue_size = tx_queue_size;
2786 	ctx->rx_queue_size = rx_queue_size;
2787 
2788 	return (0);
2789 }
2790 
2791 static void
ena_config_host_info(struct ena_com_dev * ena_dev,device_t dev)2792 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2793 {
2794 	struct ena_admin_host_info *host_info;
2795 	uintptr_t rid;
2796 	int rc;
2797 
2798 	/* Allocate only the host info */
2799 	rc = ena_com_allocate_host_info(ena_dev);
2800 	if (unlikely(rc != 0)) {
2801 		ena_log(dev, ERR, "Cannot allocate host info\n");
2802 		return;
2803 	}
2804 
2805 	host_info = ena_dev->host_attr.host_info;
2806 
2807 	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2808 		host_info->bdf = rid;
2809 	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2810 	host_info->kernel_ver = osreldate;
2811 
2812 	sprintf(host_info->kernel_ver_str, "%d", osreldate);
2813 	host_info->os_dist = 0;
2814 	strncpy(host_info->os_dist_str, osrelease,
2815 	    sizeof(host_info->os_dist_str) - 1);
2816 
2817 	host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) |
2818 	    (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2819 	    (ENA_DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2820 	host_info->num_cpus = mp_ncpus;
2821 	host_info->driver_supported_features =
2822 	    ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2823 	    ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
2824 
2825 	rc = ena_com_set_host_attributes(ena_dev);
2826 	if (unlikely(rc != 0)) {
2827 		if (rc == EOPNOTSUPP)
2828 			ena_log(dev, WARN, "Cannot set host attributes\n");
2829 		else
2830 			ena_log(dev, ERR, "Cannot set host attributes\n");
2831 
2832 		goto err;
2833 	}
2834 
2835 	return;
2836 
2837 err:
2838 	ena_com_delete_host_info(ena_dev);
2839 }
2840 
2841 static int
ena_device_init(struct ena_adapter * adapter,device_t pdev,struct ena_com_dev_get_features_ctx * get_feat_ctx,int * wd_active)2842 ena_device_init(struct ena_adapter *adapter, device_t pdev,
2843     struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2844 {
2845 	struct ena_llq_configurations llq_config;
2846 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2847 	bool readless_supported;
2848 	uint32_t aenq_groups;
2849 	int dma_width;
2850 	int rc;
2851 
2852 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
2853 	if (unlikely(rc != 0)) {
2854 		ena_log(pdev, ERR, "failed to init mmio read less\n");
2855 		return (rc);
2856 	}
2857 
2858 	/*
2859 	 * The PCIe configuration space revision id indicate if mmio reg
2860 	 * read is disabled
2861 	 */
2862 	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2863 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2864 
2865 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2866 	if (unlikely(rc != 0)) {
2867 		ena_log(pdev, ERR, "Can not reset device\n");
2868 		goto err_mmio_read_less;
2869 	}
2870 
2871 	rc = ena_com_validate_version(ena_dev);
2872 	if (unlikely(rc != 0)) {
2873 		ena_log(pdev, ERR, "device version is too low\n");
2874 		goto err_mmio_read_less;
2875 	}
2876 
2877 	dma_width = ena_com_get_dma_width(ena_dev);
2878 	if (unlikely(dma_width < 0)) {
2879 		ena_log(pdev, ERR, "Invalid dma width value %d", dma_width);
2880 		rc = dma_width;
2881 		goto err_mmio_read_less;
2882 	}
2883 	adapter->dma_width = dma_width;
2884 
2885 	/* ENA admin level init */
2886 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2887 	if (unlikely(rc != 0)) {
2888 		ena_log(pdev, ERR,
2889 		    "Can not initialize ena admin queue with device\n");
2890 		goto err_mmio_read_less;
2891 	}
2892 
2893 	/*
2894 	 * To enable the msix interrupts the driver needs to know the number
2895 	 * of queues. So the driver uses polling mode to retrieve this
2896 	 * information
2897 	 */
2898 	ena_com_set_admin_polling_mode(ena_dev, true);
2899 
2900 	ena_config_host_info(ena_dev, pdev);
2901 
2902 	/* Get Device Attributes */
2903 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2904 	if (unlikely(rc != 0)) {
2905 		ena_log(pdev, ERR,
2906 		    "Cannot get attribute for ena device rc: %d\n", rc);
2907 		goto err_admin_init;
2908 	}
2909 
2910 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2911 	    BIT(ENA_ADMIN_FATAL_ERROR) |
2912 	    BIT(ENA_ADMIN_WARNING) |
2913 	    BIT(ENA_ADMIN_NOTIFICATION) |
2914 	    BIT(ENA_ADMIN_KEEP_ALIVE);
2915 
2916 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
2917 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2918 	if (unlikely(rc != 0)) {
2919 		ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc);
2920 		goto err_admin_init;
2921 	}
2922 
2923 	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2924 
2925 	set_default_llq_configurations(&llq_config, &get_feat_ctx->llq);
2926 
2927 	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
2928 	    &llq_config);
2929 	if (unlikely(rc != 0)) {
2930 		ena_log(pdev, ERR, "Failed to set placement policy\n");
2931 		goto err_admin_init;
2932 	}
2933 
2934 	return (0);
2935 
2936 err_admin_init:
2937 	ena_com_delete_host_info(ena_dev);
2938 	ena_com_admin_destroy(ena_dev);
2939 err_mmio_read_less:
2940 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2941 
2942 	return (rc);
2943 }
2944 
2945 static int
ena_enable_msix_and_set_admin_interrupts(struct ena_adapter * adapter)2946 ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2947 {
2948 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2949 	int rc;
2950 
2951 	rc = ena_enable_msix(adapter);
2952 	if (unlikely(rc != 0)) {
2953 		ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n");
2954 		return (rc);
2955 	}
2956 
2957 	ena_setup_mgmnt_intr(adapter);
2958 
2959 	rc = ena_request_mgmnt_irq(adapter);
2960 	if (unlikely(rc != 0)) {
2961 		ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n");
2962 		goto err_disable_msix;
2963 	}
2964 
2965 	ena_com_set_admin_polling_mode(ena_dev, false);
2966 
2967 	ena_com_admin_aenq_enable(ena_dev);
2968 
2969 	return (0);
2970 
2971 err_disable_msix:
2972 	ena_disable_msix(adapter);
2973 
2974 	return (rc);
2975 }
2976 
2977 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
2978 static void
ena_keep_alive_wd(void * adapter_data,struct ena_admin_aenq_entry * aenq_e)2979 ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
2980 {
2981 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2982 	struct ena_admin_aenq_keep_alive_desc *desc;
2983 	sbintime_t stime;
2984 	uint64_t rx_drops;
2985 	uint64_t tx_drops;
2986 
2987 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2988 
2989 	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2990 	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2991 	counter_u64_zero(adapter->hw_stats.rx_drops);
2992 	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2993 	counter_u64_zero(adapter->hw_stats.tx_drops);
2994 	counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2995 
2996 	stime = getsbinuptime();
2997 	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
2998 }
2999 
3000 /* Check for keep alive expiration */
3001 static void
check_for_missing_keep_alive(struct ena_adapter * adapter)3002 check_for_missing_keep_alive(struct ena_adapter *adapter)
3003 {
3004 	sbintime_t timestamp, time;
3005 
3006 	if (adapter->wd_active == 0)
3007 		return;
3008 
3009 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3010 		return;
3011 
3012 	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3013 	time = getsbinuptime() - timestamp;
3014 	if (unlikely(time > adapter->keep_alive_timeout)) {
3015 		ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
3016 		counter_u64_add(adapter->dev_stats.wd_expired, 1);
3017 		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3018 	}
3019 }
3020 
3021 /* Check if admin queue is enabled */
3022 static void
check_for_admin_com_state(struct ena_adapter * adapter)3023 check_for_admin_com_state(struct ena_adapter *adapter)
3024 {
3025 	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
3026 		ena_log(adapter->pdev, ERR,
3027 		    "ENA admin queue is not in running state!\n");
3028 		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3029 		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
3030 	}
3031 }
3032 
3033 static int
check_for_rx_interrupt_queue(struct ena_adapter * adapter,struct ena_ring * rx_ring)3034 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3035     struct ena_ring *rx_ring)
3036 {
3037 	if (likely(atomic_load_8(&rx_ring->first_interrupt)))
3038 		return (0);
3039 
3040 	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3041 		return (0);
3042 
3043 	rx_ring->no_interrupt_event_cnt++;
3044 
3045 	if (rx_ring->no_interrupt_event_cnt ==
3046 	    ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3047 		ena_log(adapter->pdev, ERR,
3048 		    "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3049 		    rx_ring->qid);
3050 		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3051 		return (EIO);
3052 	}
3053 
3054 	return (0);
3055 }
3056 
3057 static int
check_missing_comp_in_tx_queue(struct ena_adapter * adapter,struct ena_ring * tx_ring)3058 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3059     struct ena_ring *tx_ring)
3060 {
3061 	device_t pdev = adapter->pdev;
3062 	struct bintime curtime, time;
3063 	struct ena_tx_buffer *tx_buf;
3064 	int time_since_last_cleanup;
3065 	int missing_tx_comp_to;
3066 	sbintime_t time_offset;
3067 	uint32_t missed_tx = 0;
3068 	int i, rc = 0;
3069 
3070 	getbinuptime(&curtime);
3071 
3072 	for (i = 0; i < tx_ring->ring_size; i++) {
3073 		tx_buf = &tx_ring->tx_buffer_info[i];
3074 
3075 		if (bintime_isset(&tx_buf->timestamp) == 0)
3076 			continue;
3077 
3078 		time = curtime;
3079 		bintime_sub(&time, &tx_buf->timestamp);
3080 		time_offset = bttosbt(time);
3081 
3082 		if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) &&
3083 		    time_offset > 2 * adapter->missing_tx_timeout)) {
3084 			/*
3085 			 * If after graceful period interrupt is still not
3086 			 * received, we schedule a reset.
3087 			 */
3088 			ena_log(pdev, ERR,
3089 			    "Potential MSIX issue on Tx side Queue = %d. "
3090 			    "Reset the device\n",
3091 			    tx_ring->qid);
3092 			ena_trigger_reset(adapter,
3093 			    ENA_REGS_RESET_MISS_INTERRUPT);
3094 			return (EIO);
3095 		}
3096 
3097 		/* Check again if packet is still waiting */
3098 		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3099 
3100 			if (tx_buf->print_once) {
3101 				time_since_last_cleanup = TICKS_2_MSEC(ticks -
3102 				    tx_ring->tx_last_cleanup_ticks);
3103 				missing_tx_comp_to = sbttoms(
3104 				    adapter->missing_tx_timeout);
3105 				ena_log(pdev, WARN,
3106 				    "Found a Tx that wasn't completed on time, qid %d, index %d. "
3107 				    "%d msecs have passed since last cleanup. Missing Tx timeout value %d msecs.\n",
3108 				    tx_ring->qid, i, time_since_last_cleanup,
3109 				    missing_tx_comp_to);
3110 			}
3111 
3112 			tx_buf->print_once = false;
3113 			missed_tx++;
3114 		}
3115 	}
3116 
3117 	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3118 		ena_log(pdev, ERR,
3119 		    "The number of lost tx completion is above the threshold "
3120 		    "(%d > %d). Reset the device\n",
3121 		    missed_tx, adapter->missing_tx_threshold);
3122 		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3123 		rc = EIO;
3124 	}
3125 
3126 	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3127 
3128 	return (rc);
3129 }
3130 
3131 /*
3132  * Check for TX which were not completed on time.
3133  * Timeout is defined by "missing_tx_timeout".
3134  * Reset will be performed if number of incompleted
3135  * transactions exceeds "missing_tx_threshold".
3136  */
3137 static void
check_for_missing_completions(struct ena_adapter * adapter)3138 check_for_missing_completions(struct ena_adapter *adapter)
3139 {
3140 	struct ena_ring *tx_ring;
3141 	struct ena_ring *rx_ring;
3142 	int i, budget, rc;
3143 
3144 	/* Make sure the driver doesn't turn the device in other process */
3145 	rmb();
3146 
3147 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3148 		return;
3149 
3150 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3151 		return;
3152 
3153 	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3154 		return;
3155 
3156 	budget = adapter->missing_tx_max_queues;
3157 
3158 	for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3159 		tx_ring = &adapter->tx_ring[i];
3160 		rx_ring = &adapter->rx_ring[i];
3161 
3162 		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3163 		if (unlikely(rc != 0))
3164 			return;
3165 
3166 		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3167 		if (unlikely(rc != 0))
3168 			return;
3169 
3170 		budget--;
3171 		if (budget == 0) {
3172 			i++;
3173 			break;
3174 		}
3175 	}
3176 
3177 	adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3178 }
3179 
3180 /* trigger rx cleanup after 2 consecutive detections */
3181 #define EMPTY_RX_REFILL 2
3182 /* For the rare case where the device runs out of Rx descriptors and the
3183  * msix handler failed to refill new Rx descriptors (due to a lack of memory
3184  * for example).
3185  * This case will lead to a deadlock:
3186  * The device won't send interrupts since all the new Rx packets will be dropped
3187  * The msix handler won't allocate new Rx descriptors so the device won't be
3188  * able to send new packets.
3189  *
3190  * When such a situation is detected - execute rx cleanup task in another thread
3191  */
3192 static void
check_for_empty_rx_ring(struct ena_adapter * adapter)3193 check_for_empty_rx_ring(struct ena_adapter *adapter)
3194 {
3195 	struct ena_ring *rx_ring;
3196 	int i, refill_required;
3197 
3198 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3199 		return;
3200 
3201 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3202 		return;
3203 
3204 	for (i = 0; i < adapter->num_io_queues; i++) {
3205 		rx_ring = &adapter->rx_ring[i];
3206 
3207 		refill_required = ena_com_free_q_entries(
3208 		    rx_ring->ena_com_io_sq);
3209 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3210 			rx_ring->empty_rx_queue++;
3211 
3212 			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3213 				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3214 				    1);
3215 
3216 				ena_log(adapter->pdev, WARN,
3217 				    "Rx ring %d is stalled. Triggering the refill function\n",
3218 				    i);
3219 
3220 				taskqueue_enqueue(rx_ring->que->cleanup_tq,
3221 				    &rx_ring->que->cleanup_task);
3222 				rx_ring->empty_rx_queue = 0;
3223 			}
3224 		} else {
3225 			rx_ring->empty_rx_queue = 0;
3226 		}
3227 	}
3228 }
3229 
3230 static void
ena_update_hints(struct ena_adapter * adapter,struct ena_admin_ena_hw_hints * hints)3231 ena_update_hints(struct ena_adapter *adapter,
3232     struct ena_admin_ena_hw_hints *hints)
3233 {
3234 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3235 
3236 	if (hints->admin_completion_tx_timeout)
3237 		ena_dev->admin_queue.completion_timeout =
3238 		    hints->admin_completion_tx_timeout * 1000;
3239 
3240 	if (hints->mmio_read_timeout)
3241 		/* convert to usec */
3242 		ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000;
3243 
3244 	if (hints->missed_tx_completion_count_threshold_to_reset)
3245 		adapter->missing_tx_threshold =
3246 		    hints->missed_tx_completion_count_threshold_to_reset;
3247 
3248 	if (hints->missing_tx_completion_timeout) {
3249 		if (hints->missing_tx_completion_timeout ==
3250 		    ENA_HW_HINTS_NO_TIMEOUT)
3251 			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3252 		else
3253 			adapter->missing_tx_timeout = SBT_1MS *
3254 			    hints->missing_tx_completion_timeout;
3255 	}
3256 
3257 	if (hints->driver_watchdog_timeout) {
3258 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3259 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3260 		else
3261 			adapter->keep_alive_timeout = SBT_1MS *
3262 			    hints->driver_watchdog_timeout;
3263 	}
3264 }
3265 
3266 /**
3267  * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3268  * @adapter: ENA device adapter
3269  *
3270  * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics
3271  * and other error codes on failure.
3272  *
3273  * This function can possibly cause a race with other calls to the admin queue.
3274  * Because of that, the caller should either lock this function or make sure
3275  * that there is no race in the current context.
3276  */
3277 static int
ena_copy_eni_metrics(struct ena_adapter * adapter)3278 ena_copy_eni_metrics(struct ena_adapter *adapter)
3279 {
3280 	static bool print_once = true;
3281 	int rc;
3282 
3283 	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3284 
3285 	if (rc != 0) {
3286 		if (rc == ENA_COM_UNSUPPORTED) {
3287 			if (print_once) {
3288 				ena_log(adapter->pdev, WARN,
3289 				    "Retrieving ENI metrics is not supported.\n");
3290 				print_once = false;
3291 			} else {
3292 				ena_log(adapter->pdev, DBG,
3293 				    "Retrieving ENI metrics is not supported.\n");
3294 			}
3295 		} else {
3296 			ena_log(adapter->pdev, ERR,
3297 			    "Failed to get ENI metrics: %d\n", rc);
3298 		}
3299 	}
3300 
3301 	return (rc);
3302 }
3303 
3304 static int
ena_copy_srd_metrics(struct ena_adapter * adapter)3305 ena_copy_srd_metrics(struct ena_adapter *adapter)
3306 {
3307 	return ena_com_get_ena_srd_info(adapter->ena_dev, &adapter->ena_srd_info);
3308 }
3309 
3310 static int
ena_copy_customer_metrics(struct ena_adapter * adapter)3311 ena_copy_customer_metrics(struct ena_adapter *adapter)
3312 {
3313 	struct ena_com_dev *dev;
3314 	u32 supported_metrics_count;
3315 	int rc, len;
3316 
3317 	dev = adapter->ena_dev;
3318 
3319 	supported_metrics_count = ena_com_get_customer_metric_count(dev);
3320 	len = supported_metrics_count * sizeof(u64);
3321 
3322 	/* Fill the data buffer */
3323 	rc = ena_com_get_customer_metrics(adapter->ena_dev,
3324 	    (char *)(adapter->customer_metrics_array), len);
3325 
3326 	return (rc);
3327 }
3328 
3329 static void
ena_timer_service(void * data)3330 ena_timer_service(void *data)
3331 {
3332 	struct ena_adapter *adapter = (struct ena_adapter *)data;
3333 	struct ena_admin_host_info *host_info =
3334 	    adapter->ena_dev->host_attr.host_info;
3335 
3336 	check_for_missing_keep_alive(adapter);
3337 
3338 	check_for_admin_com_state(adapter);
3339 
3340 	check_for_missing_completions(adapter);
3341 
3342 	check_for_empty_rx_ring(adapter);
3343 
3344 	/*
3345 	 * User controller update of the ENA metrics.
3346 	 * If the delay was set to 0, then the stats shouldn't be updated at
3347 	 * all.
3348 	 * Otherwise, wait 'metrics_sample_interval' seconds, before
3349 	 * updating stats.
3350 	 * As timer service is executed every second, it's enough to increment
3351 	 * appropriate counter each time the timer service is executed.
3352 	 */
3353 	if ((adapter->metrics_sample_interval != 0) &&
3354 	    (++adapter->metrics_sample_interval_cnt >=
3355 	    adapter->metrics_sample_interval)) {
3356 		taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task);
3357 		adapter->metrics_sample_interval_cnt = 0;
3358 	}
3359 
3360 
3361 	if (host_info != NULL)
3362 		ena_update_host_info(host_info, adapter->ifp);
3363 
3364 	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3365 		/*
3366 		 * Timeout when validating version indicates that the device
3367 		 * became unresponsive. If that happens skip the reset and
3368 		 * reschedule timer service, so the reset can be retried later.
3369 		 */
3370 		if (ena_com_validate_version(adapter->ena_dev) ==
3371 		    ENA_COM_TIMER_EXPIRED) {
3372 			ena_log(adapter->pdev, WARN,
3373 			    "FW unresponsive, skipping reset\n");
3374 			ENA_TIMER_RESET(adapter);
3375 			return;
3376 		}
3377 		ena_log(adapter->pdev, WARN, "Trigger reset is on\n");
3378 		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3379 		return;
3380 	}
3381 
3382 	/*
3383 	 * Schedule another timeout one second from now.
3384 	 */
3385 	ENA_TIMER_RESET(adapter);
3386 }
3387 
3388 void
ena_destroy_device(struct ena_adapter * adapter,bool graceful)3389 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3390 {
3391 	if_t ifp = adapter->ifp;
3392 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3393 	bool dev_up;
3394 
3395 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3396 		return;
3397 
3398 	if (!graceful)
3399 		if_link_state_change(ifp, LINK_STATE_DOWN);
3400 
3401 	ENA_TIMER_DRAIN(adapter);
3402 
3403 	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3404 	if (dev_up)
3405 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3406 
3407 	if (!graceful)
3408 		ena_com_set_admin_running_state(ena_dev, false);
3409 
3410 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3411 		ena_down(adapter);
3412 
3413 	/*
3414 	 * Stop the device from sending AENQ events (if the device was up, and
3415 	 * the trigger reset was on, ena_down already performs device reset)
3416 	 */
3417 	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3418 		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3419 
3420 	ena_free_mgmnt_irq(adapter);
3421 
3422 	ena_disable_msix(adapter);
3423 
3424 	/*
3425 	 * IO rings resources should be freed because `ena_restore_device()`
3426 	 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3427 	 * vectors. The amount of MSIX vectors after destroy-restore may be
3428 	 * different than before. Therefore, IO rings resources should be
3429 	 * established from scratch each time.
3430 	 */
3431 	ena_free_all_io_rings_resources(adapter);
3432 
3433 	ena_com_abort_admin_commands(ena_dev);
3434 
3435 	ena_com_wait_for_abort_completion(ena_dev);
3436 
3437 	ena_com_admin_destroy(ena_dev);
3438 
3439 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3440 
3441 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3442 
3443 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3444 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3445 }
3446 
3447 static int
ena_device_validate_params(struct ena_adapter * adapter,struct ena_com_dev_get_features_ctx * get_feat_ctx)3448 ena_device_validate_params(struct ena_adapter *adapter,
3449     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3450 {
3451 	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3452 	    ETHER_ADDR_LEN) != 0) {
3453 		ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n");
3454 		return (EINVAL);
3455 	}
3456 
3457 	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3458 		ena_log(adapter->pdev, ERR,
3459 		    "Error, device max mtu is smaller than ifp MTU\n");
3460 		return (EINVAL);
3461 	}
3462 
3463 	return 0;
3464 }
3465 
3466 int
ena_restore_device(struct ena_adapter * adapter)3467 ena_restore_device(struct ena_adapter *adapter)
3468 {
3469 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3470 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3471 	if_t ifp = adapter->ifp;
3472 	device_t dev = adapter->pdev;
3473 	int wd_active;
3474 	int rc;
3475 
3476 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3477 
3478 	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3479 	if (rc != 0) {
3480 		ena_log(dev, ERR, "Cannot initialize device\n");
3481 		goto err;
3482 	}
3483 	/*
3484 	 * Only enable WD if it was enabled before reset, so it won't override
3485 	 * value set by the user by the sysctl.
3486 	 */
3487 	if (adapter->wd_active != 0)
3488 		adapter->wd_active = wd_active;
3489 
3490 	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3491 	if (rc != 0) {
3492 		ena_log(dev, ERR, "Validation of device parameters failed\n");
3493 		goto err_device_destroy;
3494 	}
3495 
3496 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3497 	/* Make sure we don't have a race with AENQ Links state handler */
3498 	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3499 		if_link_state_change(ifp, LINK_STATE_UP);
3500 
3501 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3502 	if (rc != 0) {
3503 		ena_log(dev, ERR, "Enable MSI-X failed\n");
3504 		goto err_device_destroy;
3505 	}
3506 
3507 	/*
3508 	 * Effective value of used MSIX vectors should be the same as before
3509 	 * `ena_destroy_device()`, if possible, or closest to it if less vectors
3510 	 * are available.
3511 	 */
3512 	if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3513 		adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3514 
3515 	/* Re-initialize rings basic information */
3516 	ena_init_io_rings(adapter);
3517 
3518 	/* If the interface was up before the reset bring it up */
3519 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3520 		rc = ena_up(adapter);
3521 		if (rc != 0) {
3522 			ena_log(dev, ERR, "Failed to create I/O queues\n");
3523 			goto err_disable_msix;
3524 		}
3525 	}
3526 
3527 	/* Indicate that device is running again and ready to work */
3528 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3529 
3530 	/*
3531 	 * As the AENQ handlers weren't executed during reset because
3532 	 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
3533 	 * timestamp must be updated again That will prevent next reset
3534 	 * caused by missing keep alive.
3535 	 */
3536 	adapter->keep_alive_timestamp = getsbinuptime();
3537 	ENA_TIMER_RESET(adapter);
3538 
3539 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3540 
3541 	return (rc);
3542 
3543 err_disable_msix:
3544 	ena_free_mgmnt_irq(adapter);
3545 	ena_disable_msix(adapter);
3546 err_device_destroy:
3547 	ena_com_abort_admin_commands(ena_dev);
3548 	ena_com_wait_for_abort_completion(ena_dev);
3549 	ena_com_admin_destroy(ena_dev);
3550 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3551 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3552 err:
3553 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3554 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3555 	ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n");
3556 
3557 	return (rc);
3558 }
3559 
3560 static void
ena_metrics_task(void * arg,int pending)3561 ena_metrics_task(void *arg, int pending)
3562 {
3563 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3564 
3565 	ENA_LOCK_LOCK();
3566 
3567 	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_CUSTOMER_METRICS))
3568 		(void)ena_copy_customer_metrics(adapter);
3569 	else if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS))
3570 		(void)ena_copy_eni_metrics(adapter);
3571 
3572 	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
3573 		(void)ena_copy_srd_metrics(adapter);
3574 
3575 	ENA_LOCK_UNLOCK();
3576 }
3577 
3578 static void
ena_reset_task(void * arg,int pending)3579 ena_reset_task(void *arg, int pending)
3580 {
3581 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3582 
3583 	ENA_LOCK_LOCK();
3584 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3585 		ena_destroy_device(adapter, false);
3586 		ena_restore_device(adapter);
3587 
3588 		ena_log(adapter->pdev, INFO,
3589 		    "Device reset completed successfully, Driver info: %s\n",
3590 		    ena_version);
3591 	}
3592 	ENA_LOCK_UNLOCK();
3593 }
3594 
3595 static void
ena_free_stats(struct ena_adapter * adapter)3596 ena_free_stats(struct ena_adapter *adapter)
3597 {
3598 	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3599 	    sizeof(struct ena_hw_stats));
3600 	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3601 	    sizeof(struct ena_stats_dev));
3602 
3603 }
3604 /**
3605  * ena_attach - Device Initialization Routine
3606  * @pdev: device information struct
3607  *
3608  * Returns 0 on success, otherwise on failure.
3609  *
3610  * ena_attach initializes an adapter identified by a device structure.
3611  * The OS initialization, configuring of the adapter private structure,
3612  * and a hardware reset occur.
3613  **/
3614 static int
ena_attach(device_t pdev)3615 ena_attach(device_t pdev)
3616 {
3617 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3618 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3619 	static int version_printed;
3620 	struct ena_adapter *adapter;
3621 	struct ena_com_dev *ena_dev = NULL;
3622 	uint32_t max_num_io_queues;
3623 	int msix_rid;
3624 	int rid, rc;
3625 
3626 	adapter = device_get_softc(pdev);
3627 	adapter->pdev = pdev;
3628 	adapter->first_bind = -1;
3629 
3630 	/*
3631 	 * Set up the timer service - driver is responsible for avoiding
3632 	 * concurrency, as the callout won't be using any locking inside.
3633 	 */
3634 	ENA_TIMER_INIT(adapter);
3635 	adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO;
3636 	adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO;
3637 	adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES;
3638 	adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD;
3639 
3640 	adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED;
3641 	adapter->irq_cpu_stride = 0;
3642 
3643 #ifdef RSS
3644 	adapter->rss_enabled = 1;
3645 #endif
3646 
3647 	if (version_printed++ == 0)
3648 		ena_log(pdev, INFO, "%s\n", ena_version);
3649 
3650 	/* Allocate memory for ena_dev structure */
3651 	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3652 	    M_WAITOK | M_ZERO);
3653 
3654 	adapter->ena_dev = ena_dev;
3655 	ena_dev->dmadev = pdev;
3656 
3657 	rid = PCIR_BAR(ENA_REG_BAR);
3658 	adapter->memory = NULL;
3659 	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
3660 	    RF_ACTIVE);
3661 	if (unlikely(adapter->registers == NULL)) {
3662 		ena_log(pdev, ERR,
3663 		    "unable to allocate bus resource: registers!\n");
3664 		rc = ENOMEM;
3665 		goto err_dev_free;
3666 	}
3667 
3668 	/* MSIx vector table may reside on BAR0 with registers or on BAR1. */
3669 	msix_rid = pci_msix_table_bar(pdev);
3670 	if (msix_rid != rid) {
3671 		adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3672 		    &msix_rid, RF_ACTIVE);
3673 		if (unlikely(adapter->msix == NULL)) {
3674 			ena_log(pdev, ERR,
3675 			    "unable to allocate bus resource: msix!\n");
3676 			rc = ENOMEM;
3677 			goto err_pci_free;
3678 		}
3679 		adapter->msix_rid = msix_rid;
3680 	}
3681 
3682 	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3683 	    M_WAITOK | M_ZERO);
3684 
3685 	/* Store register resources */
3686 	((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag(
3687 	    adapter->registers);
3688 	((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle(
3689 	    adapter->registers);
3690 
3691 	if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) {
3692 		ena_log(pdev, ERR, "failed to pmap registers bar\n");
3693 		rc = ENXIO;
3694 		goto err_bus_free;
3695 	}
3696 
3697 	rc = ena_map_llq_mem_bar(pdev, ena_dev);
3698 	if (unlikely(rc != 0)) {
3699 		ena_log(pdev, ERR, "Failed to map ENA mem bar");
3700 		goto err_bus_free;
3701 	}
3702 
3703 	/* Initially clear all the flags */
3704 	ENA_FLAG_ZERO(adapter);
3705 
3706 	/* Device initialization */
3707 	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3708 	if (unlikely(rc != 0)) {
3709 		ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc);
3710 		rc = ENXIO;
3711 		goto err_bus_free;
3712 	}
3713 
3714 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3715 		adapter->disable_meta_caching = !!(
3716 		    get_feat_ctx.llq.accel_mode.u.get.supported_flags &
3717 		    BIT(ENA_ADMIN_DISABLE_META_CACHING));
3718 
3719 	adapter->keep_alive_timestamp = getsbinuptime();
3720 
3721 	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3722 
3723 	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3724 	    ETHER_ADDR_LEN);
3725 
3726 	calc_queue_ctx.pdev = pdev;
3727 	calc_queue_ctx.ena_dev = ena_dev;
3728 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3729 
3730 	/* Calculate initial and maximum IO queue number and size */
3731 	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
3732 	    &get_feat_ctx);
3733 	rc = ena_calc_io_queue_size(&calc_queue_ctx);
3734 	if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
3735 		rc = EFAULT;
3736 		goto err_com_free;
3737 	}
3738 
3739 	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3740 	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3741 	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3742 	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3743 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3744 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3745 
3746 	adapter->max_num_io_queues = max_num_io_queues;
3747 
3748 	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
3749 
3750 	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3751 
3752 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3753 
3754 	/* set up dma tags for rx and tx buffers */
3755 	rc = ena_setup_tx_dma_tag(adapter);
3756 	if (unlikely(rc != 0)) {
3757 		ena_log(pdev, ERR, "Failed to create TX DMA tag\n");
3758 		goto err_com_free;
3759 	}
3760 
3761 	rc = ena_setup_rx_dma_tag(adapter);
3762 	if (unlikely(rc != 0)) {
3763 		ena_log(pdev, ERR, "Failed to create RX DMA tag\n");
3764 		goto err_tx_tag_free;
3765 	}
3766 
3767 	/*
3768 	 * The amount of requested MSIX vectors is equal to
3769 	 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
3770 	 * number of admin queue interrupts. The former is initially determined
3771 	 * by HW capabilities (see `ena_calc_max_io_queue_num())` but may not be
3772 	 * achieved if there are not enough system resources. By default, the
3773 	 * number of effectively used IO queues is the same but later on it can
3774 	 * be limited by the user using sysctl interface.
3775 	 */
3776 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3777 	if (unlikely(rc != 0)) {
3778 		ena_log(pdev, ERR,
3779 		    "Failed to enable and set the admin interrupts\n");
3780 		goto err_io_free;
3781 	}
3782 	/* By default all of allocated MSIX vectors are actively used */
3783 	adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3784 
3785 	/* initialize rings basic information */
3786 	ena_init_io_rings(adapter);
3787 
3788 	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
3789 	if (rc) {
3790 		ena_log(pdev, ERR, "Failed to allocate customer metrics buffer.\n");
3791 		goto err_msix_free;
3792 	}
3793 
3794 	rc = ena_sysctl_allocate_customer_metrics_buffer(adapter);
3795 	if (unlikely(rc)){
3796 		ena_log(pdev, ERR, "Failed to allocate sysctl customer metrics buffer.\n");
3797 		goto err_metrics_buffer_destroy;
3798 	}
3799 
3800 	/* Initialize statistics */
3801 	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3802 	    sizeof(struct ena_stats_dev));
3803 	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3804 	    sizeof(struct ena_hw_stats));
3805 	ena_sysctl_add_nodes(adapter);
3806 
3807 	/* setup network interface */
3808 	ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3809 
3810 	/* Initialize reset task queue */
3811 	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3812 	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3813 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3814 	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq",
3815 	    device_get_nameunit(adapter->pdev));
3816 
3817 	/* Initialize metrics task queue */
3818 	TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter);
3819 	adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue",
3820 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq);
3821 	taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq",
3822 	    device_get_nameunit(adapter->pdev));
3823 
3824 #ifdef DEV_NETMAP
3825 	rc = ena_netmap_attach(adapter);
3826 	if (rc != 0) {
3827 		ena_log(pdev, ERR, "netmap attach failed: %d\n", rc);
3828 		goto err_detach;
3829 	}
3830 #endif /* DEV_NETMAP */
3831 
3832 	/* Tell the stack that the interface is not active */
3833 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3834 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3835 
3836 	/* Run the timer service */
3837 	ENA_TIMER_RESET(adapter);
3838 
3839 	return (0);
3840 
3841 #ifdef DEV_NETMAP
3842 err_detach:
3843 	ether_ifdetach(adapter->ifp);
3844 #endif /* DEV_NETMAP */
3845 	free(adapter->customer_metrics_array, M_DEVBUF);
3846 err_metrics_buffer_destroy:
3847 	ena_com_delete_customer_metrics_buffer(ena_dev);
3848 err_msix_free:
3849 	ena_free_stats(adapter);
3850 	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
3851 	ena_free_mgmnt_irq(adapter);
3852 	ena_disable_msix(adapter);
3853 err_io_free:
3854 	ena_free_all_io_rings_resources(adapter);
3855 	ena_free_rx_dma_tag(adapter);
3856 err_tx_tag_free:
3857 	ena_free_tx_dma_tag(adapter);
3858 err_com_free:
3859 	ena_com_admin_destroy(ena_dev);
3860 	ena_com_delete_host_info(ena_dev);
3861 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3862 err_bus_free:
3863 	free(ena_dev->bus, M_DEVBUF);
3864 err_pci_free:
3865 	ena_free_pci_resources(adapter);
3866 err_dev_free:
3867 	free(ena_dev, M_DEVBUF);
3868 
3869 	return (rc);
3870 }
3871 
3872 /**
3873  * ena_detach - Device Removal Routine
3874  * @pdev: device information struct
3875  *
3876  * ena_detach is called by the device subsystem to alert the driver
3877  * that it should release a PCI device.
3878  **/
3879 static int
ena_detach(device_t pdev)3880 ena_detach(device_t pdev)
3881 {
3882 	struct ena_adapter *adapter = device_get_softc(pdev);
3883 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3884 	int rc;
3885 
3886 	/* Make sure VLANS are not using driver */
3887 	if (if_vlantrunkinuse(adapter->ifp)) {
3888 		ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n");
3889 		return (EBUSY);
3890 	}
3891 
3892 	ether_ifdetach(adapter->ifp);
3893 
3894 	/* Stop timer service */
3895 	ENA_LOCK_LOCK();
3896 	ENA_TIMER_DRAIN(adapter);
3897 	ENA_LOCK_UNLOCK();
3898 
3899 	/* Release metrics task */
3900 	while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task, NULL))
3901 		taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task);
3902 	taskqueue_free(adapter->metrics_tq);
3903 
3904 	/* Release reset task */
3905 	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3906 		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3907 	taskqueue_free(adapter->reset_tq);
3908 
3909 	ENA_LOCK_LOCK();
3910 	ena_down(adapter);
3911 	ena_destroy_device(adapter, true);
3912 	ENA_LOCK_UNLOCK();
3913 
3914 	/* Restore unregistered sysctl queue nodes. */
3915 	ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues,
3916 	    adapter->max_num_io_queues);
3917 
3918 #ifdef DEV_NETMAP
3919 	netmap_detach(adapter->ifp);
3920 #endif /* DEV_NETMAP */
3921 
3922 	ena_free_stats(adapter);
3923 
3924 	rc = ena_free_rx_dma_tag(adapter);
3925 	if (unlikely(rc != 0))
3926 		ena_log(adapter->pdev, WARN,
3927 		    "Unmapped RX DMA tag associations\n");
3928 
3929 	rc = ena_free_tx_dma_tag(adapter);
3930 	if (unlikely(rc != 0))
3931 		ena_log(adapter->pdev, WARN,
3932 		    "Unmapped TX DMA tag associations\n");
3933 
3934 	ena_free_irqs(adapter);
3935 
3936 	ena_free_pci_resources(adapter);
3937 
3938 	if (adapter->rss_indir != NULL)
3939 		free(adapter->rss_indir, M_DEVBUF);
3940 
3941 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
3942 		ena_com_rss_destroy(ena_dev);
3943 
3944 	ena_com_delete_host_info(ena_dev);
3945 
3946 	free(adapter->customer_metrics_array, M_DEVBUF);
3947 
3948 	ena_com_delete_customer_metrics_buffer(ena_dev);
3949 
3950 	if_free(adapter->ifp);
3951 
3952 	free(ena_dev->bus, M_DEVBUF);
3953 
3954 	free(ena_dev, M_DEVBUF);
3955 
3956 	return (bus_generic_detach(pdev));
3957 }
3958 
3959 /******************************************************************************
3960  ******************************** AENQ Handlers *******************************
3961  *****************************************************************************/
3962 /**
3963  * ena_update_on_link_change:
3964  * Notify the network interface about the change in link status
3965  **/
3966 static void
ena_update_on_link_change(void * adapter_data,struct ena_admin_aenq_entry * aenq_e)3967 ena_update_on_link_change(void *adapter_data,
3968     struct ena_admin_aenq_entry *aenq_e)
3969 {
3970 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3971 	struct ena_admin_aenq_link_change_desc *aenq_desc;
3972 	int status;
3973 	if_t ifp;
3974 
3975 	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3976 	ifp = adapter->ifp;
3977 	status = aenq_desc->flags &
3978 	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3979 
3980 	if (status != 0) {
3981 		ena_log(adapter->pdev, INFO, "link is UP\n");
3982 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3983 		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
3984 			if_link_state_change(ifp, LINK_STATE_UP);
3985 	} else {
3986 		ena_log(adapter->pdev, INFO, "link is DOWN\n");
3987 		if_link_state_change(ifp, LINK_STATE_DOWN);
3988 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3989 	}
3990 }
3991 
3992 static void
ena_notification(void * adapter_data,struct ena_admin_aenq_entry * aenq_e)3993 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
3994 {
3995 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3996 	struct ena_admin_ena_hw_hints *hints;
3997 
3998 	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3999 	    adapter->ena_dev, "Invalid group(%x) expected %x\n",
4000 	    aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION);
4001 
4002 	switch (aenq_e->aenq_common_desc.syndrome) {
4003 	case ENA_ADMIN_UPDATE_HINTS:
4004 		hints =
4005 		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
4006 		ena_update_hints(adapter, hints);
4007 		break;
4008 	default:
4009 		ena_log(adapter->pdev, ERR,
4010 		    "Invalid aenq notification link state %d\n",
4011 		    aenq_e->aenq_common_desc.syndrome);
4012 	}
4013 }
4014 
/*
 * SYSINIT hook (SI_SUB_LOCK, SI_ORDER_FIRST) that runs ENA_LOCK_INIT().
 * The hook argument is not used.
 */
static void
ena_lock_init(void *arg)
{
	(void)arg;

	ENA_LOCK_INIT();
}
SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL);
4021 
/*
 * SYSUNINIT hook (SI_SUB_LOCK, SI_ORDER_FIRST) that runs ENA_LOCK_DESTROY().
 * The hook argument is not used.
 */
static void
ena_lock_uninit(void *arg)
{
	(void)arg;

	ENA_LOCK_DESTROY();
}
SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL);
4028 
4029 /**
4030  * This handler will called for unknown event group or unimplemented handlers
4031  **/
4032 static void
unimplemented_aenq_handler(void * adapter_data,struct ena_admin_aenq_entry * aenq_e)4033 unimplemented_aenq_handler(void *adapter_data,
4034     struct ena_admin_aenq_entry *aenq_e)
4035 {
4036 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4037 
4038 	ena_log(adapter->pdev, ERR,
4039 	    "Unknown event was received or event with unimplemented handler\n");
4040 }
4041 
4042 static struct ena_aenq_handlers aenq_handlers = {
4043     .handlers = {
4044 	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4045 	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
4046 	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4047     },
4048     .unimplemented_handler = unimplemented_aenq_handler
4049 };
4050 
4051 /*********************************************************************
4052  *  FreeBSD Device Interface Entry Points
4053  *********************************************************************/
4054 
4055 static device_method_t ena_methods[] = { /* Device interface */
4056 	DEVMETHOD(device_probe, ena_probe),
4057 	DEVMETHOD(device_attach, ena_attach),
4058 	DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END
4059 };
4060 
4061 static driver_t ena_driver = {
4062 	"ena",
4063 	ena_methods,
4064 	sizeof(struct ena_adapter),
4065 };
4066 
/* Register ena_driver as the "ena" module on the pci bus. */
DRIVER_MODULE(ena, pci, ena_driver, 0, 0);
/*
 * Export the vendor/device ID table as PNP information.
 * NOTE(review): the "- 1" presumably skips a terminating sentinel entry of
 * ena_vendor_info_array -- confirm against the table definition.
 */
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
    nitems(ena_vendor_info_array) - 1);
/* Modules this driver depends on (netmap only when built with DEV_NETMAP). */
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(ena, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
4075 
4076 /*********************************************************************/
4077