1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32 #ifndef ENA_H
33 #define ENA_H
34
35 #include "opt_rss.h"
36
37 #include "ena-com/ena_com.h"
38 #include "ena-com/ena_eth_com.h"
39
/* Driver version components; combined into ENA_DRV_MODULE_VERSION below. */
#define ENA_DRV_MODULE_VER_MAJOR	2
#define ENA_DRV_MODULE_VER_MINOR	7
#define ENA_DRV_MODULE_VER_SUBMINOR	0

#define ENA_DRV_MODULE_NAME		"ena"

/*
 * Version string assembled by applying __XSTRING to the three components
 * above and joining them with "." separators. The #ifndef guard lets a
 * definition supplied before this point take precedence.
 */
#ifndef ENA_DRV_MODULE_VERSION
#define ENA_DRV_MODULE_VERSION				\
	__XSTRING(ENA_DRV_MODULE_VER_MAJOR) "."		\
	__XSTRING(ENA_DRV_MODULE_VER_MINOR) "."		\
	__XSTRING(ENA_DRV_MODULE_VER_SUBMINOR)
#endif
/* Device name and description strings. */
#define ENA_DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define ENA_DEVICE_DESC	"ENA adapter"
54
/*
 * Build a DMA address mask with the low 'x' bits set.
 *
 * The DMA width for ena is not expected to exceed 48, but a width of 64 or
 * more is handled explicitly anyway: shifting a 64-bit value by its full
 * width is undefined behavior in C, so such widths yield an all-ones mask.
 *
 * 'x' is evaluated twice; do not pass an expression with side effects.
 */
#define ENA_DMA_BIT_MASK(x) \
	(((x) >= 64) ? ~0ULL : ((1ULL << (x)) - 1ULL))
57
/* 1 for AENQ + ADMIN */
#define ENA_ADMIN_MSIX_VEC		1
/* Total MSI-X vectors: the admin vector plus one per IO queue. */
#define ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* PCI BAR numbers; presumably the register and memory BARs - TODO confirm. */
#define ENA_REG_BAR			0
#define ENA_MEM_BAR			2

#define ENA_BUS_DMA_SEGS		32

#define ENA_DEFAULT_BUF_RING_SIZE	4096

#define ENA_DEFAULT_RING_SIZE		1024
#define ENA_MIN_RING_SIZE		256

/* NOTE(review): looks like the "no base CPU chosen" sentinel - confirm. */
#define ENA_BASE_CPU_UNSPECIFIED	-1
/*
 * Refill Rx queue when number of required descriptors is above
 * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
 */
#define ENA_RX_REFILL_THRESH_DIVIDER	8
#define ENA_RX_REFILL_THRESH_PACKET	256

/* Size in bytes of the name buffer in struct ena_irq. */
#define ENA_IRQNAME_SIZE		40

/* Capacity of the per-packet buffer arrays declared in this header. */
#define ENA_PKT_MAX_BUFS		19

/* Table size is 2^ENA_RX_RSS_TABLE_LOG_SIZE, i.e. 128 entries. */
#define ENA_RX_RSS_TABLE_LOG_SIZE	7
#define ENA_RX_RSS_TABLE_SIZE		(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

#define ENA_HASH_KEY_SIZE		40

#define ENA_MAX_FRAME_LEN		10000
#define ENA_MIN_FRAME_LEN		60

/* Two more than the maximum number of buffers a single packet may use. */
#define ENA_TX_RESUME_THRESH		(ENA_PKT_MAX_BUFS + 2)

#define ENA_DB_THRESHOLD		64

#define ENA_TX_COMMIT			32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce the
 * amount of TCP retransmissions.
 */
#define ENA_TX_BUDGET			128
/* RX cleanup budget. -1 stands for infinity. */
#define ENA_RX_BUDGET			256
/*
 * How many times we can repeat cleanup in the IO IRQ handling routine if the
 * RX or TX budget was depleted.
 */
#define ENA_CLEAN_BUDGET		8

/* NOTE(review): the unit of these intervals is not visible here - confirm. */
#define ENA_RX_IRQ_INTERVAL		20
#define ENA_TX_IRQ_INTERVAL		50

#define ENA_MIN_MTU			128

/* 64 KiB. */
#define ENA_TSO_MAXSIZE			65536
116
#define ENA_MMIO_DISABLE_REG_READ	BIT(0)

/*
 * Advance a ring index by one with wrap-around. The wrap is done by masking
 * with (ring_size - 1), so the result is only correct when ring_size is a
 * power of two.
 */
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/*
 * Mapping between a combined queue number q and the separate Tx/Rx queue
 * indices: Tx takes the even index 2q and Rx the odd index 2q + 1. The
 * *_TO_COMBINED_IDX macros perform the inverse conversion.
 */
#define ENA_IO_TXQ_IDX(q)		(2 * (q))
#define ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)
#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q)	((q) / 2)
#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q)	(((q) - 1) / 2)

/* IRQ index 0 is the management IRQ; IO queue q uses index 1 + q. */
#define ENA_MGMNT_IRQ_IDX		0
#define ENA_IO_IRQ_FIRST_IDX		1
#define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))

#define ENA_MAX_NO_INTERRUPT_ITERATIONS	3
133
/*
 * ENA device should send a keep-alive message every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define ENA_DEFAULT_KEEP_ALIVE_TO	(SBT_1S * 6)

/*
 * Time before concluding the transmitter is hung: 5 seconds, expressed as a
 * multiple of SBT_1S.
 */
#define ENA_DEFAULT_TX_CMP_TO		(SBT_1S * 5)

/* Number of queues to check for missing Tx completions per timer tick */
#define ENA_DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timed-out packets before device reset */
#define ENA_DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and device IDs
 */
#define PCI_VENDOR_ID_AMAZON		0x1d0f

#define PCI_DEV_ID_ENA_PF		0x0ec2
#define PCI_DEV_ID_ENA_PF_RSERV0	0x1ec2
#define PCI_DEV_ID_ENA_VF		0xec20
#define PCI_DEV_ID_ENA_VF_RSERV0	0xec21
158
159 /*
160 * Flags indicating current ENA driver state
161 */
/*
 * Bit indices of the ENA driver state flags.
 *
 * ENA_FLAGS_NUMBER is the number of flags and must stay the last enumerator.
 * It is used as the size of the state bitset, so it has to be strictly
 * greater than every flag index; aliasing it to the last flag would declare
 * the bitset one bit too small for the highest flag.
 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,
	ENA_FLAG_DEV_UP,
	ENA_FLAG_LINK_UP,
	ENA_FLAG_MSIX_ENABLED,
	ENA_FLAG_TRIGGER_RESET,
	ENA_FLAG_ONGOING_RESET,
	ENA_FLAG_DEV_UP_BEFORE_RESET,
	ENA_FLAG_RSS_ACTIVE,
	ENA_FLAGS_NUMBER	/* count of flags above; keep last */
};
173
/* Bitset type holding the driver state flags from enum ena_flags_t. */
BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
typedef struct _ena_state ena_state_t;

/*
 * Accessors for the 'flags' bitset of an adapter. 'adapter' is a pointer to
 * an object with a 'flags' member; 'bit' is an enum ena_flags_t value.
 * ZERO clears every flag, ISSET tests one flag, and the *_ATOMIC variants
 * set or clear one flag through the atomic bitset operations.
 */
#define ENA_FLAG_ZERO(adapter)						\
	BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
#define ENA_FLAG_ISSET(bit, adapter)					\
	BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
#define ENA_FLAG_SET_ATOMIC(bit, adapter)				\
	BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)				\
	BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
185
/* One MSI-X table entry; ena_adapter keeps an array of these. */
struct msix_entry {
	int entry;
	int vector;
};
190
/*
 * PCI vendor/device ID pair plus an index.
 * NOTE(review): presumably one row of the supported-device table - confirm.
 */
typedef struct _ena_vendor_info_t {
	uint16_t vendor_id;
	uint16_t device_id;
	unsigned int index;
} ena_vendor_info_t;
196
/* State kept for one interrupt; ena_adapter holds a table of these. */
struct ena_irq {
	/* Interrupt resources */
	struct resource *res;
	driver_filter_t *handler;
	void *data;
	void *cookie;
	unsigned int vector;
	bool requested;
	int cpu;
	/* Name buffer of ENA_IRQNAME_SIZE bytes. */
	char name[ENA_IRQNAME_SIZE];
};
208
/*
 * An IO queue: ties one Tx ring and one Rx ring to their owning adapter and
 * carries the cleanup task, its taskqueue and CPU/domain placement fields.
 */
struct ena_que {
	struct ena_adapter *adapter;
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;

	struct task cleanup_task;
	struct taskqueue *cleanup_tq;

	uint32_t id;
	int cpu;
	cpuset_t cpu_mask;
	int domain;
	struct sysctl_oid *oid;
};
223
/*
 * Bundle of device handles together with Tx/Rx queue sizes, their maximums
 * and the maximum SGL sizes.
 * NOTE(review): presumably filled in while computing IO queue sizes -
 * confirm against the caller.
 */
struct ena_calc_queue_size_ctx {
	struct ena_com_dev_get_features_ctx *get_feat_ctx;
	struct ena_com_dev *ena_dev;
	device_t pdev;
	uint32_t tx_queue_size;
	uint32_t rx_queue_size;
	uint32_t max_tx_queue_size;
	uint32_t max_rx_queue_size;
	uint16_t max_tx_sgl_size;
	uint16_t max_rx_sgl_size;
};
235
#ifdef DEV_NETMAP
/*
 * Netmap-only Tx bookkeeping embedded in struct ena_tx_buffer. Both arrays
 * have room for ENA_PKT_MAX_BUFS entries.
 */
struct ena_netmap_tx_info {
	uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS];
	bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS];
	unsigned int sockets_used;
};
#endif
243
/* Per-packet Tx context; the type is aligned to CACHE_LINE_SIZE. */
struct ena_tx_buffer {
	struct mbuf *mbuf;
	/*
	 * # of ena desc for this specific mbuf
	 * (includes data desc and metadata desc)
	 */
	unsigned int tx_descs;
	/* # of buffers used by this mbuf */
	unsigned int num_of_bufs;

	bus_dmamap_t dmamap;

	/* Used to detect missing tx packets */
	struct bintime timestamp;
	bool print_once;

#ifdef DEV_NETMAP
	struct ena_netmap_tx_info nm_info;
#endif /* DEV_NETMAP */

	/* Room for up to ENA_PKT_MAX_BUFS buffer descriptors. */
	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);
264
/* Per-buffer Rx context; the type is aligned to CACHE_LINE_SIZE. */
struct ena_rx_buffer {
	struct mbuf *mbuf;
	bus_dmamap_t map;
	struct ena_com_buf ena_buf;
#ifdef DEV_NETMAP
	/* Present only in DEV_NETMAP builds. */
	uint32_t netmap_buf_idx;
#endif /* DEV_NETMAP */
} __aligned(CACHE_LINE_SIZE);
273
/* Tx counters; stored in struct ena_ring as the 'tx_stats' union member. */
struct ena_stats_tx {
	counter_u64_t cnt;
	counter_u64_t bytes;
	counter_u64_t prepare_ctx_err;
	counter_u64_t dma_mapping_err;
	/* Incremented by ena_ring_tx_doorbell() for every doorbell written. */
	counter_u64_t doorbells;
	counter_u64_t missing_tx_comp;
	counter_u64_t bad_req_id;
	counter_u64_t collapse;
	counter_u64_t collapse_err;
	counter_u64_t queue_wakeup;
	counter_u64_t queue_stop;
	counter_u64_t llq_buffer_copy;
	counter_u64_t unmask_interrupt_num;
};
289
/* Rx counters; stored in struct ena_ring as the 'rx_stats' union member. */
struct ena_stats_rx {
	counter_u64_t cnt;
	counter_u64_t bytes;
	counter_u64_t refil_partial;
	counter_u64_t csum_bad;
	counter_u64_t mjum_alloc_fail;
	counter_u64_t mbuf_alloc_fail;
	counter_u64_t dma_mapping_err;
	counter_u64_t bad_desc_num;
	counter_u64_t bad_req_id;
	counter_u64_t empty_rx_ring;
	counter_u64_t csum_good;
};
303
/*
 * State of a single Tx or Rx ring. The same structure serves both
 * directions: direction-specific members share storage through anonymous
 * unions, so only the member matching the ring's direction is meaningful.
 * The type is aligned to CACHE_LINE_SIZE.
 */
struct ena_ring {
	/* Holds the empty requests for TX/RX out of order completions */
	union {
		uint16_t *free_tx_ids;
		uint16_t *free_rx_ids;
	};
	struct ena_com_dev *ena_dev;
	struct ena_adapter *adapter;
	struct ena_com_io_cq *ena_com_io_cq;
	struct ena_com_io_sq *ena_com_io_sq;

	uint16_t qid;

	/* Determines if device will use LLQ or normal mode for TX */
	enum ena_admin_placement_policy_type tx_mem_queue_type;
	union {
		/* The maximum length the driver can push to the device (For LLQ) */
		uint8_t tx_max_header_size;
		/* The maximum (and default) mbuf size for the Rx descriptor. */
		uint16_t rx_mbuf_sz;

	};

	uint8_t first_interrupt;
	uint16_t no_interrupt_event_cnt;

	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];

	struct ena_que *que;
	struct lro_ctrl lro;

	uint16_t next_to_use;
	uint16_t next_to_clean;

	union {
		struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
		struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
	};
	int ring_size; /* number of tx/rx_buffer_info's entries */

	struct buf_ring *br; /* only for TX */
	uint32_t buf_ring_size;

	struct mtx ring_mtx;
	char mtx_name[16];

	struct {
		struct task enqueue_task;
		struct taskqueue *enqueue_tq;
	};

	union {
		struct ena_stats_tx tx_stats;
		struct ena_stats_rx rx_stats;
	};

	union {
		int empty_rx_queue;
		/* For Tx ring to indicate if it's running or not */
		bool running;
	};

	/*
	 * How many packets are sent in one Tx loop, used for doorbells.
	 * Reset to 0 by ena_ring_tx_doorbell().
	 */
	uint32_t acum_pkts;

	/* Used for LLQ */
	uint8_t *push_buf_intermediate_buf;

	int tx_last_cleanup_ticks;

#ifdef DEV_NETMAP
	bool initialized;
#endif /* DEV_NETMAP */
} __aligned(CACHE_LINE_SIZE);
378
/* Device-level counters; stored in ena_adapter as 'dev_stats'. */
struct ena_stats_dev {
	counter_u64_t wd_expired;
	counter_u64_t interface_up;
	counter_u64_t interface_down;
	counter_u64_t admin_q_pause;
};
385
/*
 * Packet, byte and drop counters for each direction; stored in ena_adapter
 * as 'hw_stats'.
 */
struct ena_hw_stats {
	counter_u64_t rx_packets;
	counter_u64_t tx_packets;

	counter_u64_t rx_bytes;
	counter_u64_t tx_bytes;

	counter_u64_t rx_drops;
	counter_u64_t tx_drops;
};
396
/*
 * Board specific private data structure.
 *
 * Holds the per-device driver state: OS handles and resources, MSI-X and DMA
 * setup, queue configuration, the state flag bitset, the IO queues with
 * their Tx/Rx rings, the IRQ table, timer/reset/metrics tasks and
 * statistics.
 */
struct ena_adapter {
	struct ena_com_dev *ena_dev;

	/* OS defined structs */
	if_t ifp;
	device_t pdev;
	struct ifmedia media;

	/* OS resources */
	struct resource *memory;
	struct resource *registers;
	struct resource *msix;
	int msix_rid;

	/* MSI-X */
	struct msix_entry *msix_entries;
	int msix_vecs;

	/* DMA tags used throughout the driver adapter for Tx and Rx */
	bus_dma_tag_t tx_buf_tag;
	bus_dma_tag_t rx_buf_tag;
	int dma_width;

	uint32_t max_mtu;

	uint32_t num_io_queues;
	uint32_t max_num_io_queues;

	uint32_t requested_tx_ring_size;
	uint32_t requested_rx_ring_size;

	uint32_t max_tx_ring_size;
	uint32_t max_rx_ring_size;

	uint16_t max_tx_sgl_size;
	uint16_t max_rx_sgl_size;

	uint32_t tx_offload_cap;

	uint32_t buf_ring_size;

	/* RSS */
	int first_bind;
	struct ena_indir *rss_indir;

	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* mdio and phy */

	/* Driver state bits; access through the ENA_FLAG_* macros. */
	ena_state_t flags;

	/* IRQ CPU affinity */
	int irq_cpu_base;
	uint32_t irq_cpu_stride;

	uint8_t rss_enabled;

	/* Queue will represent one TX and one RX ring */
	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* TX */
	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* RX */
	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* One slot for the admin vector plus one per possible IO queue. */
	struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];

	/* Timer service */
	struct callout timer_service;
	sbintime_t keep_alive_timestamp;
	uint32_t next_monitored_tx_qid;
	struct task reset_task;
	struct taskqueue *reset_tq;
	struct task metrics_task;
	struct taskqueue *metrics_tq;
	int wd_active;
	sbintime_t keep_alive_timeout;
	sbintime_t missing_tx_timeout;
	uint32_t missing_tx_max_queues;
	uint32_t missing_tx_threshold;
	bool disable_meta_caching;

	uint16_t metrics_sample_interval;
	uint16_t metrics_sample_interval_cnt;

	/* Statistics */
	struct ena_stats_dev dev_stats;
	struct ena_hw_stats hw_stats;
	struct ena_admin_eni_stats eni_metrics;
	struct ena_admin_ena_srd_info ena_srd_info;
	uint64_t *customer_metrics_array;

	/* Recorded by ena_trigger_reset() when it requests a reset. */
	enum ena_regs_reset_reason_types reset_reason;
};
495
/* Wrappers around the mutex embedded in a ring (struct ena_ring.ring_mtx). */
#define ENA_RING_MTX_LOCK(_ring)	mtx_lock(&(_ring)->ring_mtx)
#define ENA_RING_MTX_TRYLOCK(_ring)	mtx_trylock(&(_ring)->ring_mtx)
#define ENA_RING_MTX_UNLOCK(_ring)	mtx_unlock(&(_ring)->ring_mtx)
#define ENA_RING_MTX_ASSERT(_ring)				\
	mtx_assert(&(_ring)->ring_mtx, MA_OWNED)

/*
 * Wrappers around the driver-wide sx lock 'ena_global_lock'. LOCK takes it
 * exclusively and ASSERT checks that it is exclusively held.
 */
#define ENA_LOCK_INIT()						\
	sx_init(&ena_global_lock, "ENA global lock")
#define ENA_LOCK_DESTROY()	sx_destroy(&ena_global_lock)
#define ENA_LOCK_LOCK()		sx_xlock(&ena_global_lock)
#define ENA_LOCK_UNLOCK()	sx_unlock(&ena_global_lock)
#define ENA_LOCK_ASSERT()	sx_assert(&ena_global_lock, SA_XLOCKED)

/*
 * Helpers for the adapter's 'timer_service' callout. RESET arms it to call
 * ena_timer_service with the adapter as its argument, passing SBT_1S for
 * both time arguments of callout_reset_sbt().
 * NOTE(review): ena_timer_service is not declared in this header; users of
 * ENA_TIMER_RESET need it in scope.
 */
#define ENA_TIMER_INIT(_adapter)				\
	callout_init(&(_adapter)->timer_service, true)
#define ENA_TIMER_DRAIN(_adapter)				\
	callout_drain(&(_adapter)->timer_service)
#define ENA_TIMER_RESET(_adapter)				\
	callout_reset_sbt(&(_adapter)->timer_service, SBT_1S, SBT_1S, \
	    ena_timer_service, (void*)(_adapter), 0)

/*
 * clamp_t limits _x to the range [min, max], performing the comparisons in
 * 'type' via max_t/min_t. clamp_val does the same using the type of 'val'.
 */
#define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max)
#define clamp_val(val, lo, hi) clamp_t(__typeof(val), val, lo, hi)
519
/* Lock operated on by the ENA_LOCK_* macros; defined outside this header. */
extern struct sx ena_global_lock;

/* Driver routines declared here and implemented outside this header. */
int ena_up(struct ena_adapter *adapter);
void ena_down(struct ena_adapter *adapter);
int ena_restore_device(struct ena_adapter *adapter);
void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
int ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num);
int ena_update_buf_ring_size(struct ena_adapter *adapter,
    uint32_t new_buf_ring_size);
int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
    uint32_t new_rx_size);
int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
int ena_update_base_cpu(struct ena_adapter *adapter, int new_num);
int ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num);
534 static inline int
ena_mbuf_count(struct mbuf * mbuf)535 ena_mbuf_count(struct mbuf *mbuf)
536 {
537 int count = 1;
538
539 while ((mbuf = mbuf->m_next) != NULL)
540 ++count;
541
542 return count;
543 }
544
545 static inline void
ena_trigger_reset(struct ena_adapter * adapter,enum ena_regs_reset_reason_types reset_reason)546 ena_trigger_reset(struct ena_adapter *adapter,
547 enum ena_regs_reset_reason_types reset_reason)
548 {
549 if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
550 adapter->reset_reason = reset_reason;
551 ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
552 }
553 }
554
555 static inline void
ena_ring_tx_doorbell(struct ena_ring * tx_ring)556 ena_ring_tx_doorbell(struct ena_ring *tx_ring)
557 {
558 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
559 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
560 tx_ring->acum_pkts = 0;
561 }
562
563 #endif /* !(ENA_H) */
564