1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2023 Google LLC
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * 3. Neither the name of the copyright holder nor the names of its contributors
17 * may be used to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31 #ifndef _GVE_FBSD_H
32 #define _GVE_FBSD_H
33
34 #include "gve_desc.h"
35 #include "gve_plat.h"
36 #include "gve_register.h"
37
38 #ifndef PCI_VENDOR_ID_GOOGLE
39 #define PCI_VENDOR_ID_GOOGLE 0x1ae0
40 #endif
41
42 #define PCI_DEV_ID_GVNIC 0x0042
43 #define GVE_REGISTER_BAR 0
44 #define GVE_DOORBELL_BAR 2
45
46 /* Driver can alloc up to 2 segments for the header and 2 for the payload. */
47 #define GVE_TX_MAX_DESCS 4
48 #define GVE_TX_BUFRING_ENTRIES 4096
49
50 #define ADMINQ_SIZE PAGE_SIZE
51
52 #define GVE_DEFAULT_RX_BUFFER_SIZE 2048
53 /* Each RX bounce buffer page can fit two packet buffers. */
54 #define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2)
55
56 /*
57 * Number of descriptors per queue page list.
58 * Page count AKA QPL size can be derived by dividing the number of elements in
59 * a page by the number of descriptors available.
60 */
61 #define GVE_QPL_DIVISOR 16
62
63 static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
64
/*
 * A single busdma-backed memory region: the kernel virtual address,
 * the device-visible bus address, and the tag/map needed to free it.
 */
struct gve_dma_handle {
	bus_addr_t bus_addr;	/* device-visible (bus) address */
	void *cpu_addr;		/* kernel virtual address of the region */
	bus_dma_tag_t tag;	/* busdma tag the region was allocated with */
	bus_dmamap_t map;	/* busdma map loaded for this region */
};
71
/*
 * One slot of the TX descriptor ring. Every slot holds exactly one of
 * the three descriptor formats, hence the union.
 */
union gve_tx_desc {
	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
	struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */
	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
};
77
/* Tracks the memory in the fifo occupied by a segment of a packet */
struct gve_tx_iovec {
	uint32_t iov_offset; /* offset into the TX fifo where this segment starts */
	uint32_t iov_len; /* length of the segment in bytes */
	uint32_t iov_padding; /* padding associated with this segment */
};
84
/* Tracks allowed and current queue settings */
struct gve_queue_config {
	uint16_t max_queues;	/* maximum the device allows */
	uint16_t num_queues;	/* current number in use */
};
90
/*
 * One entry of the irq doorbell array shared with the device.
 * Each entry occupies its own cache line (presumably to avoid false
 * sharing between adjacent doorbells — NOTE(review): confirm intent).
 */
struct gve_irq_db {
	__be32 index;	/* big-endian doorbell value for one irq */
} __aligned(CACHE_LINE_SIZE);
94
95 /*
96 * GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
97 * when the entire configure_device_resources command is zeroed out and the
98 * queue_format is not specified.
99 */
100 enum gve_queue_format {
101 GVE_QUEUE_FORMAT_UNSPECIFIED = 0x0,
102 GVE_GQI_RDA_FORMAT = 0x1,
103 GVE_GQI_QPL_FORMAT = 0x2,
104 GVE_DQO_RDA_FORMAT = 0x3,
105 };
106
/*
 * Bit positions for the driver state bitset (priv->state_flags),
 * accessed via the gve_{get,set,clear}_state_flag helpers.
 */
enum gve_state_flags_bit {
	GVE_STATE_FLAG_ADMINQ_OK,	/* admin queue is set up */
	GVE_STATE_FLAG_RESOURCES_OK,	/* device resources configured */
	GVE_STATE_FLAG_QPLREG_OK,	/* queue page lists registered */
	GVE_STATE_FLAG_RX_RINGS_OK,	/* RX rings allocated */
	GVE_STATE_FLAG_TX_RINGS_OK,	/* TX rings allocated */
	GVE_STATE_FLAG_QUEUES_UP,	/* queues created on the device */
	GVE_STATE_FLAG_LINK_UP,		/* link reported as up */
	GVE_STATE_FLAG_DO_RESET,	/* a reset has been requested */
	GVE_STATE_FLAG_IN_RESET,	/* a reset is in progress */
	GVE_NUM_STATE_FLAGS /* Not part of the enum space */
};
119
120 BITSET_DEFINE(gve_state_flags, GVE_NUM_STATE_FLAGS);
121
122 #define GVE_DEVICE_STATUS_RESET (0x1 << 1)
123 #define GVE_DEVICE_STATUS_LINK_STATUS (0x1 << 2)
124
125 #define GVE_RING_LOCK(ring) mtx_lock(&(ring)->ring_mtx)
126 #define GVE_RING_TRYLOCK(ring) mtx_trylock(&(ring)->ring_mtx)
127 #define GVE_RING_UNLOCK(ring) mtx_unlock(&(ring)->ring_mtx)
128 #define GVE_RING_ASSERT(ring) mtx_assert(&(ring)->ring_mtx, MA_OWNED)
129
130 #define GVE_IFACE_LOCK_INIT(lock) sx_init(&lock, "gve interface lock")
131 #define GVE_IFACE_LOCK_DESTROY(lock) sx_destroy(&lock)
132 #define GVE_IFACE_LOCK_LOCK(lock) sx_xlock(&lock)
133 #define GVE_IFACE_LOCK_UNLOCK(lock) sx_unlock(&lock)
134 #define GVE_IFACE_LOCK_ASSERT(lock) sx_assert(&lock, SA_XLOCKED)
135
/*
 * A queue page list (QPL): the set of pages backing a ring's bounce
 * buffer, registered with the device via the register-page-list adminq
 * command.
 */
struct gve_queue_page_list {
	uint32_t id;			/* QPL id communicated to the device */
	uint32_t num_dmas;		/* number of entries in dmas[] */
	uint32_t num_pages;		/* number of entries in pages[] */
	vm_offset_t kva;		/* KVA mapping covering the pages */
	vm_page_t *pages;		/* the backing pages */
	struct gve_dma_handle *dmas;	/* per-page DMA handles */
};
144
/* One allocated interrupt: its bus resource and setup cookie. */
struct gve_irq {
	struct resource *res;	/* interrupt resource */
	void *cookie;		/* cookie from interrupt setup, used for teardown */
};
149
/* Per-slot state for one RX bounce-buffer page. */
struct gve_rx_slot_page_info {
	void *page_address;	/* KVA of the page */
	vm_page_t page;		/* the backing page */
	uint32_t page_offset;	/* offset of this slot's buffer within the page */
	uint16_t pad;
};
156
157 /*
158 * A single received packet split across multiple buffers may be
159 * reconstructed using the information in this structure.
160 */
161 struct gve_rx_ctx {
162 /* head and tail of mbuf chain for the current packet */
163 struct mbuf *mbuf_head;
164 struct mbuf *mbuf_tail;
165 uint32_t total_size;
166 uint8_t frag_cnt;
167 bool is_tcp;
168 bool drop_pkt;
169 };
170
/*
 * State common to both RX and TX rings; embedded as `com` in
 * struct gve_rx_ring and struct gve_tx_ring.
 */
struct gve_ring_com {
	struct gve_priv *priv;	/* backpointer to the device softc */
	uint32_t id;		/* ring index */

	/*
	 * BAR2 offset for this ring's doorbell and the
	 * counter-array offset for this ring's counter.
	 * Acquired from the device individually for each
	 * queue in the queue_create adminq command.
	 */
	struct gve_queue_resources *q_resources;
	struct gve_dma_handle q_resources_mem;

	/* Byte offset into BAR2 where this ring's 4-byte irq doorbell lies. */
	uint32_t irq_db_offset;
	/* Byte offset into BAR2 where this ring's 4-byte doorbell lies. */
	uint32_t db_offset;
	/*
	 * Index, not byte-offset, into the counter array where this ring's
	 * 4-byte counter lies.
	 */
	uint32_t counter_idx;

	/*
	 * The index of the MSIX vector that was assigned to
	 * this ring in `gve_alloc_irqs`.
	 *
	 * It is passed to the device in the queue_create adminq
	 * command.
	 *
	 * Additionally, this also serves as the index into
	 * `priv->irq_db_indices` where this ring's irq doorbell's
	 * BAR2 offset, `irq_db_idx`, can be found.
	 */
	int ntfy_id;

	/*
	 * The fixed bounce buffer for this ring.
	 * Once allocated, has to be offered to the device
	 * over the register-page-list adminq command.
	 */
	struct gve_queue_page_list *qpl;

	struct task cleanup_task;	/* ring cleanup work */
	struct taskqueue *cleanup_tq;	/* taskqueue running cleanup_task */
} __aligned(CACHE_LINE_SIZE);
217
/* Per-RX-queue counters; count derived as NUM_RX_STATS below. */
struct gve_rxq_stats {
	counter_u64_t rbytes;				/* bytes received */
	counter_u64_t rpackets;				/* packets received */
	counter_u64_t rx_dropped_pkt;			/* total packets dropped */
	counter_u64_t rx_copybreak_cnt;			/* packets copied due to copybreak */
	counter_u64_t rx_frag_flip_cnt;			/* fragments flipped */
	counter_u64_t rx_frag_copy_cnt;			/* fragments copied */
	counter_u64_t rx_dropped_pkt_desc_err;		/* drops due to descriptor errors */
	counter_u64_t rx_dropped_pkt_mbuf_alloc_fail;	/* drops due to mbuf alloc failure */
};
228
229 #define NUM_RX_STATS (sizeof(struct gve_rxq_stats) / sizeof(counter_u64_t))
230
/* power-of-2 sized receive ring */
struct gve_rx_ring {
	struct gve_ring_com com;		/* state common to RX/TX rings */
	struct gve_dma_handle desc_ring_mem;	/* DMA memory backing desc_ring */
	struct gve_dma_handle data_ring_mem;	/* DMA memory backing data_ring */

	/* accessed in the receive hot path */
	struct {
		struct gve_rx_desc *desc_ring;		/* the descriptor ring */
		union gve_rx_data_slot *data_ring;	/* buffer slots posted to the device */
		struct gve_rx_slot_page_info *page_info; /* per-slot bounce-page state */

		struct gve_rx_ctx ctx;	/* reassembly state for the in-progress packet */
		struct lro_ctrl lro;	/* software LRO state */
		uint8_t seq_no; /* helps traverse the descriptor ring */
		uint32_t cnt; /* free-running total number of completed packets */
		uint32_t fill_cnt; /* free-running total number of descs and buffs posted */
		uint32_t mask; /* masks the cnt and fill_cnt to the size of the ring */
		struct gve_rxq_stats stats;	/* per-queue counters */
	} __aligned(CACHE_LINE_SIZE);

} __aligned(CACHE_LINE_SIZE);
253
254 /*
255 * A contiguous representation of the pages composing the Tx bounce buffer.
256 * The xmit taskqueue and the completion taskqueue both simultaneously use it.
257 * Both operate on `available`: the xmit tq lowers it and the completion tq
258 * raises it. `head` is the last location written at and so only the xmit tq
259 * uses it.
260 */
261 struct gve_tx_fifo {
262 vm_offset_t base; /* address of base of FIFO */
263 uint32_t size; /* total size */
264 volatile int available; /* how much space is still available */
265 uint32_t head; /* offset to write at */
266 };
267
/* Per-slot TX bookkeeping: the pending mbuf and its fifo segments. */
struct gve_tx_buffer_state {
	struct mbuf *mbuf;	/* mbuf pending completion, freed when done */
	/* fifo memory used by each of the packet's segments */
	struct gve_tx_iovec iov[GVE_TX_MAX_DESCS];
};
272
/* Per-TX-queue counters; count derived as NUM_TX_STATS below. */
struct gve_txq_stats {
	counter_u64_t tbytes;				/* bytes transmitted */
	counter_u64_t tpackets;				/* packets transmitted */
	counter_u64_t tso_packet_cnt;			/* TSO packets sent */
	counter_u64_t tx_dropped_pkt;			/* total packets dropped */
	counter_u64_t tx_dropped_pkt_nospace_device;	/* drops: no space on device ring */
	counter_u64_t tx_dropped_pkt_nospace_bufring;	/* drops: no space in the buf_ring */
	counter_u64_t tx_dropped_pkt_vlan;		/* drops related to VLAN handling */
};
282
283 #define NUM_TX_STATS (sizeof(struct gve_txq_stats) / sizeof(counter_u64_t))
284
/* power-of-2 sized transmit ring */
struct gve_tx_ring {
	struct gve_ring_com com;		/* state common to RX/TX rings */
	struct gve_dma_handle desc_ring_mem;	/* DMA memory backing desc_ring */

	struct task xmit_task;		/* deferred transmit work */
	struct taskqueue *xmit_tq;	/* taskqueue running xmit_task */

	/* accessed in the transmit hot path */
	struct {
		union gve_tx_desc *desc_ring;		/* the descriptor ring */
		struct gve_tx_buffer_state *info;	/* per-slot bookkeeping */
		struct buf_ring *br;			/* staging ring of outbound mbufs */

		struct gve_tx_fifo fifo;	/* bounce buffer for this ring */
		struct mtx ring_mtx;		/* protects the ring; see GVE_RING_LOCK */

		uint32_t req; /* free-running total number of packets written to the nic */
		uint32_t done; /* free-running total number of completed packets */
		uint32_t mask; /* masks the req and done to the size of the ring */
		struct gve_txq_stats stats;	/* per-queue counters */
	} __aligned(CACHE_LINE_SIZE);

} __aligned(CACHE_LINE_SIZE);
309
/* Per-device driver state (softc). */
struct gve_priv {
	if_t ifp;		/* network interface */
	device_t dev;		/* bus device handle */
	struct ifmedia media;	/* ifmedia state */

	uint8_t mac[ETHER_ADDR_LEN];	/* MAC address reported by the device */

	struct gve_dma_handle aq_mem;	/* DMA memory backing the admin queue */

	struct resource *reg_bar; /* BAR0 */
	struct resource *db_bar; /* BAR2 */
	struct resource *msix_table;

	uint32_t mgmt_msix_idx;	/* MSIX vector for the management interrupt */
	uint32_t rx_copybreak;	/* packets at or under this size are copied */

	/* Device limits and defaults learned from the describe_device cmd */
	uint16_t num_event_counters;
	uint16_t default_num_queues;
	uint16_t tx_desc_cnt;
	uint16_t rx_desc_cnt;
	uint16_t rx_pages_per_qpl;
	uint64_t max_registered_pages;
	uint64_t num_registered_pages;	/* pages currently registered with the device */
	uint32_t supported_features;
	uint16_t max_mtu;

	struct gve_dma_handle counter_array_mem;	/* DMA memory for counters */
	__be32 *counters;				/* device-written event counters */
	struct gve_dma_handle irqs_db_mem;		/* DMA memory for irq doorbells */
	struct gve_irq_db *irq_db_indices;		/* per-vector irq doorbell entries */

	enum gve_queue_format queue_format;	/* negotiated queue format */
	struct gve_queue_page_list *qpls;	/* all allocated queue page lists */
	struct gve_queue_config tx_cfg;		/* TX queue limits/current counts */
	struct gve_queue_config rx_cfg;		/* RX queue limits/current counts */
	uint32_t num_queues;			/* total queues across TX and RX */

	struct gve_irq *irq_tbl;	/* allocated interrupts */
	struct gve_tx_ring *tx;		/* TX rings, tx_cfg.num_queues of them */
	struct gve_rx_ring *rx;		/* RX rings, rx_cfg.num_queues of them */

	/*
	 * Admin queue - see gve_adminq.h
	 * Since AQ cmds do not run in steady state, 32 bit counters suffice
	 */
	struct gve_adminq_command *adminq;
	vm_paddr_t adminq_bus_addr;
	uint32_t adminq_mask; /* masks prod_cnt to adminq size */
	uint32_t adminq_prod_cnt; /* free-running count of AQ cmds executed */
	uint32_t adminq_cmd_fail; /* free-running count of AQ cmds failed */
	uint32_t adminq_timeouts; /* free-running count of AQ cmds timeouts */
	/* free-running count of each distinct AQ cmd executed */
	uint32_t adminq_describe_device_cnt;
	uint32_t adminq_cfg_device_resources_cnt;
	uint32_t adminq_register_page_list_cnt;
	uint32_t adminq_unregister_page_list_cnt;
	uint32_t adminq_create_tx_queue_cnt;
	uint32_t adminq_create_rx_queue_cnt;
	uint32_t adminq_destroy_tx_queue_cnt;
	uint32_t adminq_destroy_rx_queue_cnt;
	uint32_t adminq_dcfg_device_resources_cnt;
	uint32_t adminq_set_driver_parameter_cnt;
	uint32_t adminq_verify_driver_compatibility_cnt;

	/* Lifetime counters for the interface */
	uint32_t interface_up_cnt;
	uint32_t interface_down_cnt;
	uint32_t reset_cnt;

	struct task service_task;	/* handles resets and similar service work */
	struct taskqueue *service_tq;	/* taskqueue running service_task */

	struct gve_state_flags state_flags;	/* bitset of gve_state_flags_bit */
	struct sx gve_iface_lock;		/* see GVE_IFACE_LOCK_* macros */
};
384
385 static inline bool
gve_get_state_flag(struct gve_priv * priv,int pos)386 gve_get_state_flag(struct gve_priv *priv, int pos)
387 {
388 return (BIT_ISSET(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags));
389 }
390
391 static inline void
gve_set_state_flag(struct gve_priv * priv,int pos)392 gve_set_state_flag(struct gve_priv *priv, int pos)
393 {
394 BIT_SET_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags);
395 }
396
397 static inline void
gve_clear_state_flag(struct gve_priv * priv,int pos)398 gve_clear_state_flag(struct gve_priv *priv, int pos)
399 {
400 BIT_CLR_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags);
401 }
402
403 /* Defined in gve_main.c */
404 void gve_schedule_reset(struct gve_priv *priv);
405
406 /* Register access functions defined in gve_utils.c */
407 uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset);
408 void gve_reg_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
409 void gve_db_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
410
411 /* QPL (Queue Page List) functions defined in gve_qpl.c */
412 int gve_alloc_qpls(struct gve_priv *priv);
413 void gve_free_qpls(struct gve_priv *priv);
414 int gve_register_qpls(struct gve_priv *priv);
415 int gve_unregister_qpls(struct gve_priv *priv);
416
417 /* TX functions defined in gve_tx.c */
418 int gve_alloc_tx_rings(struct gve_priv *priv);
419 void gve_free_tx_rings(struct gve_priv *priv);
420 int gve_create_tx_rings(struct gve_priv *priv);
421 int gve_destroy_tx_rings(struct gve_priv *priv);
422 int gve_tx_intr(void *arg);
423 int gve_xmit_ifp(if_t ifp, struct mbuf *mbuf);
424 void gve_qflush(if_t ifp);
425 void gve_xmit_tq(void *arg, int pending);
426 void gve_tx_cleanup_tq(void *arg, int pending);
427
428 /* RX functions defined in gve_rx.c */
429 int gve_alloc_rx_rings(struct gve_priv *priv);
430 void gve_free_rx_rings(struct gve_priv *priv);
431 int gve_create_rx_rings(struct gve_priv *priv);
432 int gve_destroy_rx_rings(struct gve_priv *priv);
433 int gve_rx_intr(void *arg);
434 void gve_rx_cleanup_tq(void *arg, int pending);
435
436 /* DMA functions defined in gve_utils.c */
437 int gve_dma_alloc_coherent(struct gve_priv *priv, int size, int align,
438 struct gve_dma_handle *dma);
439 void gve_dma_free_coherent(struct gve_dma_handle *dma);
440 int gve_dmamap_create(struct gve_priv *priv, int size, int align,
441 struct gve_dma_handle *dma);
442 void gve_dmamap_destroy(struct gve_dma_handle *dma);
443
444 /* IRQ functions defined in gve_utils.c */
445 void gve_free_irqs(struct gve_priv *priv);
446 int gve_alloc_irqs(struct gve_priv *priv);
447 void gve_unmask_all_queue_irqs(struct gve_priv *priv);
448 void gve_mask_all_queue_irqs(struct gve_priv *priv);
449
450 /* Systcl functions defined in gve_sysctl.c*/
451 void gve_setup_sysctl(struct gve_priv *priv);
452 void gve_accum_stats(struct gve_priv *priv, uint64_t *rpackets,
453 uint64_t *rbytes, uint64_t *rx_dropped_pkt, uint64_t *tpackets,
454 uint64_t *tbytes, uint64_t *tx_dropped_pkt);
455
456 /* Stats functions defined in gve_utils.c */
457 void gve_alloc_counters(counter_u64_t *stat, int num_stats);
458 void gve_free_counters(counter_u64_t *stat, int num_stats);
459
#endif /* _GVE_FBSD_H */
461