xref: /qemu/hw/net/vmxnet3.c (revision 85aad98a)
1 /*
2  * QEMU VMWARE VMXNET3 paravirtual NIC
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "net/net.h"
22 #include "net/tap.h"
23 #include "net/checksum.h"
24 #include "sysemu/sysemu.h"
25 #include "qemu-common.h"
26 #include "qemu/bswap.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 
30 #include "vmxnet3.h"
31 #include "vmxnet_debug.h"
32 #include "vmware_utils.h"
33 #include "net_tx_pkt.h"
34 #include "net_rx_pkt.h"
35 
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE                 0x2000
#define MIN_BUF_SIZE                          60

/*
 * Compatibility flags for migration.
 * Every flag is a single bit of VMXNET3State.compat_flags and is derived
 * from the matching *_BIT position.
 */
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
    (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
    (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)

/*
 * Offsets. The MSI and MSI-X offsets depend on whether the device state (s)
 * has the OLD_MSI_OFFSETS compatibility flag set.
 */
#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET(s) \
    (((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS) ? 0x50 : 0x84)
#define VMXNET3_MSIX_OFFSET(s) \
    (((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS) ? 0 : 0x9c)
#define VMXNET3_DSN_OFFSET     (0x100)

/* BAR indices */
#define VMXNET3_BAR0_IDX      (0)
#define VMXNET3_BAR1_IDX      (1)
#define VMXNET3_MSIX_BAR_IDX  (2)

/* Offsets of the MSI-X table and PBA; the PBA offset is compat dependent */
#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA(s) \
    (((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS) ? 0x800 : 0x1000)

/* Link speed in Mbps, shifted left by 16 */
#define VMXNET3_LINK_SPEED      (1000 << 16)

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP  0x1

/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION      0x1
#define VMXNET3_DEVICE_REVISION   0x1

/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS   (1)
75 
/*
 * Macros for ring descriptor access.
 *
 * Each macro reads (ld) or writes (st) one field of a queue descriptor
 * located at address dpa, using the vmw_shmem_* helper of the matching
 * width. The field offset is taken with offsetof() from
 * struct Vmxnet3_TxQueueDesc or struct Vmxnet3_RxQueueDesc.
 *
 *   _d    - device handle forwarded to the vmw_shmem_* helper
 *   dpa   - base address of the queue descriptor
 *   field - member of the descriptor structure to access
 *   value - value to store (write macros only)
 */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

/* The stored value is an argument of vmw_shmem_st8(), not of offsetof(). */
#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                   value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                    value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_TxQueueDesc, field), \
                    value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), \
                    value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, (dpa) + offsetof(struct Vmxnet3_RxQueueDesc, field), \
                   value))
106 
/*
 * Macros for guest driver shared area access.
 *
 * dev is forwarded to the vmw_shmem_* helper, base is the address of the
 * struct Vmxnet3_DriverShared area and field names the member to access.
 */
#define VMXNET3_READ_DRV_SHARED64(dev, base, field)                          \
    (vmw_shmem_ld64(dev,                                                     \
                    base + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(dev, base, field)                          \
    (vmw_shmem_ld32(dev,                                                     \
                    base + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(dev, base, field, val)                    \
    (vmw_shmem_st32(dev,                                                     \
                    base + offsetof(struct Vmxnet3_DriverShared, field),     \
                    val))

#define VMXNET3_READ_DRV_SHARED16(dev, base, field)                          \
    (vmw_shmem_ld16(dev,                                                     \
                    base + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(dev, base, field)                           \
    (vmw_shmem_ld8(dev,                                                      \
                   base + offsetof(struct Vmxnet3_DriverShared, field)))

/* Bulk read of len bytes starting at the given field into buf */
#define VMXNET3_READ_DRV_SHARED(dev, base, field, buf, len)                  \
    (vmw_shmem_read(dev,                                                     \
                    base + offsetof(struct Vmxnet3_DriverShared, field),     \
                    buf, len))

/* True when every bit of flag is set in field */
#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
127 
128 typedef struct VMXNET3Class {
129     PCIDeviceClass parent_class;
130     DeviceRealize parent_dc_realize;
131 } VMXNET3Class;
132 
133 #define TYPE_VMXNET3 "vmxnet3"
134 #define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
135 
136 #define VMXNET3_DEVICE_CLASS(klass) \
137     OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
138 #define VMXNET3_DEVICE_GET_CLASS(obj) \
139     OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
140 
141 /* Cyclic ring abstraction */
142 typedef struct {
143     hwaddr pa;
144     size_t size;
145     size_t cell_size;
146     size_t next;
147     uint8_t gen;
148 } Vmxnet3Ring;
149 
150 static inline void vmxnet3_ring_init(PCIDevice *d,
151 				     Vmxnet3Ring *ring,
152                                      hwaddr pa,
153                                      size_t size,
154                                      size_t cell_size,
155                                      bool zero_region)
156 {
157     ring->pa = pa;
158     ring->size = size;
159     ring->cell_size = cell_size;
160     ring->gen = VMXNET3_INIT_GEN;
161     ring->next = 0;
162 
163     if (zero_region) {
164         vmw_shmem_set(d, pa, 0, size * cell_size);
165     }
166 }
167 
/*
 * Print the state of a ring through a printf-like macro.
 *
 *   printer - printf-like macro or function used for output
 *   label   - ring name (string)
 *   idx     - ring/queue index (printed with %d)
 *   ring    - pointer to the ring being dumped
 */
#define VMXNET3_RING_DUMP(printer, label, idx, ring)                          \
    printer("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu", \
            (label), (idx),                                                   \
            (ring)->pa, (ring)->size, (ring)->cell_size,                      \
            (ring)->gen, (ring)->next)
172 
173 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
174 {
175     if (++ring->next >= ring->size) {
176         ring->next = 0;
177         ring->gen ^= 1;
178     }
179 }
180 
181 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
182 {
183     if (ring->next-- == 0) {
184         ring->next = ring->size - 1;
185         ring->gen ^= 1;
186     }
187 }
188 
189 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
190 {
191     return ring->pa + ring->next * ring->cell_size;
192 }
193 
194 static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
195 					       void *buff)
196 {
197     vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
198 }
199 
200 static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
201 						void *buff)
202 {
203     vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
204 }
205 
206 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
207 {
208     return ring->next;
209 }
210 
211 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
212 {
213     return ring->gen;
214 }
215 
216 /* Debug trace-related functions */
217 static inline void
218 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
219 {
220     VMW_PKPRN("TX DESCR: "
221               "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
222               "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
223               "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
224               le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
225               descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
226               descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
227 }
228 
229 static inline void
230 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
231 {
232     VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
233               "csum_start: %d, csum_offset: %d",
234               vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
235               vhdr->csum_start, vhdr->csum_offset);
236 }
237 
238 static inline void
239 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
240 {
241     VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
242               "dtype: %d, ext1: %d, btype: %d",
243               le64_to_cpu(descr->addr), descr->len, descr->gen,
244               descr->rsvd, descr->dtype, descr->ext1, descr->btype);
245 }
246 
247 /* Device state and helper functions */
248 #define VMXNET3_RX_RINGS_PER_QUEUE (2)
249 
250 typedef struct {
251     Vmxnet3Ring tx_ring;
252     Vmxnet3Ring comp_ring;
253 
254     uint8_t intr_idx;
255     hwaddr tx_stats_pa;
256     struct UPT1_TxStats txq_stats;
257 } Vmxnet3TxqDescr;
258 
259 typedef struct {
260     Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
261     Vmxnet3Ring comp_ring;
262     uint8_t intr_idx;
263     hwaddr rx_stats_pa;
264     struct UPT1_RxStats rxq_stats;
265 } Vmxnet3RxqDescr;
266 
/* Software state tracked for every device interrupt */
typedef struct {
    bool is_masked;    /* interrupt is currently masked */
    bool is_pending;   /* interrupt was triggered but not delivered yet */
    bool is_asserted;  /* interrupt line is currently asserted */
} Vmxnet3IntState;
272 
273 typedef struct {
274         PCIDevice parent_obj;
275         NICState *nic;
276         NICConf conf;
277         MemoryRegion bar0;
278         MemoryRegion bar1;
279         MemoryRegion msix_bar;
280 
281         Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
282         Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
283 
284         /* Whether MSI-X support was installed successfully */
285         bool msix_used;
286         hwaddr drv_shmem;
287         hwaddr temp_shared_guest_driver_memory;
288 
289         uint8_t txq_num;
290 
291         /* This boolean tells whether RX packet being indicated has to */
292         /* be split into head and body chunks from different RX rings  */
293         bool rx_packets_compound;
294 
295         bool rx_vlan_stripping;
296         bool lro_supported;
297 
298         uint8_t rxq_num;
299 
300         /* Network MTU */
301         uint32_t mtu;
302 
303         /* Maximum number of fragments for indicated TX packets */
304         uint32_t max_tx_frags;
305 
306         /* Maximum number of fragments for indicated RX packets */
307         uint16_t max_rx_frags;
308 
309         /* Index for events interrupt */
310         uint8_t event_int_idx;
311 
312         /* Whether automatic interrupts masking enabled */
313         bool auto_int_masking;
314 
315         bool peer_has_vhdr;
316 
317         /* TX packets to QEMU interface */
318         struct NetTxPkt *tx_pkt;
319         uint32_t offload_mode;
320         uint32_t cso_or_gso_size;
321         uint16_t tci;
322         bool needs_vlan;
323 
324         struct NetRxPkt *rx_pkt;
325 
326         bool tx_sop;
327         bool skip_current_tx_pkt;
328 
329         uint32_t device_active;
330         uint32_t last_command;
331 
332         uint32_t link_status_and_speed;
333 
334         Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
335 
336         uint32_t temp_mac;   /* To store the low part first */
337 
338         MACAddr perm_mac;
339         uint32_t vlan_table[VMXNET3_VFT_SIZE];
340         uint32_t rx_mode;
341         MACAddr *mcast_list;
342         uint32_t mcast_list_len;
343         uint32_t mcast_list_buff_size; /* needed for live migration. */
344 
345         /* Compatibility flags for migration */
346         uint32_t compat_flags;
347 } VMXNET3State;
348 
349 /* Interrupt management */
350 
351 /*
352  * This function returns sign whether interrupt line is in asserted state
353  * This depends on the type of interrupt used. For INTX interrupt line will
354  * be asserted until explicit deassertion, for MSI(X) interrupt line will
355  * be deasserted automatically due to notification semantics of the MSI(X)
356  * interrupts
357  */
358 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
359 {
360     PCIDevice *d = PCI_DEVICE(s);
361 
362     if (s->msix_used && msix_enabled(d)) {
363         VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
364         msix_notify(d, int_idx);
365         return false;
366     }
367     if (msi_enabled(d)) {
368         VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
369         msi_notify(d, int_idx);
370         return false;
371     }
372 
373     VMW_IRPRN("Asserting line for interrupt %u", int_idx);
374     pci_irq_assert(d);
375     return true;
376 }
377 
378 static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
379 {
380     PCIDevice *d = PCI_DEVICE(s);
381 
382     /*
383      * This function should never be called for MSI(X) interrupts
384      * because deassertion never required for message interrupts
385      */
386     assert(!s->msix_used || !msix_enabled(d));
387     /*
388      * This function should never be called for MSI(X) interrupts
389      * because deassertion never required for message interrupts
390      */
391     assert(!msi_enabled(d));
392 
393     VMW_IRPRN("Deasserting line for interrupt %u", lidx);
394     pci_irq_deassert(d);
395 }
396 
397 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
398 {
399     if (!s->interrupt_states[lidx].is_pending &&
400        s->interrupt_states[lidx].is_asserted) {
401         VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
402         _vmxnet3_deassert_interrupt_line(s, lidx);
403         s->interrupt_states[lidx].is_asserted = false;
404         return;
405     }
406 
407     if (s->interrupt_states[lidx].is_pending &&
408        !s->interrupt_states[lidx].is_masked &&
409        !s->interrupt_states[lidx].is_asserted) {
410         VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
411         s->interrupt_states[lidx].is_asserted =
412             _vmxnet3_assert_interrupt_line(s, lidx);
413         s->interrupt_states[lidx].is_pending = false;
414         return;
415     }
416 }
417 
418 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
419 {
420     PCIDevice *d = PCI_DEVICE(s);
421     s->interrupt_states[lidx].is_pending = true;
422     vmxnet3_update_interrupt_line_state(s, lidx);
423 
424     if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
425         goto do_automask;
426     }
427 
428     if (msi_enabled(d) && s->auto_int_masking) {
429         goto do_automask;
430     }
431 
432     return;
433 
434 do_automask:
435     s->interrupt_states[lidx].is_masked = true;
436     vmxnet3_update_interrupt_line_state(s, lidx);
437 }
438 
439 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
440 {
441     return s->interrupt_states[lidx].is_asserted;
442 }
443 
444 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
445 {
446     s->interrupt_states[int_idx].is_pending = false;
447     if (s->auto_int_masking) {
448         s->interrupt_states[int_idx].is_masked = true;
449     }
450     vmxnet3_update_interrupt_line_state(s, int_idx);
451 }
452 
453 static void
454 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
455 {
456     s->interrupt_states[lidx].is_masked = is_masked;
457     vmxnet3_update_interrupt_line_state(s, lidx);
458 }
459 
460 static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
461 {
462     return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
463 }
464 
/* Extract byte number byte_num (0 is the least significant one) from x */
#define VMXNET3_GET_BYTE(x, byte_num) \
    (((x) >> ((byte_num) * 8)) & 0xFF)

/* Place the low 8 bits of val at byte position byte_num of a 32-bit word */
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    ((uint32_t)((val) & 0xFF) << ((byte_num) * 8))
468 
469 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
470 {
471     s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
472     s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
473     s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
474     s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
475     s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
476     s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
477 
478     VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
479 
480     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
481 }
482 
483 static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
484 {
485     return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
486            VMXNET3_MAKE_BYTE(1, addr->a[1]) |
487            VMXNET3_MAKE_BYTE(2, addr->a[2]) |
488            VMXNET3_MAKE_BYTE(3, addr->a[3]);
489 }
490 
491 static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
492 {
493     return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
494            VMXNET3_MAKE_BYTE(1, addr->a[5]);
495 }
496 
497 static void
498 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
499 {
500     vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
501 }
502 
503 static inline void
504 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
505 {
506     vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
507 }
508 
509 static inline void
510 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
511 {
512     vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
513 }
514 
515 static void
516 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
517 {
518     vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
519 }
520 
521 static void
522 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
523 {
524     vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
525 }
526 
527 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
528 {
529     struct Vmxnet3_TxCompDesc txcq_descr;
530     PCIDevice *d = PCI_DEVICE(s);
531 
532     VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
533 
534     txcq_descr.txdIdx = tx_ridx;
535     txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
536 
537     vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);
538 
539     /* Flush changes in TX descriptor before changing the counter value */
540     smp_wmb();
541 
542     vmxnet3_inc_tx_completion_counter(s, qidx);
543     vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
544 }
545 
546 static bool
547 vmxnet3_setup_tx_offloads(VMXNET3State *s)
548 {
549     switch (s->offload_mode) {
550     case VMXNET3_OM_NONE:
551         net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
552         break;
553 
554     case VMXNET3_OM_CSUM:
555         net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
556         VMW_PKPRN("L4 CSO requested\n");
557         break;
558 
559     case VMXNET3_OM_TSO:
560         net_tx_pkt_build_vheader(s->tx_pkt, true, true,
561             s->cso_or_gso_size);
562         net_tx_pkt_update_ip_checksums(s->tx_pkt);
563         VMW_PKPRN("GSO offload requested.");
564         break;
565 
566     default:
567         g_assert_not_reached();
568         return false;
569     }
570 
571     return true;
572 }
573 
574 static void
575 vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
576                              const struct Vmxnet3_TxDesc *txd)
577 {
578     s->offload_mode = txd->om;
579     s->cso_or_gso_size = txd->msscof;
580     s->tci = txd->tci;
581     s->needs_vlan = txd->ti;
582 }
583 
/* Outcome of processing a single TX or RX packet */
typedef enum {
    VMXNET3_PKT_STATUS_OK,          /* packet processed successfully */
    VMXNET3_PKT_STATUS_ERROR,       /* packet processing failed */
    VMXNET3_PKT_STATUS_DISCARD,     /* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF   /* only for rx */
} Vmxnet3PktStatus;
590 
591 static void
592 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
593     Vmxnet3PktStatus status)
594 {
595     size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
596     struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
597 
598     switch (status) {
599     case VMXNET3_PKT_STATUS_OK:
600         switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
601         case ETH_PKT_BCAST:
602             stats->bcastPktsTxOK++;
603             stats->bcastBytesTxOK += tot_len;
604             break;
605         case ETH_PKT_MCAST:
606             stats->mcastPktsTxOK++;
607             stats->mcastBytesTxOK += tot_len;
608             break;
609         case ETH_PKT_UCAST:
610             stats->ucastPktsTxOK++;
611             stats->ucastBytesTxOK += tot_len;
612             break;
613         default:
614             g_assert_not_reached();
615         }
616 
617         if (s->offload_mode == VMXNET3_OM_TSO) {
618             /*
619              * According to VMWARE headers this statistic is a number
620              * of packets after segmentation but since we don't have
621              * this information in QEMU model, the best we can do is to
622              * provide number of non-segmented packets
623              */
624             stats->TSOPktsTxOK++;
625             stats->TSOBytesTxOK += tot_len;
626         }
627         break;
628 
629     case VMXNET3_PKT_STATUS_DISCARD:
630         stats->pktsTxDiscard++;
631         break;
632 
633     case VMXNET3_PKT_STATUS_ERROR:
634         stats->pktsTxError++;
635         break;
636 
637     default:
638         g_assert_not_reached();
639     }
640 }
641 
642 static void
643 vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
644                                 int qidx,
645                                 Vmxnet3PktStatus status)
646 {
647     struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
648     size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
649 
650     switch (status) {
651     case VMXNET3_PKT_STATUS_OUT_OF_BUF:
652         stats->pktsRxOutOfBuf++;
653         break;
654 
655     case VMXNET3_PKT_STATUS_ERROR:
656         stats->pktsRxError++;
657         break;
658     case VMXNET3_PKT_STATUS_OK:
659         switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
660         case ETH_PKT_BCAST:
661             stats->bcastPktsRxOK++;
662             stats->bcastBytesRxOK += tot_len;
663             break;
664         case ETH_PKT_MCAST:
665             stats->mcastPktsRxOK++;
666             stats->mcastBytesRxOK += tot_len;
667             break;
668         case ETH_PKT_UCAST:
669             stats->ucastPktsRxOK++;
670             stats->ucastBytesRxOK += tot_len;
671             break;
672         default:
673             g_assert_not_reached();
674         }
675 
676         if (tot_len > s->mtu) {
677             stats->LROPktsRxOK++;
678             stats->LROBytesRxOK += tot_len;
679         }
680         break;
681     default:
682         g_assert_not_reached();
683     }
684 }
685 
686 static inline bool
687 vmxnet3_pop_next_tx_descr(VMXNET3State *s,
688                           int qidx,
689                           struct Vmxnet3_TxDesc *txd,
690                           uint32_t *descr_idx)
691 {
692     Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
693     PCIDevice *d = PCI_DEVICE(s);
694 
695     vmxnet3_ring_read_curr_cell(d, ring, txd);
696     if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
697         /* Only read after generation field verification */
698         smp_rmb();
699         /* Re-read to be sure we got the latest version */
700         vmxnet3_ring_read_curr_cell(d, ring, txd);
701         VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
702         *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
703         vmxnet3_inc_tx_consumption_counter(s, qidx);
704         return true;
705     }
706 
707     return false;
708 }
709 
710 static bool
711 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
712 {
713     Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
714 
715     if (!vmxnet3_setup_tx_offloads(s)) {
716         status = VMXNET3_PKT_STATUS_ERROR;
717         goto func_exit;
718     }
719 
720     /* debug prints */
721     vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
722     net_tx_pkt_dump(s->tx_pkt);
723 
724     if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
725         status = VMXNET3_PKT_STATUS_DISCARD;
726         goto func_exit;
727     }
728 
729 func_exit:
730     vmxnet3_on_tx_done_update_stats(s, qidx, status);
731     return (status == VMXNET3_PKT_STATUS_OK);
732 }
733 
734 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
735 {
736     struct Vmxnet3_TxDesc txd;
737     uint32_t txd_idx;
738     uint32_t data_len;
739     hwaddr data_pa;
740 
741     for (;;) {
742         if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
743             break;
744         }
745 
746         vmxnet3_dump_tx_descr(&txd);
747 
748         if (!s->skip_current_tx_pkt) {
749             data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
750             data_pa = le64_to_cpu(txd.addr);
751 
752             if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
753                                                 data_pa,
754                                                 data_len)) {
755                 s->skip_current_tx_pkt = true;
756             }
757         }
758 
759         if (s->tx_sop) {
760             vmxnet3_tx_retrieve_metadata(s, &txd);
761             s->tx_sop = false;
762         }
763 
764         if (txd.eop) {
765             if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
766                 if (s->needs_vlan) {
767                     net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
768                 }
769 
770                 vmxnet3_send_packet(s, qidx);
771             } else {
772                 vmxnet3_on_tx_done_update_stats(s, qidx,
773                                                 VMXNET3_PKT_STATUS_ERROR);
774             }
775 
776             vmxnet3_complete_packet(s, qidx, txd_idx);
777             s->tx_sop = true;
778             s->skip_current_tx_pkt = false;
779             net_tx_pkt_reset(s->tx_pkt);
780         }
781     }
782 }
783 
784 static inline void
785 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
786                            struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
787 {
788     PCIDevice *d = PCI_DEVICE(s);
789 
790     Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
791     *didx = vmxnet3_ring_curr_cell_idx(ring);
792     vmxnet3_ring_read_curr_cell(d, ring, dbuf);
793 }
794 
795 static inline uint8_t
796 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
797 {
798     return s->rxq_descr[qidx].rx_ring[ridx].gen;
799 }
800 
801 static inline hwaddr
802 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
803 {
804     uint8_t ring_gen;
805     struct Vmxnet3_RxCompDesc rxcd;
806 
807     hwaddr daddr =
808         vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
809 
810     pci_dma_read(PCI_DEVICE(s),
811                  daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
812     ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
813 
814     if (rxcd.gen != ring_gen) {
815         *descr_gen = ring_gen;
816         vmxnet3_inc_rx_completion_counter(s, qidx);
817         return daddr;
818     }
819 
820     return 0;
821 }
822 
823 static inline void
824 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
825 {
826     vmxnet3_dec_rx_completion_counter(s, qidx);
827 }
828 
/* Index of the RX queue used by the RX descriptor lookup helpers */
#define RXQ_IDX           (0)
/* RX ring indices: ring 0 is the head/body ring, ring 1 the body-only ring */
#define RX_HEAD_BODY_RING (0)
#define RX_BODY_ONLY_RING (1)
832 
833 static bool
834 vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
835                                struct Vmxnet3_RxDesc *descr_buf,
836                                uint32_t *descr_idx,
837                                uint32_t *ridx)
838 {
839     for (;;) {
840         uint32_t ring_gen;
841         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
842                                    descr_buf, descr_idx);
843 
844         /* If no more free descriptors - return */
845         ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
846         if (descr_buf->gen != ring_gen) {
847             return false;
848         }
849 
850         /* Only read after generation field verification */
851         smp_rmb();
852         /* Re-read to be sure we got the latest version */
853         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
854                                    descr_buf, descr_idx);
855 
856         /* Mark current descriptor as used/skipped */
857         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
858 
859         /* If this is what we are looking for - return */
860         if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
861             *ridx = RX_HEAD_BODY_RING;
862             return true;
863         }
864     }
865 }
866 
867 static bool
868 vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
869                                struct Vmxnet3_RxDesc *d,
870                                uint32_t *didx,
871                                uint32_t *ridx)
872 {
873     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
874 
875     /* Try to find corresponding descriptor in head/body ring */
876     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
877         /* Only read after generation field verification */
878         smp_rmb();
879         /* Re-read to be sure we got the latest version */
880         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
881         if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
882             vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
883             *ridx = RX_HEAD_BODY_RING;
884             return true;
885         }
886     }
887 
888     /*
889      * If there is no free descriptors on head/body ring or next free
890      * descriptor is a head descriptor switch to body only ring
891      */
892     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
893 
894     /* If no more free descriptors - return */
895     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
896         /* Only read after generation field verification */
897         smp_rmb();
898         /* Re-read to be sure we got the latest version */
899         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
900         assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
901         *ridx = RX_BODY_ONLY_RING;
902         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
903         return true;
904     }
905 
906     return false;
907 }
908 
909 static inline bool
910 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
911                           struct Vmxnet3_RxDesc *descr_buf,
912                           uint32_t *descr_idx,
913                           uint32_t *ridx)
914 {
915     if (is_head || !s->rx_packets_compound) {
916         return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
917     } else {
918         return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
919     }
920 }
921 
922 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
923  * the implementation always passes an RxCompDesc with a "Checksum
924  * calculated and found correct" to the OS (cnc=0 and tuc=1, see
925  * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
926  *
927  * Therefore, if packet has the NEEDS_CSUM set, we must calculate
928  * and place a fully computed checksum into the tcp/udp header.
929  * Otherwise, the OS driver will receive a checksum-correct indication
930  * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
931  * having just the pseudo header csum value.
932  *
933  * While this is not a problem if packet is destined for local delivery,
934  * in the case the host OS performs forwarding, it will forward an
935  * incorrectly checksummed packet.
936  */
937 static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
938                                            const void *pkt_data,
939                                            size_t pkt_len)
940 {
941     struct virtio_net_hdr *vhdr;
942     bool isip4, isip6, istcp, isudp;
943     uint8_t *data;
944     int len;
945 
946     if (!net_rx_pkt_has_virt_hdr(pkt)) {
947         return;
948     }
949 
950     vhdr = net_rx_pkt_get_vhdr(pkt);
951     if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
952         return;
953     }
954 
955     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
956     if (!(isip4 || isip6) || !(istcp || isudp)) {
957         return;
958     }
959 
960     vmxnet3_dump_virt_hdr(vhdr);
961 
962     /* Validate packet len: csum_start + scum_offset + length of csum field */
963     if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
964         VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
965                   "cannot calculate checksum",
966                   pkt_len, vhdr->csum_start, vhdr->csum_offset);
967         return;
968     }
969 
970     data = (uint8_t *)pkt_data + vhdr->csum_start;
971     len = pkt_len - vhdr->csum_start;
972     /* Put the checksum obtained into the packet */
973     stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));
974 
975     vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
976     vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
977 }
978 
979 static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
980     struct Vmxnet3_RxCompDesc *rxcd)
981 {
982     int csum_ok, is_gso;
983     bool isip4, isip6, istcp, isudp;
984     struct virtio_net_hdr *vhdr;
985     uint8_t offload_type;
986 
987     if (net_rx_pkt_is_vlan_stripped(pkt)) {
988         rxcd->ts = 1;
989         rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
990     }
991 
992     if (!net_rx_pkt_has_virt_hdr(pkt)) {
993         goto nocsum;
994     }
995 
996     vhdr = net_rx_pkt_get_vhdr(pkt);
997     /*
998      * Checksum is valid when lower level tell so or when lower level
999      * requires checksum offload telling that packet produced/bridged
1000      * locally and did travel over network after last checksum calculation
1001      * or production
1002      */
1003     csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
1004               VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
1005 
1006     offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
1007     is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
1008 
1009     if (!csum_ok && !is_gso) {
1010         goto nocsum;
1011     }
1012 
1013     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1014     if ((!istcp && !isudp) || (!isip4 && !isip6)) {
1015         goto nocsum;
1016     }
1017 
1018     rxcd->cnc = 0;
1019     rxcd->v4 = isip4 ? 1 : 0;
1020     rxcd->v6 = isip6 ? 1 : 0;
1021     rxcd->tcp = istcp ? 1 : 0;
1022     rxcd->udp = isudp ? 1 : 0;
1023     rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
1024     return;
1025 
1026 nocsum:
1027     rxcd->cnc = 1;
1028     return;
1029 }
1030 
1031 static void
1032 vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
1033                        const struct iovec *iov,
1034                        size_t start_iov_off,
1035                        hwaddr target_addr,
1036                        size_t bytes_to_copy)
1037 {
1038     size_t curr_off = 0;
1039     size_t copied = 0;
1040 
1041     while (bytes_to_copy) {
1042         if (start_iov_off < (curr_off + iov->iov_len)) {
1043             size_t chunk_len =
1044                 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
1045 
1046             pci_dma_write(pci_dev, target_addr + copied,
1047                           iov->iov_base + start_iov_off - curr_off,
1048                           chunk_len);
1049 
1050             copied += chunk_len;
1051             start_iov_off += chunk_len;
1052             curr_off = start_iov_off;
1053             bytes_to_copy -= chunk_len;
1054         } else {
1055             curr_off += iov->iov_len;
1056         }
1057         iov++;
1058     }
1059 }
1060 
1061 static bool
1062 vmxnet3_indicate_packet(VMXNET3State *s)
1063 {
1064     struct Vmxnet3_RxDesc rxd;
1065     PCIDevice *d = PCI_DEVICE(s);
1066     bool is_head = true;
1067     uint32_t rxd_idx;
1068     uint32_t rx_ridx = 0;
1069 
1070     struct Vmxnet3_RxCompDesc rxcd;
1071     uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
1072     hwaddr new_rxcd_pa = 0;
1073     hwaddr ready_rxcd_pa = 0;
1074     struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
1075     size_t bytes_copied = 0;
1076     size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
1077     uint16_t num_frags = 0;
1078     size_t chunk_size;
1079 
1080     net_rx_pkt_dump(s->rx_pkt);
1081 
1082     while (bytes_left > 0) {
1083 
1084         /* cannot add more frags to packet */
1085         if (num_frags == s->max_rx_frags) {
1086             break;
1087         }
1088 
1089         new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
1090         if (!new_rxcd_pa) {
1091             break;
1092         }
1093 
1094         if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
1095             break;
1096         }
1097 
1098         chunk_size = MIN(bytes_left, rxd.len);
1099         vmxnet3_pci_dma_writev(d, data, bytes_copied,
1100                                le64_to_cpu(rxd.addr), chunk_size);
1101         bytes_copied += chunk_size;
1102         bytes_left -= chunk_size;
1103 
1104         vmxnet3_dump_rx_descr(&rxd);
1105 
1106         if (ready_rxcd_pa != 0) {
1107             pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
1108         }
1109 
1110         memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
1111         rxcd.rxdIdx = rxd_idx;
1112         rxcd.len = chunk_size;
1113         rxcd.sop = is_head;
1114         rxcd.gen = new_rxcd_gen;
1115         rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;
1116 
1117         if (bytes_left == 0) {
1118             vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
1119         }
1120 
1121         VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
1122                   "sop %d csum_correct %lu",
1123                   (unsigned long) rx_ridx,
1124                   (unsigned long) rxcd.rxdIdx,
1125                   (unsigned long) rxcd.len,
1126                   (int) rxcd.sop,
1127                   (unsigned long) rxcd.tuc);
1128 
1129         is_head = false;
1130         ready_rxcd_pa = new_rxcd_pa;
1131         new_rxcd_pa = 0;
1132         num_frags++;
1133     }
1134 
1135     if (ready_rxcd_pa != 0) {
1136         rxcd.eop = 1;
1137         rxcd.err = (bytes_left != 0);
1138 
1139         pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
1140 
1141         /* Flush RX descriptor changes */
1142         smp_wmb();
1143     }
1144 
1145     if (new_rxcd_pa != 0) {
1146         vmxnet3_revert_rxc_descr(s, RXQ_IDX);
1147     }
1148 
1149     vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);
1150 
1151     if (bytes_left == 0) {
1152         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
1153         return true;
1154     } else if (num_frags == s->max_rx_frags) {
1155         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
1156         return false;
1157     } else {
1158         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
1159                                         VMXNET3_PKT_STATUS_OUT_OF_BUF);
1160         return false;
1161     }
1162 }
1163 
1164 static void
1165 vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1166                       uint64_t val, unsigned size)
1167 {
1168     VMXNET3State *s = opaque;
1169 
1170     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1171                         VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1172         int tx_queue_idx =
1173             VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1174                                      VMXNET3_REG_ALIGN);
1175         assert(tx_queue_idx <= s->txq_num);
1176         vmxnet3_process_tx_queue(s, tx_queue_idx);
1177         return;
1178     }
1179 
1180     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1181                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1182         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1183                                          VMXNET3_REG_ALIGN);
1184 
1185         VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1186 
1187         vmxnet3_on_interrupt_mask_changed(s, l, val);
1188         return;
1189     }
1190 
1191     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1192                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1193        VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1194                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1195         return;
1196     }
1197 
1198     VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1199               (uint64_t) addr, val, size);
1200 }
1201 
1202 static uint64_t
1203 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1204 {
1205     VMXNET3State *s = opaque;
1206 
1207     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1208                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1209         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1210                                          VMXNET3_REG_ALIGN);
1211         return s->interrupt_states[l].is_masked;
1212     }
1213 
1214     VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1215     return 0;
1216 }
1217 
1218 static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1219 {
1220     int i;
1221     for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1222         s->interrupt_states[i].is_asserted = false;
1223         s->interrupt_states[i].is_pending = false;
1224         s->interrupt_states[i].is_masked = true;
1225     }
1226 }
1227 
1228 static void vmxnet3_reset_mac(VMXNET3State *s)
1229 {
1230     memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1231     VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
1232 }
1233 
1234 static void vmxnet3_deactivate_device(VMXNET3State *s)
1235 {
1236     if (s->device_active) {
1237         VMW_CBPRN("Deactivating vmxnet3...");
1238         net_tx_pkt_reset(s->tx_pkt);
1239         net_tx_pkt_uninit(s->tx_pkt);
1240         net_rx_pkt_uninit(s->rx_pkt);
1241         s->device_active = false;
1242     }
1243 }
1244 
1245 static void vmxnet3_reset(VMXNET3State *s)
1246 {
1247     VMW_CBPRN("Resetting vmxnet3...");
1248 
1249     vmxnet3_deactivate_device(s);
1250     vmxnet3_reset_interrupt_states(s);
1251     s->drv_shmem = 0;
1252     s->tx_sop = true;
1253     s->skip_current_tx_pkt = false;
1254 }
1255 
1256 static void vmxnet3_update_rx_mode(VMXNET3State *s)
1257 {
1258     PCIDevice *d = PCI_DEVICE(s);
1259 
1260     s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1261                                            devRead.rxFilterConf.rxMode);
1262     VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1263 }
1264 
1265 static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1266 {
1267     int i;
1268     PCIDevice *d = PCI_DEVICE(s);
1269 
1270     /* Copy configuration from shared memory */
1271     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
1272                             devRead.rxFilterConf.vfTable,
1273                             s->vlan_table,
1274                             sizeof(s->vlan_table));
1275 
1276     /* Invert byte order when needed */
1277     for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1278         s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1279     }
1280 
1281     /* Dump configuration for debugging purposes */
1282     VMW_CFPRN("Configured VLANs:");
1283     for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1284         if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1285             VMW_CFPRN("\tVLAN %d is present", i);
1286         }
1287     }
1288 }
1289 
1290 static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1291 {
1292     PCIDevice *d = PCI_DEVICE(s);
1293 
1294     uint16_t list_bytes =
1295         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
1296                                   devRead.rxFilterConf.mfTableLen);
1297 
1298     s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1299 
1300     s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1301     if (!s->mcast_list) {
1302         if (s->mcast_list_len == 0) {
1303             VMW_CFPRN("Current multicast list is empty");
1304         } else {
1305             VMW_ERPRN("Failed to allocate multicast list of %d elements",
1306                       s->mcast_list_len);
1307         }
1308         s->mcast_list_len = 0;
1309     } else {
1310         int i;
1311         hwaddr mcast_list_pa =
1312             VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
1313                                       devRead.rxFilterConf.mfTablePA);
1314 
1315         pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);
1316 
1317         VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1318         for (i = 0; i < s->mcast_list_len; i++) {
1319             VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
1320         }
1321     }
1322 }
1323 
1324 static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
1325 {
1326     vmxnet3_update_rx_mode(s);
1327     vmxnet3_update_vlan_filters(s);
1328     vmxnet3_update_mcast_filters(s);
1329 }
1330 
1331 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1332 {
1333     uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1334     VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1335     return interrupt_mode;
1336 }
1337 
1338 static void vmxnet3_fill_stats(VMXNET3State *s)
1339 {
1340     int i;
1341     PCIDevice *d = PCI_DEVICE(s);
1342 
1343     if (!s->device_active)
1344         return;
1345 
1346     for (i = 0; i < s->txq_num; i++) {
1347         pci_dma_write(d,
1348                       s->txq_descr[i].tx_stats_pa,
1349                       &s->txq_descr[i].txq_stats,
1350                       sizeof(s->txq_descr[i].txq_stats));
1351     }
1352 
1353     for (i = 0; i < s->rxq_num; i++) {
1354         pci_dma_write(d,
1355                       s->rxq_descr[i].rx_stats_pa,
1356                       &s->rxq_descr[i].rxq_stats,
1357                       sizeof(s->rxq_descr[i].rxq_stats));
1358     }
1359 }
1360 
1361 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1362 {
1363     struct Vmxnet3_GOSInfo gos;
1364     PCIDevice *d = PCI_DEVICE(s);
1365 
1366     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
1367                             &gos, sizeof(gos));
1368     s->rx_packets_compound =
1369         (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1370 
1371     VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1372 }
1373 
1374 static void
1375 vmxnet3_dump_conf_descr(const char *name,
1376                         struct Vmxnet3_VariableLenConfDesc *pm_descr)
1377 {
1378     VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1379               name, pm_descr->confVer, pm_descr->confLen);
1380 
1381 };
1382 
1383 static void vmxnet3_update_pm_state(VMXNET3State *s)
1384 {
1385     struct Vmxnet3_VariableLenConfDesc pm_descr;
1386     PCIDevice *d = PCI_DEVICE(s);
1387 
1388     pm_descr.confLen =
1389         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
1390     pm_descr.confVer =
1391         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
1392     pm_descr.confPA =
1393         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);
1394 
1395     vmxnet3_dump_conf_descr("PM State", &pm_descr);
1396 }
1397 
1398 static void vmxnet3_update_features(VMXNET3State *s)
1399 {
1400     uint32_t guest_features;
1401     int rxcso_supported;
1402     PCIDevice *d = PCI_DEVICE(s);
1403 
1404     guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1405                                                devRead.misc.uptFeatures);
1406 
1407     rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1408     s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1409     s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1410 
1411     VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1412               s->lro_supported, rxcso_supported,
1413               s->rx_vlan_stripping);
1414     if (s->peer_has_vhdr) {
1415         qemu_set_offload(qemu_get_queue(s->nic)->peer,
1416                          rxcso_supported,
1417                          s->lro_supported,
1418                          s->lro_supported,
1419                          0,
1420                          0);
1421     }
1422 }
1423 
1424 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1425 {
1426     return s->msix_used || msi_enabled(PCI_DEVICE(s))
1427         || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
1428 }
1429 
1430 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1431 {
1432     int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1433     if (idx >= max_ints) {
1434         hw_error("Bad interrupt index: %d\n", idx);
1435     }
1436 }
1437 
1438 static void vmxnet3_validate_interrupts(VMXNET3State *s)
1439 {
1440     int i;
1441 
1442     VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1443     vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1444 
1445     for (i = 0; i < s->txq_num; i++) {
1446         int idx = s->txq_descr[i].intr_idx;
1447         VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1448         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1449     }
1450 
1451     for (i = 0; i < s->rxq_num; i++) {
1452         int idx = s->rxq_descr[i].intr_idx;
1453         VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1454         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1455     }
1456 }
1457 
1458 static void vmxnet3_validate_queues(VMXNET3State *s)
1459 {
1460     /*
1461     * txq_num and rxq_num are total number of queues
1462     * configured by guest. These numbers must not
1463     * exceed corresponding maximal values.
1464     */
1465 
1466     if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1467         hw_error("Bad TX queues number: %d\n", s->txq_num);
1468     }
1469 
1470     if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1471         hw_error("Bad RX queues number: %d\n", s->rxq_num);
1472     }
1473 }
1474 
1475 static void vmxnet3_activate_device(VMXNET3State *s)
1476 {
1477     int i;
1478     static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
1479     PCIDevice *d = PCI_DEVICE(s);
1480     hwaddr qdescr_table_pa;
1481     uint64_t pa;
1482     uint32_t size;
1483 
1484     /* Verify configuration consistency */
1485     if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
1486         VMW_ERPRN("Device configuration received from driver is invalid");
1487         return;
1488     }
1489 
1490     /* Verify if device is active */
1491     if (s->device_active) {
1492         VMW_CFPRN("Vmxnet3 device is active");
1493         return;
1494     }
1495 
1496     vmxnet3_adjust_by_guest_type(s);
1497     vmxnet3_update_features(s);
1498     vmxnet3_update_pm_state(s);
1499     vmxnet3_setup_rx_filtering(s);
1500     /* Cache fields from shared memory */
1501     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
1502     VMW_CFPRN("MTU is %u", s->mtu);
1503 
1504     s->max_rx_frags =
1505         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);
1506 
1507     if (s->max_rx_frags == 0) {
1508         s->max_rx_frags = 1;
1509     }
1510 
1511     VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);
1512 
1513     s->event_int_idx =
1514         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
1515     assert(vmxnet3_verify_intx(s, s->event_int_idx));
1516     VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);
1517 
1518     s->auto_int_masking =
1519         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
1520     VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);
1521 
1522     s->txq_num =
1523         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
1524     s->rxq_num =
1525         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);
1526 
1527     VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
1528     vmxnet3_validate_queues(s);
1529 
1530     qdescr_table_pa =
1531         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
1532     VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);
1533 
1534     /*
1535      * Worst-case scenario is a packet that holds all TX rings space so
1536      * we calculate total size of all TX rings for max TX fragments number
1537      */
1538     s->max_tx_frags = 0;
1539 
1540     /* TX queues */
1541     for (i = 0; i < s->txq_num; i++) {
1542         hwaddr qdescr_pa =
1543             qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
1544 
1545         /* Read interrupt number for this TX queue */
1546         s->txq_descr[i].intr_idx =
1547             VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
1548         assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));
1549 
1550         VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
1551 
1552         /* Read rings memory locations for TX queues */
1553         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
1554         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
1555 
1556         vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
1557                           sizeof(struct Vmxnet3_TxDesc), false);
1558         VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);
1559 
1560         s->max_tx_frags += size;
1561 
1562         /* TXC ring */
1563         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
1564         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
1565         vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
1566                           sizeof(struct Vmxnet3_TxCompDesc), true);
1567         VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
1568 
1569         s->txq_descr[i].tx_stats_pa =
1570             qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
1571 
1572         memset(&s->txq_descr[i].txq_stats, 0,
1573                sizeof(s->txq_descr[i].txq_stats));
1574 
1575         /* Fill device-managed parameters for queues */
1576         VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
1577                                        ctrl.txThreshold,
1578                                        VMXNET3_DEF_TX_THRESHOLD);
1579     }
1580 
1581     /* Preallocate TX packet wrapper */
1582     VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
1583     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
1584                     s->max_tx_frags, s->peer_has_vhdr);
1585     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
1586 
1587     /* Read rings memory locations for RX queues */
1588     for (i = 0; i < s->rxq_num; i++) {
1589         int j;
1590         hwaddr qd_pa =
1591             qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
1592             i * sizeof(struct Vmxnet3_RxQueueDesc);
1593 
1594         /* Read interrupt number for this RX queue */
1595         s->rxq_descr[i].intr_idx =
1596             VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
1597         assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));
1598 
1599         VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
1600 
1601         /* Read rings memory locations */
1602         for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
1603             /* RX rings */
1604             pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
1605             size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
1606             vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
1607                               sizeof(struct Vmxnet3_RxDesc), false);
1608             VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
1609                       i, j, pa, size);
1610         }
1611 
1612         /* RXC ring */
1613         pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
1614         size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
1615         vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
1616                           sizeof(struct Vmxnet3_RxCompDesc), true);
1617         VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
1618 
1619         s->rxq_descr[i].rx_stats_pa =
1620             qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
1621         memset(&s->rxq_descr[i].rxq_stats, 0,
1622                sizeof(s->rxq_descr[i].rxq_stats));
1623     }
1624 
1625     vmxnet3_validate_interrupts(s);
1626 
1627     /* Make sure everything is in place before device activation */
1628     smp_wmb();
1629 
1630     vmxnet3_reset_mac(s);
1631 
1632     s->device_active = true;
1633 }
1634 
1635 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1636 {
1637     s->last_command = cmd;
1638 
1639     switch (cmd) {
1640     case VMXNET3_CMD_GET_PERM_MAC_HI:
1641         VMW_CBPRN("Set: Get upper part of permanent MAC");
1642         break;
1643 
1644     case VMXNET3_CMD_GET_PERM_MAC_LO:
1645         VMW_CBPRN("Set: Get lower part of permanent MAC");
1646         break;
1647 
1648     case VMXNET3_CMD_GET_STATS:
1649         VMW_CBPRN("Set: Get device statistics");
1650         vmxnet3_fill_stats(s);
1651         break;
1652 
1653     case VMXNET3_CMD_ACTIVATE_DEV:
1654         VMW_CBPRN("Set: Activating vmxnet3 device");
1655         vmxnet3_activate_device(s);
1656         break;
1657 
1658     case VMXNET3_CMD_UPDATE_RX_MODE:
1659         VMW_CBPRN("Set: Update rx mode");
1660         vmxnet3_update_rx_mode(s);
1661         break;
1662 
1663     case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1664         VMW_CBPRN("Set: Update VLAN filters");
1665         vmxnet3_update_vlan_filters(s);
1666         break;
1667 
1668     case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1669         VMW_CBPRN("Set: Update MAC filters");
1670         vmxnet3_update_mcast_filters(s);
1671         break;
1672 
1673     case VMXNET3_CMD_UPDATE_FEATURE:
1674         VMW_CBPRN("Set: Update features");
1675         vmxnet3_update_features(s);
1676         break;
1677 
1678     case VMXNET3_CMD_UPDATE_PMCFG:
1679         VMW_CBPRN("Set: Update power management config");
1680         vmxnet3_update_pm_state(s);
1681         break;
1682 
1683     case VMXNET3_CMD_GET_LINK:
1684         VMW_CBPRN("Set: Get link");
1685         break;
1686 
1687     case VMXNET3_CMD_RESET_DEV:
1688         VMW_CBPRN("Set: Reset device");
1689         vmxnet3_reset(s);
1690         break;
1691 
1692     case VMXNET3_CMD_QUIESCE_DEV:
1693         VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1694         vmxnet3_deactivate_device(s);
1695         break;
1696 
1697     case VMXNET3_CMD_GET_CONF_INTR:
1698         VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1699         break;
1700 
1701     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1702         VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1703                   "adaptive ring info flags");
1704         break;
1705 
1706     case VMXNET3_CMD_GET_DID_LO:
1707         VMW_CBPRN("Set: Get lower part of device ID");
1708         break;
1709 
1710     case VMXNET3_CMD_GET_DID_HI:
1711         VMW_CBPRN("Set: Get upper part of device ID");
1712         break;
1713 
1714     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1715         VMW_CBPRN("Set: Get device extra info");
1716         break;
1717 
1718     default:
1719         VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1720         break;
1721     }
1722 }
1723 
1724 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1725 {
1726     uint64_t ret;
1727 
1728     switch (s->last_command) {
1729     case VMXNET3_CMD_ACTIVATE_DEV:
1730         ret = (s->device_active) ? 0 : 1;
1731         VMW_CFPRN("Device active: %" PRIx64, ret);
1732         break;
1733 
1734     case VMXNET3_CMD_RESET_DEV:
1735     case VMXNET3_CMD_QUIESCE_DEV:
1736     case VMXNET3_CMD_GET_QUEUE_STATUS:
1737     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1738         ret = 0;
1739         break;
1740 
1741     case VMXNET3_CMD_GET_LINK:
1742         ret = s->link_status_and_speed;
1743         VMW_CFPRN("Link and speed: %" PRIx64, ret);
1744         break;
1745 
1746     case VMXNET3_CMD_GET_PERM_MAC_LO:
1747         ret = vmxnet3_get_mac_low(&s->perm_mac);
1748         break;
1749 
1750     case VMXNET3_CMD_GET_PERM_MAC_HI:
1751         ret = vmxnet3_get_mac_high(&s->perm_mac);
1752         break;
1753 
1754     case VMXNET3_CMD_GET_CONF_INTR:
1755         ret = vmxnet3_get_interrupt_config(s);
1756         break;
1757 
1758     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1759         ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1760         break;
1761 
1762     case VMXNET3_CMD_GET_DID_LO:
1763         ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1764         break;
1765 
1766     case VMXNET3_CMD_GET_DID_HI:
1767         ret = VMXNET3_DEVICE_REVISION;
1768         break;
1769 
1770     default:
1771         VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1772         ret = 0;
1773         break;
1774     }
1775 
1776     return ret;
1777 }
1778 
1779 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1780 {
1781     uint32_t events;
1782     PCIDevice *d = PCI_DEVICE(s);
1783 
1784     VMW_CBPRN("Setting events: 0x%x", val);
1785     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1786     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1787 }
1788 
1789 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1790 {
1791     PCIDevice *d = PCI_DEVICE(s);
1792     uint32_t events;
1793 
1794     VMW_CBPRN("Clearing events: 0x%x", val);
1795     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1796     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1797 }
1798 
1799 static void
1800 vmxnet3_io_bar1_write(void *opaque,
1801                       hwaddr addr,
1802                       uint64_t val,
1803                       unsigned size)
1804 {
1805     VMXNET3State *s = opaque;
1806 
1807     switch (addr) {
1808     /* Vmxnet3 Revision Report Selection */
1809     case VMXNET3_REG_VRRS:
1810         VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1811                   val, size);
1812         break;
1813 
1814     /* UPT Version Report Selection */
1815     case VMXNET3_REG_UVRS:
1816         VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1817                   val, size);
1818         break;
1819 
1820     /* Driver Shared Address Low */
1821     case VMXNET3_REG_DSAL:
1822         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1823                   val, size);
1824         /*
1825          * Guest driver will first write the low part of the shared
1826          * memory address. We save it to temp variable and set the
1827          * shared address only after we get the high part
1828          */
1829         if (val == 0) {
1830             vmxnet3_deactivate_device(s);
1831         }
1832         s->temp_shared_guest_driver_memory = val;
1833         s->drv_shmem = 0;
1834         break;
1835 
1836     /* Driver Shared Address High */
1837     case VMXNET3_REG_DSAH:
1838         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1839                   val, size);
1840         /*
1841          * Set the shared memory between guest driver and device.
1842          * We already should have low address part.
1843          */
1844         s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1845         break;
1846 
1847     /* Command */
1848     case VMXNET3_REG_CMD:
1849         VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1850                   val, size);
1851         vmxnet3_handle_command(s, val);
1852         break;
1853 
1854     /* MAC Address Low */
1855     case VMXNET3_REG_MACL:
1856         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1857                   val, size);
1858         s->temp_mac = val;
1859         break;
1860 
1861     /* MAC Address High */
1862     case VMXNET3_REG_MACH:
1863         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1864                   val, size);
1865         vmxnet3_set_variable_mac(s, val, s->temp_mac);
1866         break;
1867 
1868     /* Interrupt Cause Register */
1869     case VMXNET3_REG_ICR:
1870         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1871                   val, size);
1872         g_assert_not_reached();
1873         break;
1874 
1875     /* Event Cause Register */
1876     case VMXNET3_REG_ECR:
1877         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1878                   val, size);
1879         vmxnet3_ack_events(s, val);
1880         break;
1881 
1882     default:
1883         VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1884                   addr, val, size);
1885         break;
1886     }
1887 }
1888 
1889 static uint64_t
1890 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1891 {
1892         VMXNET3State *s = opaque;
1893         uint64_t ret = 0;
1894 
1895         switch (addr) {
1896         /* Vmxnet3 Revision Report Selection */
1897         case VMXNET3_REG_VRRS:
1898             VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1899             ret = VMXNET3_DEVICE_REVISION;
1900             break;
1901 
1902         /* UPT Version Report Selection */
1903         case VMXNET3_REG_UVRS:
1904             VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1905             ret = VMXNET3_UPT_REVISION;
1906             break;
1907 
1908         /* Command */
1909         case VMXNET3_REG_CMD:
1910             VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1911             ret = vmxnet3_get_command_status(s);
1912             break;
1913 
1914         /* MAC Address Low */
1915         case VMXNET3_REG_MACL:
1916             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1917             ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1918             break;
1919 
1920         /* MAC Address High */
1921         case VMXNET3_REG_MACH:
1922             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1923             ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1924             break;
1925 
1926         /*
1927          * Interrupt Cause Register
1928          * Used for legacy interrupts only so interrupt index always 0
1929          */
1930         case VMXNET3_REG_ICR:
1931             VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1932             if (vmxnet3_interrupt_asserted(s, 0)) {
1933                 vmxnet3_clear_interrupt(s, 0);
1934                 ret = true;
1935             } else {
1936                 ret = false;
1937             }
1938             break;
1939 
1940         default:
1941             VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size);
1942             break;
1943         }
1944 
1945         return ret;
1946 }
1947 
1948 static int
1949 vmxnet3_can_receive(NetClientState *nc)
1950 {
1951     VMXNET3State *s = qemu_get_nic_opaque(nc);
1952     return s->device_active &&
1953            VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1954 }
1955 
1956 static inline bool
1957 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1958 {
1959     uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1960     if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1961         return true;
1962     }
1963 
1964     return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1965 }
1966 
1967 static bool
1968 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1969 {
1970     int i;
1971     for (i = 0; i < s->mcast_list_len; i++) {
1972         if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1973             return true;
1974         }
1975     }
1976     return false;
1977 }
1978 
1979 static bool
1980 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1981     size_t size)
1982 {
1983     struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1984 
1985     if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1986         return true;
1987     }
1988 
1989     if (!vmxnet3_is_registered_vlan(s, data)) {
1990         return false;
1991     }
1992 
1993     switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1994     case ETH_PKT_UCAST:
1995         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
1996             return false;
1997         }
1998         if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
1999             return false;
2000         }
2001         break;
2002 
2003     case ETH_PKT_BCAST:
2004         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
2005             return false;
2006         }
2007         break;
2008 
2009     case ETH_PKT_MCAST:
2010         if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
2011             return true;
2012         }
2013         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
2014             return false;
2015         }
2016         if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
2017             return false;
2018         }
2019         break;
2020 
2021     default:
2022         g_assert_not_reached();
2023     }
2024 
2025     return true;
2026 }
2027 
2028 static ssize_t
2029 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
2030 {
2031     VMXNET3State *s = qemu_get_nic_opaque(nc);
2032     size_t bytes_indicated;
2033     uint8_t min_buf[MIN_BUF_SIZE];
2034 
2035     if (!vmxnet3_can_receive(nc)) {
2036         VMW_PKPRN("Cannot receive now");
2037         return -1;
2038     }
2039 
2040     if (s->peer_has_vhdr) {
2041         net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
2042         buf += sizeof(struct virtio_net_hdr);
2043         size -= sizeof(struct virtio_net_hdr);
2044     }
2045 
2046     /* Pad to minimum Ethernet frame length */
2047     if (size < sizeof(min_buf)) {
2048         memcpy(min_buf, buf, size);
2049         memset(&min_buf[size], 0, sizeof(min_buf) - size);
2050         buf = min_buf;
2051         size = sizeof(min_buf);
2052     }
2053 
2054     net_rx_pkt_set_packet_type(s->rx_pkt,
2055         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
2056 
2057     if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2058         net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
2059         vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2060         net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2061         bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2062         if (bytes_indicated < size) {
2063             VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2064         }
2065     } else {
2066         VMW_PKPRN("Packet dropped by RX filter");
2067         bytes_indicated = size;
2068     }
2069 
2070     assert(size > 0);
2071     assert(bytes_indicated != 0);
2072     return bytes_indicated;
2073 }
2074 
2075 static void vmxnet3_set_link_status(NetClientState *nc)
2076 {
2077     VMXNET3State *s = qemu_get_nic_opaque(nc);
2078 
2079     if (nc->link_down) {
2080         s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2081     } else {
2082         s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2083     }
2084 
2085     vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2086     vmxnet3_trigger_interrupt(s, s->event_int_idx);
2087 }
2088 
/*
 * Net client description handed to qemu_new_nic(): wires the receive and
 * link-status callbacks of this device.
 */
static NetClientInfo net_vmxnet3_info = {
        .type = NET_CLIENT_OPTIONS_KIND_NIC,
        .size = sizeof(NICState),
        .receive = vmxnet3_receive,
        .link_status_changed = vmxnet3_set_link_status,
};
2095 
2096 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2097 {
2098     NetClientState *nc = qemu_get_queue(s->nic);
2099 
2100     if (qemu_has_vnet_hdr(nc->peer)) {
2101         return true;
2102     }
2103 
2104     return false;
2105 }
2106 
/*
 * Tear down the network side of the device: release the multicast list,
 * deactivate the device and delete the NIC.
 */
static void vmxnet3_net_uninit(VMXNET3State *s)
{
    g_free(s->mcast_list);
    vmxnet3_deactivate_device(s);
    qemu_del_nic(s->nic);
}
2113 
2114 static void vmxnet3_net_init(VMXNET3State *s)
2115 {
2116     DeviceState *d = DEVICE(s);
2117 
2118     VMW_CBPRN("vmxnet3_net_init called...");
2119 
2120     qemu_macaddr_default_if_unset(&s->conf.macaddr);
2121 
2122     /* Windows guest will query the address that was set on init */
2123     memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2124 
2125     s->mcast_list = NULL;
2126     s->mcast_list_len = 0;
2127 
2128     s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2129 
2130     VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2131 
2132     s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2133                           object_get_typename(OBJECT(s)),
2134                           d->id, s);
2135 
2136     s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2137     s->tx_sop = true;
2138     s->skip_current_tx_pkt = false;
2139     s->tx_pkt = NULL;
2140     s->rx_pkt = NULL;
2141     s->rx_vlan_stripping = false;
2142     s->lro_supported = false;
2143 
2144     if (s->peer_has_vhdr) {
2145         qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2146             sizeof(struct virtio_net_hdr));
2147 
2148         qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2149     }
2150 
2151     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2152 }
2153 
2154 static void
2155 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2156 {
2157     PCIDevice *d = PCI_DEVICE(s);
2158     int i;
2159     for (i = 0; i < num_vectors; i++) {
2160         msix_vector_unuse(d, i);
2161     }
2162 }
2163 
2164 static bool
2165 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2166 {
2167     PCIDevice *d = PCI_DEVICE(s);
2168     int i;
2169     for (i = 0; i < num_vectors; i++) {
2170         int res = msix_vector_use(d, i);
2171         if (0 > res) {
2172             VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
2173             vmxnet3_unuse_msix_vectors(s, i);
2174             return false;
2175         }
2176     }
2177     return true;
2178 }
2179 
2180 static bool
2181 vmxnet3_init_msix(VMXNET3State *s)
2182 {
2183     PCIDevice *d = PCI_DEVICE(s);
2184     int res = msix_init(d, VMXNET3_MAX_INTRS,
2185                         &s->msix_bar,
2186                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2187                         &s->msix_bar,
2188                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2189                         VMXNET3_MSIX_OFFSET(s));
2190 
2191     if (0 > res) {
2192         VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2193         s->msix_used = false;
2194     } else {
2195         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2196             VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
2197             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2198             s->msix_used = false;
2199         } else {
2200             s->msix_used = true;
2201         }
2202     }
2203     return s->msix_used;
2204 }
2205 
2206 static void
2207 vmxnet3_cleanup_msix(VMXNET3State *s)
2208 {
2209     PCIDevice *d = PCI_DEVICE(s);
2210 
2211     if (s->msix_used) {
2212         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2213         msix_uninit(d, &s->msix_bar, &s->msix_bar);
2214     }
2215 }
2216 
2217 static void
2218 vmxnet3_cleanup_msi(VMXNET3State *s)
2219 {
2220     PCIDevice *d = PCI_DEVICE(s);
2221 
2222     msi_uninit(d);
2223 }
2224 
2225 static void
2226 vmxnet3_msix_save(QEMUFile *f, void *opaque)
2227 {
2228     PCIDevice *d = PCI_DEVICE(opaque);
2229     msix_save(d, f);
2230 }
2231 
2232 static int
2233 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
2234 {
2235     PCIDevice *d = PCI_DEVICE(opaque);
2236     msix_load(d, f);
2237     return 0;
2238 }
2239 
/* BAR0 access callbacks: little-endian, implemented as 4-byte accesses. */
static const MemoryRegionOps b0_ops = {
    .read = vmxnet3_io_bar0_read,
    .write = vmxnet3_io_bar0_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
            .min_access_size = 4,
            .max_access_size = 4,
    },
};
2249 
/* BAR1 access callbacks: little-endian, implemented as 4-byte accesses. */
static const MemoryRegionOps b1_ops = {
    .read = vmxnet3_io_bar1_read,
    .write = vmxnet3_io_bar1_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
            .min_access_size = 4,
            .max_access_size = 4,
    },
};
2259 
2260 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2261 {
2262     uint64_t dsn_payload;
2263     uint8_t *dsnp = (uint8_t *)&dsn_payload;
2264 
2265     dsnp[0] = 0xfe;
2266     dsnp[1] = s->conf.macaddr.a[3];
2267     dsnp[2] = s->conf.macaddr.a[4];
2268     dsnp[3] = s->conf.macaddr.a[5];
2269     dsnp[4] = s->conf.macaddr.a[0];
2270     dsnp[5] = s->conf.macaddr.a[1];
2271     dsnp[6] = s->conf.macaddr.a[2];
2272     dsnp[7] = 0xff;
2273     return dsn_payload;
2274 }
2275 
2276 
/* Arguments passed to msi_init() by vmxnet3_pci_realize() */
#define VMXNET3_USE_64BIT         (true)
#define VMXNET3_PER_VECTOR_MASK   (false)

/*
 * PCI realize callback.
 *
 * Registers the three BARs (BAR0, BAR1 and the MSI-X BAR), resets the
 * interrupt state, sets up MSI and MSI-X, initializes the network side,
 * adds the PCIe capabilities when the device is PCI Express, and finally
 * registers the "vmxnet3-msix" savevm handlers.
 *
 * @errp is not written to by this function.
 */
static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    VMXNET3State *s = VMXNET3(pci_dev);
    int ret;

    VMW_CBPRN("Starting init...");

    memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
                          "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);

    memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
                          "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);

    /* Container region; msix_init() is later given this region */
    memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
                       VMXNET3_MSIX_BAR_SIZE);
    pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);

    vmxnet3_reset_interrupt_states(s);

    /* Interrupt pin A */
    pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;

    ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
    /* Any error other than -ENOTSUP(board's MSI support is broken)
     * is a programming error. Fall back to INTx silently on -ENOTSUP */
    assert(!ret || ret == -ENOTSUP);

    /* MSI-X failure is not fatal here: it is only logged */
    if (!vmxnet3_init_msix(s)) {
        VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
    }

    vmxnet3_net_init(s);

    if (pci_is_express(pci_dev)) {
        /* The endpoint capability is only added on an express bus */
        if (pci_bus_is_express(pci_dev->bus)) {
            pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
        }

        pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
                              vmxnet3_device_serial_num(s));
    }

    register_savevm(dev, "vmxnet3-msix", -1, 1,
                    vmxnet3_msix_save, vmxnet3_msix_load, s);
}
2332 
2333 static void vmxnet3_instance_init(Object *obj)
2334 {
2335     VMXNET3State *s = VMXNET3(obj);
2336     device_add_bootindex_property(obj, &s->conf.bootindex,
2337                                   "bootindex", "/ethernet-phy@0",
2338                                   DEVICE(obj), NULL);
2339 }
2340 
/*
 * PCI exit callback: unregister the "vmxnet3-msix" savevm handlers, then
 * tear down the network side, MSI-X and MSI, in that order.
 */
static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    VMXNET3State *s = VMXNET3(pci_dev);

    VMW_CBPRN("Starting uninit...");

    unregister_savevm(dev, "vmxnet3-msix", s);

    vmxnet3_net_uninit(s);

    vmxnet3_cleanup_msix(s);

    vmxnet3_cleanup_msi(s);
}
2356 
2357 static void vmxnet3_qdev_reset(DeviceState *dev)
2358 {
2359     PCIDevice *d = PCI_DEVICE(dev);
2360     VMXNET3State *s = VMXNET3(d);
2361 
2362     VMW_CBPRN("Starting QDEV reset...");
2363     vmxnet3_reset(s);
2364 }
2365 
/*
 * .needed callback of the multicast-list subsection: unconditionally
 * true, so the subsection is always marked as needed.
 */
static bool vmxnet3_mc_list_needed(void *opaque)
{
    return true;
}
2370 
2371 static int vmxnet3_mcast_list_pre_load(void *opaque)
2372 {
2373     VMXNET3State *s = opaque;
2374 
2375     s->mcast_list = g_malloc(s->mcast_list_buff_size);
2376 
2377     return 0;
2378 }
2379 
2380 
2381 static void vmxnet3_pre_save(void *opaque)
2382 {
2383     VMXNET3State *s = opaque;
2384 
2385     s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2386 }
2387 
/*
 * Migration subsection carrying the multicast list as a variable-sized
 * buffer of mcast_list_buff_size bytes.  The destination buffer is
 * allocated by the .pre_load callback.
 */
static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
    .name = "vmxnet3/mcast_list",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = vmxnet3_mcast_list_pre_load,
    .needed = vmxnet3_mc_list_needed,
    .fields = (VMStateField[]) {
        VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
            mcast_list_buff_size),
        VMSTATE_END_OF_LIST()
    }
};
2400 
/*
 * Read one ring descriptor from @f into @r.  The field order must match
 * vmxnet3_put_ring_to_file().
 */
static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
{
    r->pa = qemu_get_be64(f);
    r->size = qemu_get_be32(f);
    r->cell_size = qemu_get_be32(f);
    r->next = qemu_get_be32(f);
    r->gen = qemu_get_byte(f);
}
2409 
/*
 * Write one ring descriptor @r to @f.  The field order must match
 * vmxnet3_get_ring_from_file().
 */
static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
{
    qemu_put_be64(f, r->pa);
    qemu_put_be32(f, r->size);
    qemu_put_be32(f, r->cell_size);
    qemu_put_be32(f, r->next);
    qemu_put_byte(f, r->gen);
}
2418 
/*
 * Read the ten 64-bit TX counters from @f into @tx_stat, in the same
 * order vmxnet3_put_tx_stats_to_file() writes them.
 */
static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
    struct UPT1_TxStats *tx_stat)
{
    tx_stat->TSOPktsTxOK = qemu_get_be64(f);
    tx_stat->TSOBytesTxOK = qemu_get_be64(f);
    tx_stat->ucastPktsTxOK = qemu_get_be64(f);
    tx_stat->ucastBytesTxOK = qemu_get_be64(f);
    tx_stat->mcastPktsTxOK = qemu_get_be64(f);
    tx_stat->mcastBytesTxOK = qemu_get_be64(f);
    tx_stat->bcastPktsTxOK = qemu_get_be64(f);
    tx_stat->bcastBytesTxOK = qemu_get_be64(f);
    tx_stat->pktsTxError = qemu_get_be64(f);
    tx_stat->pktsTxDiscard = qemu_get_be64(f);
}
2433 
/*
 * Write the ten 64-bit TX counters of @tx_stat to @f.  The order defines
 * the stream layout read back by vmxnet3_get_tx_stats_from_file().
 */
static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
    struct UPT1_TxStats *tx_stat)
{
    qemu_put_be64(f, tx_stat->TSOPktsTxOK);
    qemu_put_be64(f, tx_stat->TSOBytesTxOK);
    qemu_put_be64(f, tx_stat->ucastPktsTxOK);
    qemu_put_be64(f, tx_stat->ucastBytesTxOK);
    qemu_put_be64(f, tx_stat->mcastPktsTxOK);
    qemu_put_be64(f, tx_stat->mcastBytesTxOK);
    qemu_put_be64(f, tx_stat->bcastPktsTxOK);
    qemu_put_be64(f, tx_stat->bcastBytesTxOK);
    qemu_put_be64(f, tx_stat->pktsTxError);
    qemu_put_be64(f, tx_stat->pktsTxDiscard);
}
2448 
2449 static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
2450 {
2451     Vmxnet3TxqDescr *r = pv;
2452 
2453     vmxnet3_get_ring_from_file(f, &r->tx_ring);
2454     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2455     r->intr_idx = qemu_get_byte(f);
2456     r->tx_stats_pa = qemu_get_be64(f);
2457 
2458     vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);
2459 
2460     return 0;
2461 }
2462 
2463 static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
2464 {
2465     Vmxnet3TxqDescr *r = pv;
2466 
2467     vmxnet3_put_ring_to_file(f, &r->tx_ring);
2468     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2469     qemu_put_byte(f, r->intr_idx);
2470     qemu_put_be64(f, r->tx_stats_pa);
2471     vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
2472 }
2473 
/* Custom (de)serializer pair for Vmxnet3TxqDescr array elements. */
static const VMStateInfo txq_descr_info = {
    .name = "txq_descr",
    .get = vmxnet3_get_txq_descr,
    .put = vmxnet3_put_txq_descr
};
2479 
/*
 * Read the ten 64-bit RX counters from @f into @rx_stat, in the same
 * order vmxnet3_put_rx_stats_to_file() writes them.
 */
static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
    struct UPT1_RxStats *rx_stat)
{
    rx_stat->LROPktsRxOK = qemu_get_be64(f);
    rx_stat->LROBytesRxOK = qemu_get_be64(f);
    rx_stat->ucastPktsRxOK = qemu_get_be64(f);
    rx_stat->ucastBytesRxOK = qemu_get_be64(f);
    rx_stat->mcastPktsRxOK = qemu_get_be64(f);
    rx_stat->mcastBytesRxOK = qemu_get_be64(f);
    rx_stat->bcastPktsRxOK = qemu_get_be64(f);
    rx_stat->bcastBytesRxOK = qemu_get_be64(f);
    rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
    rx_stat->pktsRxError = qemu_get_be64(f);
}
2494 
/*
 * Write the ten 64-bit RX counters of @rx_stat to @f.  The order defines
 * the stream layout read back by vmxnet3_get_rx_stats_from_file().
 */
static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
    struct UPT1_RxStats *rx_stat)
{
    qemu_put_be64(f, rx_stat->LROPktsRxOK);
    qemu_put_be64(f, rx_stat->LROBytesRxOK);
    qemu_put_be64(f, rx_stat->ucastPktsRxOK);
    qemu_put_be64(f, rx_stat->ucastBytesRxOK);
    qemu_put_be64(f, rx_stat->mcastPktsRxOK);
    qemu_put_be64(f, rx_stat->mcastBytesRxOK);
    qemu_put_be64(f, rx_stat->bcastPktsRxOK);
    qemu_put_be64(f, rx_stat->bcastBytesRxOK);
    qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
    qemu_put_be64(f, rx_stat->pktsRxError);
}
2509 
2510 static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
2511 {
2512     Vmxnet3RxqDescr *r = pv;
2513     int i;
2514 
2515     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2516         vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
2517     }
2518 
2519     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2520     r->intr_idx = qemu_get_byte(f);
2521     r->rx_stats_pa = qemu_get_be64(f);
2522 
2523     vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);
2524 
2525     return 0;
2526 }
2527 
2528 static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
2529 {
2530     Vmxnet3RxqDescr *r = pv;
2531     int i;
2532 
2533     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2534         vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
2535     }
2536 
2537     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2538     qemu_put_byte(f, r->intr_idx);
2539     qemu_put_be64(f, r->rx_stats_pa);
2540     vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
2541 }
2542 
2543 static int vmxnet3_post_load(void *opaque, int version_id)
2544 {
2545     VMXNET3State *s = opaque;
2546     PCIDevice *d = PCI_DEVICE(s);
2547 
2548     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
2549                     s->max_tx_frags, s->peer_has_vhdr);
2550     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
2551 
2552     if (s->msix_used) {
2553         if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2554             VMW_WRPRN("Failed to re-use MSI-X vectors");
2555             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2556             s->msix_used = false;
2557             return -1;
2558         }
2559     }
2560 
2561     vmxnet3_validate_queues(s);
2562     vmxnet3_validate_interrupts(s);
2563 
2564     return 0;
2565 }
2566 
/* Custom (de)serializer pair for Vmxnet3RxqDescr array elements. */
static const VMStateInfo rxq_descr_info = {
    .name = "rxq_descr",
    .get = vmxnet3_get_rxq_descr,
    .put = vmxnet3_put_rxq_descr
};
2572 
2573 static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
2574 {
2575     Vmxnet3IntState *r = pv;
2576 
2577     r->is_masked = qemu_get_byte(f);
2578     r->is_pending = qemu_get_byte(f);
2579     r->is_asserted = qemu_get_byte(f);
2580 
2581     return 0;
2582 }
2583 
2584 static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
2585 {
2586     Vmxnet3IntState *r = pv;
2587 
2588     qemu_put_byte(f, r->is_masked);
2589     qemu_put_byte(f, r->is_pending);
2590     qemu_put_byte(f, r->is_asserted);
2591 }
2592 
/* Custom (de)serializer pair for Vmxnet3IntState array elements. */
static const VMStateInfo int_state_info = {
    .name = "int_state",
    .get = vmxnet3_get_int_state,
    .put = vmxnet3_put_int_state
};
2598 
2599 static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
2600 {
2601     VMXNET3State *s = VMXNET3(opaque);
2602 
2603     return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
2604 }
2605 
/*
 * Field test for the plain PCI device state in the main vmstate: the
 * exact inverse of vmxnet3_vmstate_need_pcie_device().
 * @version_id is not consulted.
 */
static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
{
    return !vmxnet3_vmstate_need_pcie_device(opaque);
}
2610 
/*
 * Migration subsection carrying the PCIe device state; gated by
 * vmxnet3_vmstate_need_pcie_device().
 */
static const VMStateDescription vmstate_vmxnet3_pcie_device = {
    .name = "vmxnet3/pcie",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmxnet3_vmstate_need_pcie_device,
    .fields = (VMStateField[]) {
        VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
        VMSTATE_END_OF_LIST()
    }
};
2621 
/*
 * Main migration description of the device.
 *
 * The order of the entries in .fields is the order they appear in the
 * migration stream, so entries must not be reordered.
 */
static const VMStateDescription vmstate_vmxnet3 = {
    .name = "vmxnet3",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmxnet3_pre_save,
    .post_load = vmxnet3_post_load,
    .fields = (VMStateField[]) {
            /* Plain PCI state; only present when the PCIe subsection is not */
            VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
                                vmxnet3_vmstate_test_pci_device, 0,
                                vmstate_pci_device, PCIDevice),
            VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
            VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
            VMSTATE_BOOL(lro_supported, VMXNET3State),
            VMSTATE_UINT32(rx_mode, VMXNET3State),
            VMSTATE_UINT32(mcast_list_len, VMXNET3State),
            /* Computed by vmxnet3_pre_save() before saving */
            VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
            VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
            VMSTATE_UINT32(mtu, VMXNET3State),
            VMSTATE_UINT16(max_rx_frags, VMXNET3State),
            VMSTATE_UINT32(max_tx_frags, VMXNET3State),
            VMSTATE_UINT8(event_int_idx, VMXNET3State),
            VMSTATE_BOOL(auto_int_masking, VMXNET3State),
            VMSTATE_UINT8(txq_num, VMXNET3State),
            VMSTATE_UINT8(rxq_num, VMXNET3State),
            VMSTATE_UINT32(device_active, VMXNET3State),
            VMSTATE_UINT32(last_command, VMXNET3State),
            VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
            VMSTATE_UINT32(temp_mac, VMXNET3State),
            VMSTATE_UINT64(drv_shmem, VMXNET3State),
            VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),

            /* Arrays serialized through the custom VMStateInfo handlers */
            VMSTATE_ARRAY(txq_descr, VMXNET3State,
                VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
                Vmxnet3TxqDescr),
            VMSTATE_ARRAY(rxq_descr, VMXNET3State,
                VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
                Vmxnet3RxqDescr),
            VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
                0, int_state_info, Vmxnet3IntState),

            VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmxstate_vmxnet3_mcast_list,
        &vmstate_vmxnet3_pcie_device,
        NULL
    }
};
2670 
/*
 * qdev properties: the generic NIC properties plus two compatibility
 * bits stored in compat_flags, both defaulting to false.
 */
static Property vmxnet3_properties[] = {
    DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
    DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
    DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
                    VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
    DEFINE_PROP_END_OF_LIST(),
};
2679 
2680 static void vmxnet3_realize(DeviceState *qdev, Error **errp)
2681 {
2682     VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
2683     PCIDevice *pci_dev = PCI_DEVICE(qdev);
2684     VMXNET3State *s = VMXNET3(qdev);
2685 
2686     if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
2687         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2688     }
2689 
2690     vc->parent_dc_realize(qdev, errp);
2691 }
2692 
2693 static void vmxnet3_class_init(ObjectClass *class, void *data)
2694 {
2695     DeviceClass *dc = DEVICE_CLASS(class);
2696     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2697     VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
2698 
2699     c->realize = vmxnet3_pci_realize;
2700     c->exit = vmxnet3_pci_uninit;
2701     c->vendor_id = PCI_VENDOR_ID_VMWARE;
2702     c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2703     c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2704     c->romfile = "efi-vmxnet3.rom";
2705     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2706     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2707     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2708     vc->parent_dc_realize = dc->realize;
2709     dc->realize = vmxnet3_realize;
2710     dc->desc = "VMWare Paravirtualized Ethernet v3";
2711     dc->reset = vmxnet3_qdev_reset;
2712     dc->vmsd = &vmstate_vmxnet3;
2713     dc->props = vmxnet3_properties;
2714     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2715 }
2716 
/* QOM type registration record: a PCI device with its own class struct. */
static const TypeInfo vmxnet3_info = {
    .name          = TYPE_VMXNET3,
    .parent        = TYPE_PCI_DEVICE,
    .class_size    = sizeof(VMXNET3Class),
    .instance_size = sizeof(VMXNET3State),
    .class_init    = vmxnet3_class_init,
    .instance_init = vmxnet3_instance_init,
};
2725 
/* Register the vmxnet3 QOM type; hooked up through type_init() below. */
static void vmxnet3_register_types(void)
{
    VMW_CBPRN("vmxnet3_register_types called...");
    type_register_static(&vmxnet3_info);
}

type_init(vmxnet3_register_types)
2733