xref: /qemu/hw/net/vmxnet3.c (revision bf8d4924)
1 /*
2  * QEMU VMWARE VMXNET3 paravirtual NIC
3  *
4  * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5  *
6  * Developed by Daynix Computing LTD (http://www.daynix.com)
7  *
8  * Authors:
9  * Dmitry Fleytman <dmitry@daynix.com>
10  * Tamir Shomer <tamirs@daynix.com>
11  * Yan Vugenfirer <yan@daynix.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.
14  * See the COPYING file in the top-level directory.
15  *
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "net/net.h"
22 #include "net/tap.h"
23 #include "net/checksum.h"
24 #include "sysemu/sysemu.h"
25 #include "qemu-common.h"
26 #include "qemu/bswap.h"
27 #include "hw/pci/msix.h"
28 #include "hw/pci/msi.h"
29 
30 #include "vmxnet3.h"
31 #include "vmxnet_debug.h"
32 #include "vmware_utils.h"
33 #include "net_tx_pkt.h"
34 #include "net_rx_pkt.h"
35 
36 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
37 #define VMXNET3_MSIX_BAR_SIZE 0x2000
38 #define MIN_BUF_SIZE 60
39 
40 /* Compatibility flags for migration */
41 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
42 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
43     (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
44 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
45 #define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
46     (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
47 
48 #define VMXNET3_EXP_EP_OFFSET (0x48)
49 #define VMXNET3_MSI_OFFSET(s) \
50     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
51 #define VMXNET3_MSIX_OFFSET(s) \
52     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
53 #define VMXNET3_DSN_OFFSET     (0x100)
54 
55 #define VMXNET3_BAR0_IDX      (0)
56 #define VMXNET3_BAR1_IDX      (1)
57 #define VMXNET3_MSIX_BAR_IDX  (2)
58 
59 #define VMXNET3_OFF_MSIX_TABLE (0x000)
60 #define VMXNET3_OFF_MSIX_PBA(s) \
61     ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
62 
63 /* Link speed in Mbps should be shifted by 16 */
64 #define VMXNET3_LINK_SPEED      (1000 << 16)
65 
66 /* Link status: 1 - up, 0 - down. */
67 #define VMXNET3_LINK_STATUS_UP  0x1
68 
69 /* Least significant bit should be set for revision and version */
70 #define VMXNET3_UPT_REVISION      0x1
71 #define VMXNET3_DEVICE_REVISION   0x1
72 
73 /* Number of interrupt vectors for non-MSIx modes */
74 #define VMXNET3_MAX_NMSIX_INTRS   (1)
75 
76 /* Macros for rings descriptors access */
77 #define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
78     (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
79 
80 #define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
81     (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field, value)))
82 
83 #define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
84     (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
85 
86 #define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
87     (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
88 
89 #define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
90     (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
91 
92 #define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
93     (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
94 
95 #define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
96     (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
97 
98 #define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
99     (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
100 
101 #define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
102     (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
103 
104 #define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
105     (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
106 
107 /* Macros for guest driver shared area access */
108 #define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
109     (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
110 
111 #define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
112     (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
113 
114 #define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
115     (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))
116 
117 #define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
118     (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
119 
120 #define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
121     (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
122 
123 #define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
124     (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
125 
126 #define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
127 
128 typedef struct VMXNET3Class {
129     PCIDeviceClass parent_class;
130     DeviceRealize parent_dc_realize;
131 } VMXNET3Class;
132 
133 #define TYPE_VMXNET3 "vmxnet3"
134 #define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
135 
136 #define VMXNET3_DEVICE_CLASS(klass) \
137     OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
138 #define VMXNET3_DEVICE_GET_CLASS(obj) \
139     OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
140 
141 /* Cyclic ring abstraction */
142 typedef struct {
143     hwaddr pa;
144     size_t size;
145     size_t cell_size;
146     size_t next;
147     uint8_t gen;
148 } Vmxnet3Ring;
149 
150 static inline void vmxnet3_ring_init(PCIDevice *d,
151 				     Vmxnet3Ring *ring,
152                                      hwaddr pa,
153                                      size_t size,
154                                      size_t cell_size,
155                                      bool zero_region)
156 {
157     ring->pa = pa;
158     ring->size = size;
159     ring->cell_size = cell_size;
160     ring->gen = VMXNET3_INIT_GEN;
161     ring->next = 0;
162 
163     if (zero_region) {
164         vmw_shmem_set(d, pa, 0, size * cell_size);
165     }
166 }
167 
168 #define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
169     macro("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu",  \
170           (ring_name), (ridx),                                               \
171           (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
172 
173 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
174 {
175     if (++ring->next >= ring->size) {
176         ring->next = 0;
177         ring->gen ^= 1;
178     }
179 }
180 
181 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
182 {
183     if (ring->next-- == 0) {
184         ring->next = ring->size - 1;
185         ring->gen ^= 1;
186     }
187 }
188 
189 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
190 {
191     return ring->pa + ring->next * ring->cell_size;
192 }
193 
194 static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
195 					       void *buff)
196 {
197     vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
198 }
199 
200 static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
201 						void *buff)
202 {
203     vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
204 }
205 
206 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
207 {
208     return ring->next;
209 }
210 
211 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
212 {
213     return ring->gen;
214 }
215 
216 /* Debug trace-related functions */
217 static inline void
218 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
219 {
220     VMW_PKPRN("TX DESCR: "
221               "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
222               "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
223               "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
224               le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
225               descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
226               descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
227 }
228 
229 static inline void
230 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
231 {
232     VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
233               "csum_start: %d, csum_offset: %d",
234               vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
235               vhdr->csum_start, vhdr->csum_offset);
236 }
237 
238 static inline void
239 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
240 {
241     VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
242               "dtype: %d, ext1: %d, btype: %d",
243               le64_to_cpu(descr->addr), descr->len, descr->gen,
244               descr->rsvd, descr->dtype, descr->ext1, descr->btype);
245 }
246 
247 /* Device state and helper functions */
248 #define VMXNET3_RX_RINGS_PER_QUEUE (2)
249 
250 typedef struct {
251     Vmxnet3Ring tx_ring;
252     Vmxnet3Ring comp_ring;
253 
254     uint8_t intr_idx;
255     hwaddr tx_stats_pa;
256     struct UPT1_TxStats txq_stats;
257 } Vmxnet3TxqDescr;
258 
259 typedef struct {
260     Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
261     Vmxnet3Ring comp_ring;
262     uint8_t intr_idx;
263     hwaddr rx_stats_pa;
264     struct UPT1_RxStats rxq_stats;
265 } Vmxnet3RxqDescr;
266 
267 typedef struct {
268     bool is_masked;
269     bool is_pending;
270     bool is_asserted;
271 } Vmxnet3IntState;
272 
273 typedef struct {
274         PCIDevice parent_obj;
275         NICState *nic;
276         NICConf conf;
277         MemoryRegion bar0;
278         MemoryRegion bar1;
279         MemoryRegion msix_bar;
280 
281         Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
282         Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
283 
284         /* Whether MSI-X support was installed successfully */
285         bool msix_used;
286         /* Whether MSI support was installed successfully */
287         bool msi_used;
288         hwaddr drv_shmem;
289         hwaddr temp_shared_guest_driver_memory;
290 
291         uint8_t txq_num;
292 
293         /* This boolean tells whether RX packet being indicated has to */
294         /* be split into head and body chunks from different RX rings  */
295         bool rx_packets_compound;
296 
297         bool rx_vlan_stripping;
298         bool lro_supported;
299 
300         uint8_t rxq_num;
301 
302         /* Network MTU */
303         uint32_t mtu;
304 
305         /* Maximum number of fragments for indicated TX packets */
306         uint32_t max_tx_frags;
307 
308         /* Maximum number of fragments for indicated RX packets */
309         uint16_t max_rx_frags;
310 
311         /* Index for events interrupt */
312         uint8_t event_int_idx;
313 
314         /* Whether automatic interrupts masking enabled */
315         bool auto_int_masking;
316 
317         bool peer_has_vhdr;
318 
319         /* TX packets to QEMU interface */
320         struct NetTxPkt *tx_pkt;
321         uint32_t offload_mode;
322         uint32_t cso_or_gso_size;
323         uint16_t tci;
324         bool needs_vlan;
325 
326         struct NetRxPkt *rx_pkt;
327 
328         bool tx_sop;
329         bool skip_current_tx_pkt;
330 
331         uint32_t device_active;
332         uint32_t last_command;
333 
334         uint32_t link_status_and_speed;
335 
336         Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
337 
338         uint32_t temp_mac;   /* To store the low part first */
339 
340         MACAddr perm_mac;
341         uint32_t vlan_table[VMXNET3_VFT_SIZE];
342         uint32_t rx_mode;
343         MACAddr *mcast_list;
344         uint32_t mcast_list_len;
345         uint32_t mcast_list_buff_size; /* needed for live migration. */
346 
347         /* Compatibility flags for migration */
348         uint32_t compat_flags;
349 } VMXNET3State;
350 
351 /* Interrupt management */
352 
353 /*
354  * This function returns sign whether interrupt line is in asserted state
355  * This depends on the type of interrupt used. For INTX interrupt line will
356  * be asserted until explicit deassertion, for MSI(X) interrupt line will
357  * be deasserted automatically due to notification semantics of the MSI(X)
358  * interrupts
359  */
360 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
361 {
362     PCIDevice *d = PCI_DEVICE(s);
363 
364     if (s->msix_used && msix_enabled(d)) {
365         VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
366         msix_notify(d, int_idx);
367         return false;
368     }
369     if (s->msi_used && msi_enabled(d)) {
370         VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
371         msi_notify(d, int_idx);
372         return false;
373     }
374 
375     VMW_IRPRN("Asserting line for interrupt %u", int_idx);
376     pci_irq_assert(d);
377     return true;
378 }
379 
380 static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
381 {
382     PCIDevice *d = PCI_DEVICE(s);
383 
384     /*
385      * This function should never be called for MSI(X) interrupts
386      * because deassertion never required for message interrupts
387      */
388     assert(!s->msix_used || !msix_enabled(d));
389     /*
390      * This function should never be called for MSI(X) interrupts
391      * because deassertion never required for message interrupts
392      */
393     assert(!s->msi_used || !msi_enabled(d));
394 
395     VMW_IRPRN("Deasserting line for interrupt %u", lidx);
396     pci_irq_deassert(d);
397 }
398 
399 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
400 {
401     if (!s->interrupt_states[lidx].is_pending &&
402        s->interrupt_states[lidx].is_asserted) {
403         VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
404         _vmxnet3_deassert_interrupt_line(s, lidx);
405         s->interrupt_states[lidx].is_asserted = false;
406         return;
407     }
408 
409     if (s->interrupt_states[lidx].is_pending &&
410        !s->interrupt_states[lidx].is_masked &&
411        !s->interrupt_states[lidx].is_asserted) {
412         VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
413         s->interrupt_states[lidx].is_asserted =
414             _vmxnet3_assert_interrupt_line(s, lidx);
415         s->interrupt_states[lidx].is_pending = false;
416         return;
417     }
418 }
419 
420 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
421 {
422     PCIDevice *d = PCI_DEVICE(s);
423     s->interrupt_states[lidx].is_pending = true;
424     vmxnet3_update_interrupt_line_state(s, lidx);
425 
426     if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
427         goto do_automask;
428     }
429 
430     if (s->msi_used && msi_enabled(d) && s->auto_int_masking) {
431         goto do_automask;
432     }
433 
434     return;
435 
436 do_automask:
437     s->interrupt_states[lidx].is_masked = true;
438     vmxnet3_update_interrupt_line_state(s, lidx);
439 }
440 
441 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
442 {
443     return s->interrupt_states[lidx].is_asserted;
444 }
445 
446 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
447 {
448     s->interrupt_states[int_idx].is_pending = false;
449     if (s->auto_int_masking) {
450         s->interrupt_states[int_idx].is_masked = true;
451     }
452     vmxnet3_update_interrupt_line_state(s, int_idx);
453 }
454 
455 static void
456 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
457 {
458     s->interrupt_states[lidx].is_masked = is_masked;
459     vmxnet3_update_interrupt_line_state(s, lidx);
460 }
461 
462 static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
463 {
464     return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
465 }
466 
467 #define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
468 #define VMXNET3_MAKE_BYTE(byte_num, val) \
469     (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
470 
471 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
472 {
473     s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
474     s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
475     s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
476     s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
477     s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
478     s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
479 
480     VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
481 
482     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
483 }
484 
485 static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
486 {
487     return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
488            VMXNET3_MAKE_BYTE(1, addr->a[1]) |
489            VMXNET3_MAKE_BYTE(2, addr->a[2]) |
490            VMXNET3_MAKE_BYTE(3, addr->a[3]);
491 }
492 
493 static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
494 {
495     return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
496            VMXNET3_MAKE_BYTE(1, addr->a[5]);
497 }
498 
499 static void
500 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
501 {
502     vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
503 }
504 
505 static inline void
506 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
507 {
508     vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
509 }
510 
511 static inline void
512 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
513 {
514     vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
515 }
516 
517 static void
518 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
519 {
520     vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
521 }
522 
523 static void
524 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
525 {
526     vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
527 }
528 
529 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
530 {
531     struct Vmxnet3_TxCompDesc txcq_descr;
532     PCIDevice *d = PCI_DEVICE(s);
533 
534     VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
535 
536     txcq_descr.txdIdx = tx_ridx;
537     txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
538 
539     vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);
540 
541     /* Flush changes in TX descriptor before changing the counter value */
542     smp_wmb();
543 
544     vmxnet3_inc_tx_completion_counter(s, qidx);
545     vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
546 }
547 
548 static bool
549 vmxnet3_setup_tx_offloads(VMXNET3State *s)
550 {
551     switch (s->offload_mode) {
552     case VMXNET3_OM_NONE:
553         net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
554         break;
555 
556     case VMXNET3_OM_CSUM:
557         net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
558         VMW_PKPRN("L4 CSO requested\n");
559         break;
560 
561     case VMXNET3_OM_TSO:
562         net_tx_pkt_build_vheader(s->tx_pkt, true, true,
563             s->cso_or_gso_size);
564         net_tx_pkt_update_ip_checksums(s->tx_pkt);
565         VMW_PKPRN("GSO offload requested.");
566         break;
567 
568     default:
569         g_assert_not_reached();
570         return false;
571     }
572 
573     return true;
574 }
575 
576 static void
577 vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
578                              const struct Vmxnet3_TxDesc *txd)
579 {
580     s->offload_mode = txd->om;
581     s->cso_or_gso_size = txd->msscof;
582     s->tci = txd->tci;
583     s->needs_vlan = txd->ti;
584 }
585 
586 typedef enum {
587     VMXNET3_PKT_STATUS_OK,
588     VMXNET3_PKT_STATUS_ERROR,
589     VMXNET3_PKT_STATUS_DISCARD,/* only for tx */
590     VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */
591 } Vmxnet3PktStatus;
592 
593 static void
594 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
595     Vmxnet3PktStatus status)
596 {
597     size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
598     struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
599 
600     switch (status) {
601     case VMXNET3_PKT_STATUS_OK:
602         switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
603         case ETH_PKT_BCAST:
604             stats->bcastPktsTxOK++;
605             stats->bcastBytesTxOK += tot_len;
606             break;
607         case ETH_PKT_MCAST:
608             stats->mcastPktsTxOK++;
609             stats->mcastBytesTxOK += tot_len;
610             break;
611         case ETH_PKT_UCAST:
612             stats->ucastPktsTxOK++;
613             stats->ucastBytesTxOK += tot_len;
614             break;
615         default:
616             g_assert_not_reached();
617         }
618 
619         if (s->offload_mode == VMXNET3_OM_TSO) {
620             /*
621              * According to VMWARE headers this statistic is a number
622              * of packets after segmentation but since we don't have
623              * this information in QEMU model, the best we can do is to
624              * provide number of non-segmented packets
625              */
626             stats->TSOPktsTxOK++;
627             stats->TSOBytesTxOK += tot_len;
628         }
629         break;
630 
631     case VMXNET3_PKT_STATUS_DISCARD:
632         stats->pktsTxDiscard++;
633         break;
634 
635     case VMXNET3_PKT_STATUS_ERROR:
636         stats->pktsTxError++;
637         break;
638 
639     default:
640         g_assert_not_reached();
641     }
642 }
643 
644 static void
645 vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
646                                 int qidx,
647                                 Vmxnet3PktStatus status)
648 {
649     struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
650     size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
651 
652     switch (status) {
653     case VMXNET3_PKT_STATUS_OUT_OF_BUF:
654         stats->pktsRxOutOfBuf++;
655         break;
656 
657     case VMXNET3_PKT_STATUS_ERROR:
658         stats->pktsRxError++;
659         break;
660     case VMXNET3_PKT_STATUS_OK:
661         switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
662         case ETH_PKT_BCAST:
663             stats->bcastPktsRxOK++;
664             stats->bcastBytesRxOK += tot_len;
665             break;
666         case ETH_PKT_MCAST:
667             stats->mcastPktsRxOK++;
668             stats->mcastBytesRxOK += tot_len;
669             break;
670         case ETH_PKT_UCAST:
671             stats->ucastPktsRxOK++;
672             stats->ucastBytesRxOK += tot_len;
673             break;
674         default:
675             g_assert_not_reached();
676         }
677 
678         if (tot_len > s->mtu) {
679             stats->LROPktsRxOK++;
680             stats->LROBytesRxOK += tot_len;
681         }
682         break;
683     default:
684         g_assert_not_reached();
685     }
686 }
687 
688 static inline bool
689 vmxnet3_pop_next_tx_descr(VMXNET3State *s,
690                           int qidx,
691                           struct Vmxnet3_TxDesc *txd,
692                           uint32_t *descr_idx)
693 {
694     Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
695     PCIDevice *d = PCI_DEVICE(s);
696 
697     vmxnet3_ring_read_curr_cell(d, ring, txd);
698     if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
699         /* Only read after generation field verification */
700         smp_rmb();
701         /* Re-read to be sure we got the latest version */
702         vmxnet3_ring_read_curr_cell(d, ring, txd);
703         VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
704         *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
705         vmxnet3_inc_tx_consumption_counter(s, qidx);
706         return true;
707     }
708 
709     return false;
710 }
711 
712 static bool
713 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
714 {
715     Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
716 
717     if (!vmxnet3_setup_tx_offloads(s)) {
718         status = VMXNET3_PKT_STATUS_ERROR;
719         goto func_exit;
720     }
721 
722     /* debug prints */
723     vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
724     net_tx_pkt_dump(s->tx_pkt);
725 
726     if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
727         status = VMXNET3_PKT_STATUS_DISCARD;
728         goto func_exit;
729     }
730 
731 func_exit:
732     vmxnet3_on_tx_done_update_stats(s, qidx, status);
733     return (status == VMXNET3_PKT_STATUS_OK);
734 }
735 
736 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
737 {
738     struct Vmxnet3_TxDesc txd;
739     uint32_t txd_idx;
740     uint32_t data_len;
741     hwaddr data_pa;
742 
743     for (;;) {
744         if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
745             break;
746         }
747 
748         vmxnet3_dump_tx_descr(&txd);
749 
750         if (!s->skip_current_tx_pkt) {
751             data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
752             data_pa = le64_to_cpu(txd.addr);
753 
754             if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
755                                                 data_pa,
756                                                 data_len)) {
757                 s->skip_current_tx_pkt = true;
758             }
759         }
760 
761         if (s->tx_sop) {
762             vmxnet3_tx_retrieve_metadata(s, &txd);
763             s->tx_sop = false;
764         }
765 
766         if (txd.eop) {
767             if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
768                 if (s->needs_vlan) {
769                     net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
770                 }
771 
772                 vmxnet3_send_packet(s, qidx);
773             } else {
774                 vmxnet3_on_tx_done_update_stats(s, qidx,
775                                                 VMXNET3_PKT_STATUS_ERROR);
776             }
777 
778             vmxnet3_complete_packet(s, qidx, txd_idx);
779             s->tx_sop = true;
780             s->skip_current_tx_pkt = false;
781             net_tx_pkt_reset(s->tx_pkt);
782         }
783     }
784 }
785 
786 static inline void
787 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
788                            struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
789 {
790     PCIDevice *d = PCI_DEVICE(s);
791 
792     Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
793     *didx = vmxnet3_ring_curr_cell_idx(ring);
794     vmxnet3_ring_read_curr_cell(d, ring, dbuf);
795 }
796 
797 static inline uint8_t
798 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
799 {
800     return s->rxq_descr[qidx].rx_ring[ridx].gen;
801 }
802 
803 static inline hwaddr
804 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
805 {
806     uint8_t ring_gen;
807     struct Vmxnet3_RxCompDesc rxcd;
808 
809     hwaddr daddr =
810         vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
811 
812     pci_dma_read(PCI_DEVICE(s),
813                  daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
814     ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
815 
816     if (rxcd.gen != ring_gen) {
817         *descr_gen = ring_gen;
818         vmxnet3_inc_rx_completion_counter(s, qidx);
819         return daddr;
820     }
821 
822     return 0;
823 }
824 
825 static inline void
826 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
827 {
828     vmxnet3_dec_rx_completion_counter(s, qidx);
829 }
830 
831 #define RXQ_IDX      (0)
832 #define RX_HEAD_BODY_RING (0)
833 #define RX_BODY_ONLY_RING (1)
834 
835 static bool
836 vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
837                                struct Vmxnet3_RxDesc *descr_buf,
838                                uint32_t *descr_idx,
839                                uint32_t *ridx)
840 {
841     for (;;) {
842         uint32_t ring_gen;
843         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
844                                    descr_buf, descr_idx);
845 
846         /* If no more free descriptors - return */
847         ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
848         if (descr_buf->gen != ring_gen) {
849             return false;
850         }
851 
852         /* Only read after generation field verification */
853         smp_rmb();
854         /* Re-read to be sure we got the latest version */
855         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
856                                    descr_buf, descr_idx);
857 
858         /* Mark current descriptor as used/skipped */
859         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
860 
861         /* If this is what we are looking for - return */
862         if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
863             *ridx = RX_HEAD_BODY_RING;
864             return true;
865         }
866     }
867 }
868 
869 static bool
870 vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
871                                struct Vmxnet3_RxDesc *d,
872                                uint32_t *didx,
873                                uint32_t *ridx)
874 {
875     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
876 
877     /* Try to find corresponding descriptor in head/body ring */
878     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
879         /* Only read after generation field verification */
880         smp_rmb();
881         /* Re-read to be sure we got the latest version */
882         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
883         if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
884             vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
885             *ridx = RX_HEAD_BODY_RING;
886             return true;
887         }
888     }
889 
890     /*
891      * If there is no free descriptors on head/body ring or next free
892      * descriptor is a head descriptor switch to body only ring
893      */
894     vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
895 
896     /* If no more free descriptors - return */
897     if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
898         /* Only read after generation field verification */
899         smp_rmb();
900         /* Re-read to be sure we got the latest version */
901         vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
902         assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
903         *ridx = RX_BODY_ONLY_RING;
904         vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
905         return true;
906     }
907 
908     return false;
909 }
910 
911 static inline bool
912 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
913                           struct Vmxnet3_RxDesc *descr_buf,
914                           uint32_t *descr_idx,
915                           uint32_t *ridx)
916 {
917     if (is_head || !s->rx_packets_compound) {
918         return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
919     } else {
920         return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
921     }
922 }
923 
924 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
925  * the implementation always passes an RxCompDesc with a "Checksum
926  * calculated and found correct" to the OS (cnc=0 and tuc=1, see
927  * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
928  *
929  * Therefore, if packet has the NEEDS_CSUM set, we must calculate
930  * and place a fully computed checksum into the tcp/udp header.
931  * Otherwise, the OS driver will receive a checksum-correct indication
932  * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
933  * having just the pseudo header csum value.
934  *
935  * While this is not a problem if packet is destined for local delivery,
936  * in the case the host OS performs forwarding, it will forward an
937  * incorrectly checksummed packet.
938  */
939 static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
940                                            const void *pkt_data,
941                                            size_t pkt_len)
942 {
943     struct virtio_net_hdr *vhdr;
944     bool isip4, isip6, istcp, isudp;
945     uint8_t *data;
946     int len;
947 
948     if (!net_rx_pkt_has_virt_hdr(pkt)) {
949         return;
950     }
951 
952     vhdr = net_rx_pkt_get_vhdr(pkt);
953     if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
954         return;
955     }
956 
957     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
958     if (!(isip4 || isip6) || !(istcp || isudp)) {
959         return;
960     }
961 
962     vmxnet3_dump_virt_hdr(vhdr);
963 
964     /* Validate packet len: csum_start + scum_offset + length of csum field */
965     if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
966         VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
967                   "cannot calculate checksum",
968                   pkt_len, vhdr->csum_start, vhdr->csum_offset);
969         return;
970     }
971 
972     data = (uint8_t *)pkt_data + vhdr->csum_start;
973     len = pkt_len - vhdr->csum_start;
974     /* Put the checksum obtained into the packet */
975     stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));
976 
977     vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
978     vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
979 }
980 
981 static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
982     struct Vmxnet3_RxCompDesc *rxcd)
983 {
984     int csum_ok, is_gso;
985     bool isip4, isip6, istcp, isudp;
986     struct virtio_net_hdr *vhdr;
987     uint8_t offload_type;
988 
989     if (net_rx_pkt_is_vlan_stripped(pkt)) {
990         rxcd->ts = 1;
991         rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
992     }
993 
994     if (!net_rx_pkt_has_virt_hdr(pkt)) {
995         goto nocsum;
996     }
997 
998     vhdr = net_rx_pkt_get_vhdr(pkt);
999     /*
1000      * Checksum is valid when lower level tell so or when lower level
1001      * requires checksum offload telling that packet produced/bridged
1002      * locally and did travel over network after last checksum calculation
1003      * or production
1004      */
1005     csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
1006               VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
1007 
1008     offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
1009     is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
1010 
1011     if (!csum_ok && !is_gso) {
1012         goto nocsum;
1013     }
1014 
1015     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1016     if ((!istcp && !isudp) || (!isip4 && !isip6)) {
1017         goto nocsum;
1018     }
1019 
1020     rxcd->cnc = 0;
1021     rxcd->v4 = isip4 ? 1 : 0;
1022     rxcd->v6 = isip6 ? 1 : 0;
1023     rxcd->tcp = istcp ? 1 : 0;
1024     rxcd->udp = isudp ? 1 : 0;
1025     rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
1026     return;
1027 
1028 nocsum:
1029     rxcd->cnc = 1;
1030     return;
1031 }
1032 
1033 static void
1034 vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
1035                        const struct iovec *iov,
1036                        size_t start_iov_off,
1037                        hwaddr target_addr,
1038                        size_t bytes_to_copy)
1039 {
1040     size_t curr_off = 0;
1041     size_t copied = 0;
1042 
1043     while (bytes_to_copy) {
1044         if (start_iov_off < (curr_off + iov->iov_len)) {
1045             size_t chunk_len =
1046                 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
1047 
1048             pci_dma_write(pci_dev, target_addr + copied,
1049                           iov->iov_base + start_iov_off - curr_off,
1050                           chunk_len);
1051 
1052             copied += chunk_len;
1053             start_iov_off += chunk_len;
1054             curr_off = start_iov_off;
1055             bytes_to_copy -= chunk_len;
1056         } else {
1057             curr_off += iov->iov_len;
1058         }
1059         iov++;
1060     }
1061 }
1062 
1063 static bool
1064 vmxnet3_indicate_packet(VMXNET3State *s)
1065 {
1066     struct Vmxnet3_RxDesc rxd;
1067     PCIDevice *d = PCI_DEVICE(s);
1068     bool is_head = true;
1069     uint32_t rxd_idx;
1070     uint32_t rx_ridx = 0;
1071 
1072     struct Vmxnet3_RxCompDesc rxcd;
1073     uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
1074     hwaddr new_rxcd_pa = 0;
1075     hwaddr ready_rxcd_pa = 0;
1076     struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
1077     size_t bytes_copied = 0;
1078     size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
1079     uint16_t num_frags = 0;
1080     size_t chunk_size;
1081 
1082     net_rx_pkt_dump(s->rx_pkt);
1083 
1084     while (bytes_left > 0) {
1085 
1086         /* cannot add more frags to packet */
1087         if (num_frags == s->max_rx_frags) {
1088             break;
1089         }
1090 
1091         new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
1092         if (!new_rxcd_pa) {
1093             break;
1094         }
1095 
1096         if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
1097             break;
1098         }
1099 
1100         chunk_size = MIN(bytes_left, rxd.len);
1101         vmxnet3_pci_dma_writev(d, data, bytes_copied,
1102                                le64_to_cpu(rxd.addr), chunk_size);
1103         bytes_copied += chunk_size;
1104         bytes_left -= chunk_size;
1105 
1106         vmxnet3_dump_rx_descr(&rxd);
1107 
1108         if (ready_rxcd_pa != 0) {
1109             pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
1110         }
1111 
1112         memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
1113         rxcd.rxdIdx = rxd_idx;
1114         rxcd.len = chunk_size;
1115         rxcd.sop = is_head;
1116         rxcd.gen = new_rxcd_gen;
1117         rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;
1118 
1119         if (bytes_left == 0) {
1120             vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
1121         }
1122 
1123         VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
1124                   "sop %d csum_correct %lu",
1125                   (unsigned long) rx_ridx,
1126                   (unsigned long) rxcd.rxdIdx,
1127                   (unsigned long) rxcd.len,
1128                   (int) rxcd.sop,
1129                   (unsigned long) rxcd.tuc);
1130 
1131         is_head = false;
1132         ready_rxcd_pa = new_rxcd_pa;
1133         new_rxcd_pa = 0;
1134         num_frags++;
1135     }
1136 
1137     if (ready_rxcd_pa != 0) {
1138         rxcd.eop = 1;
1139         rxcd.err = (bytes_left != 0);
1140 
1141         pci_dma_write(d, ready_rxcd_pa, &rxcd, sizeof(rxcd));
1142 
1143         /* Flush RX descriptor changes */
1144         smp_wmb();
1145     }
1146 
1147     if (new_rxcd_pa != 0) {
1148         vmxnet3_revert_rxc_descr(s, RXQ_IDX);
1149     }
1150 
1151     vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);
1152 
1153     if (bytes_left == 0) {
1154         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
1155         return true;
1156     } else if (num_frags == s->max_rx_frags) {
1157         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
1158         return false;
1159     } else {
1160         vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
1161                                         VMXNET3_PKT_STATUS_OUT_OF_BUF);
1162         return false;
1163     }
1164 }
1165 
1166 static void
1167 vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1168                       uint64_t val, unsigned size)
1169 {
1170     VMXNET3State *s = opaque;
1171 
1172     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1173                         VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1174         int tx_queue_idx =
1175             VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1176                                      VMXNET3_REG_ALIGN);
1177         assert(tx_queue_idx <= s->txq_num);
1178         vmxnet3_process_tx_queue(s, tx_queue_idx);
1179         return;
1180     }
1181 
1182     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1183                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1184         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1185                                          VMXNET3_REG_ALIGN);
1186 
1187         VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1188 
1189         vmxnet3_on_interrupt_mask_changed(s, l, val);
1190         return;
1191     }
1192 
1193     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1194                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1195        VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1196                         VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1197         return;
1198     }
1199 
1200     VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1201               (uint64_t) addr, val, size);
1202 }
1203 
1204 static uint64_t
1205 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1206 {
1207     VMXNET3State *s = opaque;
1208 
1209     if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1210                         VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1211         int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1212                                          VMXNET3_REG_ALIGN);
1213         return s->interrupt_states[l].is_masked;
1214     }
1215 
1216     VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1217     return 0;
1218 }
1219 
1220 static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1221 {
1222     int i;
1223     for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1224         s->interrupt_states[i].is_asserted = false;
1225         s->interrupt_states[i].is_pending = false;
1226         s->interrupt_states[i].is_masked = true;
1227     }
1228 }
1229 
1230 static void vmxnet3_reset_mac(VMXNET3State *s)
1231 {
1232     memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1233     VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
1234 }
1235 
1236 static void vmxnet3_deactivate_device(VMXNET3State *s)
1237 {
1238     if (s->device_active) {
1239         VMW_CBPRN("Deactivating vmxnet3...");
1240         net_tx_pkt_reset(s->tx_pkt);
1241         net_tx_pkt_uninit(s->tx_pkt);
1242         net_rx_pkt_uninit(s->rx_pkt);
1243         s->device_active = false;
1244     }
1245 }
1246 
1247 static void vmxnet3_reset(VMXNET3State *s)
1248 {
1249     VMW_CBPRN("Resetting vmxnet3...");
1250 
1251     vmxnet3_deactivate_device(s);
1252     vmxnet3_reset_interrupt_states(s);
1253     s->drv_shmem = 0;
1254     s->tx_sop = true;
1255     s->skip_current_tx_pkt = false;
1256 }
1257 
1258 static void vmxnet3_update_rx_mode(VMXNET3State *s)
1259 {
1260     PCIDevice *d = PCI_DEVICE(s);
1261 
1262     s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1263                                            devRead.rxFilterConf.rxMode);
1264     VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1265 }
1266 
1267 static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1268 {
1269     int i;
1270     PCIDevice *d = PCI_DEVICE(s);
1271 
1272     /* Copy configuration from shared memory */
1273     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
1274                             devRead.rxFilterConf.vfTable,
1275                             s->vlan_table,
1276                             sizeof(s->vlan_table));
1277 
1278     /* Invert byte order when needed */
1279     for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1280         s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1281     }
1282 
1283     /* Dump configuration for debugging purposes */
1284     VMW_CFPRN("Configured VLANs:");
1285     for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1286         if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1287             VMW_CFPRN("\tVLAN %d is present", i);
1288         }
1289     }
1290 }
1291 
1292 static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1293 {
1294     PCIDevice *d = PCI_DEVICE(s);
1295 
1296     uint16_t list_bytes =
1297         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
1298                                   devRead.rxFilterConf.mfTableLen);
1299 
1300     s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1301 
1302     s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1303     if (!s->mcast_list) {
1304         if (s->mcast_list_len == 0) {
1305             VMW_CFPRN("Current multicast list is empty");
1306         } else {
1307             VMW_ERPRN("Failed to allocate multicast list of %d elements",
1308                       s->mcast_list_len);
1309         }
1310         s->mcast_list_len = 0;
1311     } else {
1312         int i;
1313         hwaddr mcast_list_pa =
1314             VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
1315                                       devRead.rxFilterConf.mfTablePA);
1316 
1317         pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);
1318 
1319         VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1320         for (i = 0; i < s->mcast_list_len; i++) {
1321             VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
1322         }
1323     }
1324 }
1325 
1326 static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
1327 {
1328     vmxnet3_update_rx_mode(s);
1329     vmxnet3_update_vlan_filters(s);
1330     vmxnet3_update_mcast_filters(s);
1331 }
1332 
1333 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1334 {
1335     uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1336     VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1337     return interrupt_mode;
1338 }
1339 
1340 static void vmxnet3_fill_stats(VMXNET3State *s)
1341 {
1342     int i;
1343     PCIDevice *d = PCI_DEVICE(s);
1344 
1345     if (!s->device_active)
1346         return;
1347 
1348     for (i = 0; i < s->txq_num; i++) {
1349         pci_dma_write(d,
1350                       s->txq_descr[i].tx_stats_pa,
1351                       &s->txq_descr[i].txq_stats,
1352                       sizeof(s->txq_descr[i].txq_stats));
1353     }
1354 
1355     for (i = 0; i < s->rxq_num; i++) {
1356         pci_dma_write(d,
1357                       s->rxq_descr[i].rx_stats_pa,
1358                       &s->rxq_descr[i].rxq_stats,
1359                       sizeof(s->rxq_descr[i].rxq_stats));
1360     }
1361 }
1362 
1363 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1364 {
1365     struct Vmxnet3_GOSInfo gos;
1366     PCIDevice *d = PCI_DEVICE(s);
1367 
1368     VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
1369                             &gos, sizeof(gos));
1370     s->rx_packets_compound =
1371         (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1372 
1373     VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1374 }
1375 
1376 static void
1377 vmxnet3_dump_conf_descr(const char *name,
1378                         struct Vmxnet3_VariableLenConfDesc *pm_descr)
1379 {
1380     VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1381               name, pm_descr->confVer, pm_descr->confLen);
1382 
1383 };
1384 
1385 static void vmxnet3_update_pm_state(VMXNET3State *s)
1386 {
1387     struct Vmxnet3_VariableLenConfDesc pm_descr;
1388     PCIDevice *d = PCI_DEVICE(s);
1389 
1390     pm_descr.confLen =
1391         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
1392     pm_descr.confVer =
1393         VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
1394     pm_descr.confPA =
1395         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);
1396 
1397     vmxnet3_dump_conf_descr("PM State", &pm_descr);
1398 }
1399 
1400 static void vmxnet3_update_features(VMXNET3State *s)
1401 {
1402     uint32_t guest_features;
1403     int rxcso_supported;
1404     PCIDevice *d = PCI_DEVICE(s);
1405 
1406     guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
1407                                                devRead.misc.uptFeatures);
1408 
1409     rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1410     s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1411     s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1412 
1413     VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1414               s->lro_supported, rxcso_supported,
1415               s->rx_vlan_stripping);
1416     if (s->peer_has_vhdr) {
1417         qemu_set_offload(qemu_get_queue(s->nic)->peer,
1418                          rxcso_supported,
1419                          s->lro_supported,
1420                          s->lro_supported,
1421                          0,
1422                          0);
1423     }
1424 }
1425 
1426 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1427 {
1428     return s->msix_used || s->msi_used || (intx ==
1429            (pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1));
1430 }
1431 
1432 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1433 {
1434     int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1435     if (idx >= max_ints) {
1436         hw_error("Bad interrupt index: %d\n", idx);
1437     }
1438 }
1439 
1440 static void vmxnet3_validate_interrupts(VMXNET3State *s)
1441 {
1442     int i;
1443 
1444     VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1445     vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1446 
1447     for (i = 0; i < s->txq_num; i++) {
1448         int idx = s->txq_descr[i].intr_idx;
1449         VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1450         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1451     }
1452 
1453     for (i = 0; i < s->rxq_num; i++) {
1454         int idx = s->rxq_descr[i].intr_idx;
1455         VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1456         vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1457     }
1458 }
1459 
1460 static void vmxnet3_validate_queues(VMXNET3State *s)
1461 {
1462     /*
1463     * txq_num and rxq_num are total number of queues
1464     * configured by guest. These numbers must not
1465     * exceed corresponding maximal values.
1466     */
1467 
1468     if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1469         hw_error("Bad TX queues number: %d\n", s->txq_num);
1470     }
1471 
1472     if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1473         hw_error("Bad RX queues number: %d\n", s->rxq_num);
1474     }
1475 }
1476 
1477 static void vmxnet3_activate_device(VMXNET3State *s)
1478 {
1479     int i;
1480     static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
1481     PCIDevice *d = PCI_DEVICE(s);
1482     hwaddr qdescr_table_pa;
1483     uint64_t pa;
1484     uint32_t size;
1485 
1486     /* Verify configuration consistency */
1487     if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
1488         VMW_ERPRN("Device configuration received from driver is invalid");
1489         return;
1490     }
1491 
1492     /* Verify if device is active */
1493     if (s->device_active) {
1494         VMW_CFPRN("Vmxnet3 device is active");
1495         return;
1496     }
1497 
1498     vmxnet3_adjust_by_guest_type(s);
1499     vmxnet3_update_features(s);
1500     vmxnet3_update_pm_state(s);
1501     vmxnet3_setup_rx_filtering(s);
1502     /* Cache fields from shared memory */
1503     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
1504     VMW_CFPRN("MTU is %u", s->mtu);
1505 
1506     s->max_rx_frags =
1507         VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);
1508 
1509     if (s->max_rx_frags == 0) {
1510         s->max_rx_frags = 1;
1511     }
1512 
1513     VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);
1514 
1515     s->event_int_idx =
1516         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
1517     assert(vmxnet3_verify_intx(s, s->event_int_idx));
1518     VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);
1519 
1520     s->auto_int_masking =
1521         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
1522     VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);
1523 
1524     s->txq_num =
1525         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
1526     s->rxq_num =
1527         VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);
1528 
1529     VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
1530     vmxnet3_validate_queues(s);
1531 
1532     qdescr_table_pa =
1533         VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
1534     VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);
1535 
1536     /*
1537      * Worst-case scenario is a packet that holds all TX rings space so
1538      * we calculate total size of all TX rings for max TX fragments number
1539      */
1540     s->max_tx_frags = 0;
1541 
1542     /* TX queues */
1543     for (i = 0; i < s->txq_num; i++) {
1544         hwaddr qdescr_pa =
1545             qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
1546 
1547         /* Read interrupt number for this TX queue */
1548         s->txq_descr[i].intr_idx =
1549             VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
1550         assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));
1551 
1552         VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
1553 
1554         /* Read rings memory locations for TX queues */
1555         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
1556         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
1557 
1558         vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
1559                           sizeof(struct Vmxnet3_TxDesc), false);
1560         VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);
1561 
1562         s->max_tx_frags += size;
1563 
1564         /* TXC ring */
1565         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
1566         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
1567         vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
1568                           sizeof(struct Vmxnet3_TxCompDesc), true);
1569         VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
1570 
1571         s->txq_descr[i].tx_stats_pa =
1572             qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
1573 
1574         memset(&s->txq_descr[i].txq_stats, 0,
1575                sizeof(s->txq_descr[i].txq_stats));
1576 
1577         /* Fill device-managed parameters for queues */
1578         VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
1579                                        ctrl.txThreshold,
1580                                        VMXNET3_DEF_TX_THRESHOLD);
1581     }
1582 
1583     /* Preallocate TX packet wrapper */
1584     VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
1585     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
1586                     s->max_tx_frags, s->peer_has_vhdr);
1587     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
1588 
1589     /* Read rings memory locations for RX queues */
1590     for (i = 0; i < s->rxq_num; i++) {
1591         int j;
1592         hwaddr qd_pa =
1593             qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
1594             i * sizeof(struct Vmxnet3_RxQueueDesc);
1595 
1596         /* Read interrupt number for this RX queue */
1597         s->rxq_descr[i].intr_idx =
1598             VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
1599         assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));
1600 
1601         VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
1602 
1603         /* Read rings memory locations */
1604         for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
1605             /* RX rings */
1606             pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
1607             size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
1608             vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
1609                               sizeof(struct Vmxnet3_RxDesc), false);
1610             VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
1611                       i, j, pa, size);
1612         }
1613 
1614         /* RXC ring */
1615         pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
1616         size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
1617         vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
1618                           sizeof(struct Vmxnet3_RxCompDesc), true);
1619         VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
1620 
1621         s->rxq_descr[i].rx_stats_pa =
1622             qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
1623         memset(&s->rxq_descr[i].rxq_stats, 0,
1624                sizeof(s->rxq_descr[i].rxq_stats));
1625     }
1626 
1627     vmxnet3_validate_interrupts(s);
1628 
1629     /* Make sure everything is in place before device activation */
1630     smp_wmb();
1631 
1632     vmxnet3_reset_mac(s);
1633 
1634     s->device_active = true;
1635 }
1636 
1637 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1638 {
1639     s->last_command = cmd;
1640 
1641     switch (cmd) {
1642     case VMXNET3_CMD_GET_PERM_MAC_HI:
1643         VMW_CBPRN("Set: Get upper part of permanent MAC");
1644         break;
1645 
1646     case VMXNET3_CMD_GET_PERM_MAC_LO:
1647         VMW_CBPRN("Set: Get lower part of permanent MAC");
1648         break;
1649 
1650     case VMXNET3_CMD_GET_STATS:
1651         VMW_CBPRN("Set: Get device statistics");
1652         vmxnet3_fill_stats(s);
1653         break;
1654 
1655     case VMXNET3_CMD_ACTIVATE_DEV:
1656         VMW_CBPRN("Set: Activating vmxnet3 device");
1657         vmxnet3_activate_device(s);
1658         break;
1659 
1660     case VMXNET3_CMD_UPDATE_RX_MODE:
1661         VMW_CBPRN("Set: Update rx mode");
1662         vmxnet3_update_rx_mode(s);
1663         break;
1664 
1665     case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1666         VMW_CBPRN("Set: Update VLAN filters");
1667         vmxnet3_update_vlan_filters(s);
1668         break;
1669 
1670     case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1671         VMW_CBPRN("Set: Update MAC filters");
1672         vmxnet3_update_mcast_filters(s);
1673         break;
1674 
1675     case VMXNET3_CMD_UPDATE_FEATURE:
1676         VMW_CBPRN("Set: Update features");
1677         vmxnet3_update_features(s);
1678         break;
1679 
1680     case VMXNET3_CMD_UPDATE_PMCFG:
1681         VMW_CBPRN("Set: Update power management config");
1682         vmxnet3_update_pm_state(s);
1683         break;
1684 
1685     case VMXNET3_CMD_GET_LINK:
1686         VMW_CBPRN("Set: Get link");
1687         break;
1688 
1689     case VMXNET3_CMD_RESET_DEV:
1690         VMW_CBPRN("Set: Reset device");
1691         vmxnet3_reset(s);
1692         break;
1693 
1694     case VMXNET3_CMD_QUIESCE_DEV:
1695         VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1696         vmxnet3_deactivate_device(s);
1697         break;
1698 
1699     case VMXNET3_CMD_GET_CONF_INTR:
1700         VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1701         break;
1702 
1703     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1704         VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1705                   "adaptive ring info flags");
1706         break;
1707 
1708     case VMXNET3_CMD_GET_DID_LO:
1709         VMW_CBPRN("Set: Get lower part of device ID");
1710         break;
1711 
1712     case VMXNET3_CMD_GET_DID_HI:
1713         VMW_CBPRN("Set: Get upper part of device ID");
1714         break;
1715 
1716     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1717         VMW_CBPRN("Set: Get device extra info");
1718         break;
1719 
1720     default:
1721         VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1722         break;
1723     }
1724 }
1725 
1726 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1727 {
1728     uint64_t ret;
1729 
1730     switch (s->last_command) {
1731     case VMXNET3_CMD_ACTIVATE_DEV:
1732         ret = (s->device_active) ? 0 : 1;
1733         VMW_CFPRN("Device active: %" PRIx64, ret);
1734         break;
1735 
1736     case VMXNET3_CMD_RESET_DEV:
1737     case VMXNET3_CMD_QUIESCE_DEV:
1738     case VMXNET3_CMD_GET_QUEUE_STATUS:
1739     case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1740         ret = 0;
1741         break;
1742 
1743     case VMXNET3_CMD_GET_LINK:
1744         ret = s->link_status_and_speed;
1745         VMW_CFPRN("Link and speed: %" PRIx64, ret);
1746         break;
1747 
1748     case VMXNET3_CMD_GET_PERM_MAC_LO:
1749         ret = vmxnet3_get_mac_low(&s->perm_mac);
1750         break;
1751 
1752     case VMXNET3_CMD_GET_PERM_MAC_HI:
1753         ret = vmxnet3_get_mac_high(&s->perm_mac);
1754         break;
1755 
1756     case VMXNET3_CMD_GET_CONF_INTR:
1757         ret = vmxnet3_get_interrupt_config(s);
1758         break;
1759 
1760     case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1761         ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1762         break;
1763 
1764     case VMXNET3_CMD_GET_DID_LO:
1765         ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1766         break;
1767 
1768     case VMXNET3_CMD_GET_DID_HI:
1769         ret = VMXNET3_DEVICE_REVISION;
1770         break;
1771 
1772     default:
1773         VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1774         ret = 0;
1775         break;
1776     }
1777 
1778     return ret;
1779 }
1780 
1781 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1782 {
1783     uint32_t events;
1784     PCIDevice *d = PCI_DEVICE(s);
1785 
1786     VMW_CBPRN("Setting events: 0x%x", val);
1787     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
1788     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1789 }
1790 
1791 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1792 {
1793     PCIDevice *d = PCI_DEVICE(s);
1794     uint32_t events;
1795 
1796     VMW_CBPRN("Clearing events: 0x%x", val);
1797     events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
1798     VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
1799 }
1800 
1801 static void
1802 vmxnet3_io_bar1_write(void *opaque,
1803                       hwaddr addr,
1804                       uint64_t val,
1805                       unsigned size)
1806 {
1807     VMXNET3State *s = opaque;
1808 
1809     switch (addr) {
1810     /* Vmxnet3 Revision Report Selection */
1811     case VMXNET3_REG_VRRS:
1812         VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1813                   val, size);
1814         break;
1815 
1816     /* UPT Version Report Selection */
1817     case VMXNET3_REG_UVRS:
1818         VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1819                   val, size);
1820         break;
1821 
1822     /* Driver Shared Address Low */
1823     case VMXNET3_REG_DSAL:
1824         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1825                   val, size);
1826         /*
1827          * Guest driver will first write the low part of the shared
1828          * memory address. We save it to temp variable and set the
1829          * shared address only after we get the high part
1830          */
1831         if (val == 0) {
1832             vmxnet3_deactivate_device(s);
1833         }
1834         s->temp_shared_guest_driver_memory = val;
1835         s->drv_shmem = 0;
1836         break;
1837 
1838     /* Driver Shared Address High */
1839     case VMXNET3_REG_DSAH:
1840         VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1841                   val, size);
1842         /*
1843          * Set the shared memory between guest driver and device.
1844          * We already should have low address part.
1845          */
1846         s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1847         break;
1848 
1849     /* Command */
1850     case VMXNET3_REG_CMD:
1851         VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1852                   val, size);
1853         vmxnet3_handle_command(s, val);
1854         break;
1855 
1856     /* MAC Address Low */
1857     case VMXNET3_REG_MACL:
1858         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1859                   val, size);
1860         s->temp_mac = val;
1861         break;
1862 
1863     /* MAC Address High */
1864     case VMXNET3_REG_MACH:
1865         VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1866                   val, size);
1867         vmxnet3_set_variable_mac(s, val, s->temp_mac);
1868         break;
1869 
1870     /* Interrupt Cause Register */
1871     case VMXNET3_REG_ICR:
1872         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1873                   val, size);
1874         g_assert_not_reached();
1875         break;
1876 
1877     /* Event Cause Register */
1878     case VMXNET3_REG_ECR:
1879         VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1880                   val, size);
1881         vmxnet3_ack_events(s, val);
1882         break;
1883 
1884     default:
1885         VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1886                   addr, val, size);
1887         break;
1888     }
1889 }
1890 
1891 static uint64_t
1892 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1893 {
1894         VMXNET3State *s = opaque;
1895         uint64_t ret = 0;
1896 
1897         switch (addr) {
1898         /* Vmxnet3 Revision Report Selection */
1899         case VMXNET3_REG_VRRS:
1900             VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1901             ret = VMXNET3_DEVICE_REVISION;
1902             break;
1903 
1904         /* UPT Version Report Selection */
1905         case VMXNET3_REG_UVRS:
1906             VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1907             ret = VMXNET3_UPT_REVISION;
1908             break;
1909 
1910         /* Command */
1911         case VMXNET3_REG_CMD:
1912             VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1913             ret = vmxnet3_get_command_status(s);
1914             break;
1915 
1916         /* MAC Address Low */
1917         case VMXNET3_REG_MACL:
1918             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1919             ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1920             break;
1921 
1922         /* MAC Address High */
1923         case VMXNET3_REG_MACH:
1924             VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1925             ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1926             break;
1927 
1928         /*
1929          * Interrupt Cause Register
1930          * Used for legacy interrupts only so interrupt index always 0
1931          */
1932         case VMXNET3_REG_ICR:
1933             VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1934             if (vmxnet3_interrupt_asserted(s, 0)) {
1935                 vmxnet3_clear_interrupt(s, 0);
1936                 ret = true;
1937             } else {
1938                 ret = false;
1939             }
1940             break;
1941 
1942         default:
1943             VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size);
1944             break;
1945         }
1946 
1947         return ret;
1948 }
1949 
1950 static int
1951 vmxnet3_can_receive(NetClientState *nc)
1952 {
1953     VMXNET3State *s = qemu_get_nic_opaque(nc);
1954     return s->device_active &&
1955            VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1956 }
1957 
1958 static inline bool
1959 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1960 {
1961     uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1962     if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1963         return true;
1964     }
1965 
1966     return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1967 }
1968 
1969 static bool
1970 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1971 {
1972     int i;
1973     for (i = 0; i < s->mcast_list_len; i++) {
1974         if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1975             return true;
1976         }
1977     }
1978     return false;
1979 }
1980 
1981 static bool
1982 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1983     size_t size)
1984 {
1985     struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1986 
1987     if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1988         return true;
1989     }
1990 
1991     if (!vmxnet3_is_registered_vlan(s, data)) {
1992         return false;
1993     }
1994 
1995     switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
1996     case ETH_PKT_UCAST:
1997         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
1998             return false;
1999         }
2000         if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
2001             return false;
2002         }
2003         break;
2004 
2005     case ETH_PKT_BCAST:
2006         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
2007             return false;
2008         }
2009         break;
2010 
2011     case ETH_PKT_MCAST:
2012         if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
2013             return true;
2014         }
2015         if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
2016             return false;
2017         }
2018         if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
2019             return false;
2020         }
2021         break;
2022 
2023     default:
2024         g_assert_not_reached();
2025     }
2026 
2027     return true;
2028 }
2029 
2030 static ssize_t
2031 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
2032 {
2033     VMXNET3State *s = qemu_get_nic_opaque(nc);
2034     size_t bytes_indicated;
2035     uint8_t min_buf[MIN_BUF_SIZE];
2036 
2037     if (!vmxnet3_can_receive(nc)) {
2038         VMW_PKPRN("Cannot receive now");
2039         return -1;
2040     }
2041 
2042     if (s->peer_has_vhdr) {
2043         net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
2044         buf += sizeof(struct virtio_net_hdr);
2045         size -= sizeof(struct virtio_net_hdr);
2046     }
2047 
2048     /* Pad to minimum Ethernet frame length */
2049     if (size < sizeof(min_buf)) {
2050         memcpy(min_buf, buf, size);
2051         memset(&min_buf[size], 0, sizeof(min_buf) - size);
2052         buf = min_buf;
2053         size = sizeof(min_buf);
2054     }
2055 
2056     net_rx_pkt_set_packet_type(s->rx_pkt,
2057         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
2058 
2059     if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2060         net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
2061         vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2062         net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2063         bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2064         if (bytes_indicated < size) {
2065             VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2066         }
2067     } else {
2068         VMW_PKPRN("Packet dropped by RX filter");
2069         bytes_indicated = size;
2070     }
2071 
2072     assert(size > 0);
2073     assert(bytes_indicated != 0);
2074     return bytes_indicated;
2075 }
2076 
2077 static void vmxnet3_set_link_status(NetClientState *nc)
2078 {
2079     VMXNET3State *s = qemu_get_nic_opaque(nc);
2080 
2081     if (nc->link_down) {
2082         s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2083     } else {
2084         s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2085     }
2086 
2087     vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2088     vmxnet3_trigger_interrupt(s, s->event_int_idx);
2089 }
2090 
2091 static NetClientInfo net_vmxnet3_info = {
2092         .type = NET_CLIENT_OPTIONS_KIND_NIC,
2093         .size = sizeof(NICState),
2094         .receive = vmxnet3_receive,
2095         .link_status_changed = vmxnet3_set_link_status,
2096 };
2097 
2098 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2099 {
2100     NetClientState *nc = qemu_get_queue(s->nic);
2101 
2102     if (qemu_has_vnet_hdr(nc->peer)) {
2103         return true;
2104     }
2105 
2106     return false;
2107 }
2108 
2109 static void vmxnet3_net_uninit(VMXNET3State *s)
2110 {
2111     g_free(s->mcast_list);
2112     vmxnet3_deactivate_device(s);
2113     qemu_del_nic(s->nic);
2114 }
2115 
2116 static void vmxnet3_net_init(VMXNET3State *s)
2117 {
2118     DeviceState *d = DEVICE(s);
2119 
2120     VMW_CBPRN("vmxnet3_net_init called...");
2121 
2122     qemu_macaddr_default_if_unset(&s->conf.macaddr);
2123 
2124     /* Windows guest will query the address that was set on init */
2125     memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2126 
2127     s->mcast_list = NULL;
2128     s->mcast_list_len = 0;
2129 
2130     s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2131 
2132     VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
2133 
2134     s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2135                           object_get_typename(OBJECT(s)),
2136                           d->id, s);
2137 
2138     s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2139     s->tx_sop = true;
2140     s->skip_current_tx_pkt = false;
2141     s->tx_pkt = NULL;
2142     s->rx_pkt = NULL;
2143     s->rx_vlan_stripping = false;
2144     s->lro_supported = false;
2145 
2146     if (s->peer_has_vhdr) {
2147         qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2148             sizeof(struct virtio_net_hdr));
2149 
2150         qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2151     }
2152 
2153     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2154 }
2155 
2156 static void
2157 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2158 {
2159     PCIDevice *d = PCI_DEVICE(s);
2160     int i;
2161     for (i = 0; i < num_vectors; i++) {
2162         msix_vector_unuse(d, i);
2163     }
2164 }
2165 
2166 static bool
2167 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2168 {
2169     PCIDevice *d = PCI_DEVICE(s);
2170     int i;
2171     for (i = 0; i < num_vectors; i++) {
2172         int res = msix_vector_use(d, i);
2173         if (0 > res) {
2174             VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
2175             vmxnet3_unuse_msix_vectors(s, i);
2176             return false;
2177         }
2178     }
2179     return true;
2180 }
2181 
2182 static bool
2183 vmxnet3_init_msix(VMXNET3State *s)
2184 {
2185     PCIDevice *d = PCI_DEVICE(s);
2186     int res = msix_init(d, VMXNET3_MAX_INTRS,
2187                         &s->msix_bar,
2188                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2189                         &s->msix_bar,
2190                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2191                         VMXNET3_MSIX_OFFSET(s));
2192 
2193     if (0 > res) {
2194         VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2195         s->msix_used = false;
2196     } else {
2197         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2198             VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
2199             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2200             s->msix_used = false;
2201         } else {
2202             s->msix_used = true;
2203         }
2204     }
2205     return s->msix_used;
2206 }
2207 
2208 static void
2209 vmxnet3_cleanup_msix(VMXNET3State *s)
2210 {
2211     PCIDevice *d = PCI_DEVICE(s);
2212 
2213     if (s->msix_used) {
2214         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2215         msix_uninit(d, &s->msix_bar, &s->msix_bar);
2216     }
2217 }
2218 
2219 #define VMXNET3_USE_64BIT         (true)
2220 #define VMXNET3_PER_VECTOR_MASK   (false)
2221 
2222 static bool
2223 vmxnet3_init_msi(VMXNET3State *s)
2224 {
2225     PCIDevice *d = PCI_DEVICE(s);
2226     int res;
2227 
2228     res = msi_init(d, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
2229                    VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
2230     if (0 > res) {
2231         VMW_WRPRN("Failed to initialize MSI, error %d", res);
2232         s->msi_used = false;
2233     } else {
2234         s->msi_used = true;
2235     }
2236 
2237     return s->msi_used;
2238 }
2239 
2240 static void
2241 vmxnet3_cleanup_msi(VMXNET3State *s)
2242 {
2243     PCIDevice *d = PCI_DEVICE(s);
2244 
2245     if (s->msi_used) {
2246         msi_uninit(d);
2247     }
2248 }
2249 
2250 static void
2251 vmxnet3_msix_save(QEMUFile *f, void *opaque)
2252 {
2253     PCIDevice *d = PCI_DEVICE(opaque);
2254     msix_save(d, f);
2255 }
2256 
2257 static int
2258 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
2259 {
2260     PCIDevice *d = PCI_DEVICE(opaque);
2261     msix_load(d, f);
2262     return 0;
2263 }
2264 
2265 static const MemoryRegionOps b0_ops = {
2266     .read = vmxnet3_io_bar0_read,
2267     .write = vmxnet3_io_bar0_write,
2268     .endianness = DEVICE_LITTLE_ENDIAN,
2269     .impl = {
2270             .min_access_size = 4,
2271             .max_access_size = 4,
2272     },
2273 };
2274 
2275 static const MemoryRegionOps b1_ops = {
2276     .read = vmxnet3_io_bar1_read,
2277     .write = vmxnet3_io_bar1_write,
2278     .endianness = DEVICE_LITTLE_ENDIAN,
2279     .impl = {
2280             .min_access_size = 4,
2281             .max_access_size = 4,
2282     },
2283 };
2284 
2285 static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
2286 {
2287     uint64_t dsn_payload;
2288     uint8_t *dsnp = (uint8_t *)&dsn_payload;
2289 
2290     dsnp[0] = 0xfe;
2291     dsnp[1] = s->conf.macaddr.a[3];
2292     dsnp[2] = s->conf.macaddr.a[4];
2293     dsnp[3] = s->conf.macaddr.a[5];
2294     dsnp[4] = s->conf.macaddr.a[0];
2295     dsnp[5] = s->conf.macaddr.a[1];
2296     dsnp[6] = s->conf.macaddr.a[2];
2297     dsnp[7] = 0xff;
2298     return dsn_payload;
2299 }
2300 
2301 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2302 {
2303     DeviceState *dev = DEVICE(pci_dev);
2304     VMXNET3State *s = VMXNET3(pci_dev);
2305 
2306     VMW_CBPRN("Starting init...");
2307 
2308     memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2309                           "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2310     pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2311                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2312 
2313     memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2314                           "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2315     pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2316                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2317 
2318     memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2319                        VMXNET3_MSIX_BAR_SIZE);
2320     pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2321                      PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2322 
2323     vmxnet3_reset_interrupt_states(s);
2324 
2325     /* Interrupt pin A */
2326     pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2327 
2328     if (!vmxnet3_init_msix(s)) {
2329         VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2330     }
2331 
2332     if (!vmxnet3_init_msi(s)) {
2333         VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent.");
2334     }
2335 
2336     vmxnet3_net_init(s);
2337 
2338     if (pci_is_express(pci_dev)) {
2339         if (pci_bus_is_express(pci_dev->bus)) {
2340             pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
2341         }
2342 
2343         pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
2344                               vmxnet3_device_serial_num(s));
2345     }
2346 
2347     register_savevm(dev, "vmxnet3-msix", -1, 1,
2348                     vmxnet3_msix_save, vmxnet3_msix_load, s);
2349 }
2350 
2351 static void vmxnet3_instance_init(Object *obj)
2352 {
2353     VMXNET3State *s = VMXNET3(obj);
2354     device_add_bootindex_property(obj, &s->conf.bootindex,
2355                                   "bootindex", "/ethernet-phy@0",
2356                                   DEVICE(obj), NULL);
2357 }
2358 
2359 static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2360 {
2361     DeviceState *dev = DEVICE(pci_dev);
2362     VMXNET3State *s = VMXNET3(pci_dev);
2363 
2364     VMW_CBPRN("Starting uninit...");
2365 
2366     unregister_savevm(dev, "vmxnet3-msix", s);
2367 
2368     vmxnet3_net_uninit(s);
2369 
2370     vmxnet3_cleanup_msix(s);
2371 
2372     vmxnet3_cleanup_msi(s);
2373 }
2374 
2375 static void vmxnet3_qdev_reset(DeviceState *dev)
2376 {
2377     PCIDevice *d = PCI_DEVICE(dev);
2378     VMXNET3State *s = VMXNET3(d);
2379 
2380     VMW_CBPRN("Starting QDEV reset...");
2381     vmxnet3_reset(s);
2382 }
2383 
2384 static bool vmxnet3_mc_list_needed(void *opaque)
2385 {
2386     return true;
2387 }
2388 
2389 static int vmxnet3_mcast_list_pre_load(void *opaque)
2390 {
2391     VMXNET3State *s = opaque;
2392 
2393     s->mcast_list = g_malloc(s->mcast_list_buff_size);
2394 
2395     return 0;
2396 }
2397 
2398 
2399 static void vmxnet3_pre_save(void *opaque)
2400 {
2401     VMXNET3State *s = opaque;
2402 
2403     s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2404 }
2405 
2406 static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
2407     .name = "vmxnet3/mcast_list",
2408     .version_id = 1,
2409     .minimum_version_id = 1,
2410     .pre_load = vmxnet3_mcast_list_pre_load,
2411     .needed = vmxnet3_mc_list_needed,
2412     .fields = (VMStateField[]) {
2413         VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
2414             mcast_list_buff_size),
2415         VMSTATE_END_OF_LIST()
2416     }
2417 };
2418 
2419 static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
2420 {
2421     r->pa = qemu_get_be64(f);
2422     r->size = qemu_get_be32(f);
2423     r->cell_size = qemu_get_be32(f);
2424     r->next = qemu_get_be32(f);
2425     r->gen = qemu_get_byte(f);
2426 }
2427 
2428 static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
2429 {
2430     qemu_put_be64(f, r->pa);
2431     qemu_put_be32(f, r->size);
2432     qemu_put_be32(f, r->cell_size);
2433     qemu_put_be32(f, r->next);
2434     qemu_put_byte(f, r->gen);
2435 }
2436 
2437 static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
2438     struct UPT1_TxStats *tx_stat)
2439 {
2440     tx_stat->TSOPktsTxOK = qemu_get_be64(f);
2441     tx_stat->TSOBytesTxOK = qemu_get_be64(f);
2442     tx_stat->ucastPktsTxOK = qemu_get_be64(f);
2443     tx_stat->ucastBytesTxOK = qemu_get_be64(f);
2444     tx_stat->mcastPktsTxOK = qemu_get_be64(f);
2445     tx_stat->mcastBytesTxOK = qemu_get_be64(f);
2446     tx_stat->bcastPktsTxOK = qemu_get_be64(f);
2447     tx_stat->bcastBytesTxOK = qemu_get_be64(f);
2448     tx_stat->pktsTxError = qemu_get_be64(f);
2449     tx_stat->pktsTxDiscard = qemu_get_be64(f);
2450 }
2451 
2452 static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
2453     struct UPT1_TxStats *tx_stat)
2454 {
2455     qemu_put_be64(f, tx_stat->TSOPktsTxOK);
2456     qemu_put_be64(f, tx_stat->TSOBytesTxOK);
2457     qemu_put_be64(f, tx_stat->ucastPktsTxOK);
2458     qemu_put_be64(f, tx_stat->ucastBytesTxOK);
2459     qemu_put_be64(f, tx_stat->mcastPktsTxOK);
2460     qemu_put_be64(f, tx_stat->mcastBytesTxOK);
2461     qemu_put_be64(f, tx_stat->bcastPktsTxOK);
2462     qemu_put_be64(f, tx_stat->bcastBytesTxOK);
2463     qemu_put_be64(f, tx_stat->pktsTxError);
2464     qemu_put_be64(f, tx_stat->pktsTxDiscard);
2465 }
2466 
2467 static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
2468 {
2469     Vmxnet3TxqDescr *r = pv;
2470 
2471     vmxnet3_get_ring_from_file(f, &r->tx_ring);
2472     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2473     r->intr_idx = qemu_get_byte(f);
2474     r->tx_stats_pa = qemu_get_be64(f);
2475 
2476     vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);
2477 
2478     return 0;
2479 }
2480 
2481 static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
2482 {
2483     Vmxnet3TxqDescr *r = pv;
2484 
2485     vmxnet3_put_ring_to_file(f, &r->tx_ring);
2486     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2487     qemu_put_byte(f, r->intr_idx);
2488     qemu_put_be64(f, r->tx_stats_pa);
2489     vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
2490 }
2491 
2492 static const VMStateInfo txq_descr_info = {
2493     .name = "txq_descr",
2494     .get = vmxnet3_get_txq_descr,
2495     .put = vmxnet3_put_txq_descr
2496 };
2497 
2498 static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
2499     struct UPT1_RxStats *rx_stat)
2500 {
2501     rx_stat->LROPktsRxOK = qemu_get_be64(f);
2502     rx_stat->LROBytesRxOK = qemu_get_be64(f);
2503     rx_stat->ucastPktsRxOK = qemu_get_be64(f);
2504     rx_stat->ucastBytesRxOK = qemu_get_be64(f);
2505     rx_stat->mcastPktsRxOK = qemu_get_be64(f);
2506     rx_stat->mcastBytesRxOK = qemu_get_be64(f);
2507     rx_stat->bcastPktsRxOK = qemu_get_be64(f);
2508     rx_stat->bcastBytesRxOK = qemu_get_be64(f);
2509     rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
2510     rx_stat->pktsRxError = qemu_get_be64(f);
2511 }
2512 
2513 static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
2514     struct UPT1_RxStats *rx_stat)
2515 {
2516     qemu_put_be64(f, rx_stat->LROPktsRxOK);
2517     qemu_put_be64(f, rx_stat->LROBytesRxOK);
2518     qemu_put_be64(f, rx_stat->ucastPktsRxOK);
2519     qemu_put_be64(f, rx_stat->ucastBytesRxOK);
2520     qemu_put_be64(f, rx_stat->mcastPktsRxOK);
2521     qemu_put_be64(f, rx_stat->mcastBytesRxOK);
2522     qemu_put_be64(f, rx_stat->bcastPktsRxOK);
2523     qemu_put_be64(f, rx_stat->bcastBytesRxOK);
2524     qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
2525     qemu_put_be64(f, rx_stat->pktsRxError);
2526 }
2527 
2528 static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
2529 {
2530     Vmxnet3RxqDescr *r = pv;
2531     int i;
2532 
2533     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2534         vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
2535     }
2536 
2537     vmxnet3_get_ring_from_file(f, &r->comp_ring);
2538     r->intr_idx = qemu_get_byte(f);
2539     r->rx_stats_pa = qemu_get_be64(f);
2540 
2541     vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);
2542 
2543     return 0;
2544 }
2545 
2546 static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
2547 {
2548     Vmxnet3RxqDescr *r = pv;
2549     int i;
2550 
2551     for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2552         vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
2553     }
2554 
2555     vmxnet3_put_ring_to_file(f, &r->comp_ring);
2556     qemu_put_byte(f, r->intr_idx);
2557     qemu_put_be64(f, r->rx_stats_pa);
2558     vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
2559 }
2560 
2561 static int vmxnet3_post_load(void *opaque, int version_id)
2562 {
2563     VMXNET3State *s = opaque;
2564     PCIDevice *d = PCI_DEVICE(s);
2565 
2566     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
2567                     s->max_tx_frags, s->peer_has_vhdr);
2568     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
2569 
2570     if (s->msix_used) {
2571         if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2572             VMW_WRPRN("Failed to re-use MSI-X vectors");
2573             msix_uninit(d, &s->msix_bar, &s->msix_bar);
2574             s->msix_used = false;
2575             return -1;
2576         }
2577     }
2578 
2579     vmxnet3_validate_queues(s);
2580     vmxnet3_validate_interrupts(s);
2581 
2582     return 0;
2583 }
2584 
2585 static const VMStateInfo rxq_descr_info = {
2586     .name = "rxq_descr",
2587     .get = vmxnet3_get_rxq_descr,
2588     .put = vmxnet3_put_rxq_descr
2589 };
2590 
2591 static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
2592 {
2593     Vmxnet3IntState *r = pv;
2594 
2595     r->is_masked = qemu_get_byte(f);
2596     r->is_pending = qemu_get_byte(f);
2597     r->is_asserted = qemu_get_byte(f);
2598 
2599     return 0;
2600 }
2601 
2602 static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
2603 {
2604     Vmxnet3IntState *r = pv;
2605 
2606     qemu_put_byte(f, r->is_masked);
2607     qemu_put_byte(f, r->is_pending);
2608     qemu_put_byte(f, r->is_asserted);
2609 }
2610 
2611 static const VMStateInfo int_state_info = {
2612     .name = "int_state",
2613     .get = vmxnet3_get_int_state,
2614     .put = vmxnet3_put_int_state
2615 };
2616 
2617 static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
2618 {
2619     VMXNET3State *s = VMXNET3(opaque);
2620 
2621     return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
2622 }
2623 
2624 static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
2625 {
2626     return !vmxnet3_vmstate_need_pcie_device(opaque);
2627 }
2628 
2629 static const VMStateDescription vmstate_vmxnet3_pcie_device = {
2630     .name = "vmxnet3/pcie",
2631     .version_id = 1,
2632     .minimum_version_id = 1,
2633     .needed = vmxnet3_vmstate_need_pcie_device,
2634     .fields = (VMStateField[]) {
2635         VMSTATE_PCIE_DEVICE(parent_obj, VMXNET3State),
2636         VMSTATE_END_OF_LIST()
2637     }
2638 };
2639 
2640 static const VMStateDescription vmstate_vmxnet3 = {
2641     .name = "vmxnet3",
2642     .version_id = 1,
2643     .minimum_version_id = 1,
2644     .pre_save = vmxnet3_pre_save,
2645     .post_load = vmxnet3_post_load,
2646     .fields = (VMStateField[]) {
2647             VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
2648                                 vmxnet3_vmstate_test_pci_device, 0,
2649                                 vmstate_pci_device, PCIDevice),
2650             VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
2651             VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
2652             VMSTATE_BOOL(lro_supported, VMXNET3State),
2653             VMSTATE_UINT32(rx_mode, VMXNET3State),
2654             VMSTATE_UINT32(mcast_list_len, VMXNET3State),
2655             VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
2656             VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
2657             VMSTATE_UINT32(mtu, VMXNET3State),
2658             VMSTATE_UINT16(max_rx_frags, VMXNET3State),
2659             VMSTATE_UINT32(max_tx_frags, VMXNET3State),
2660             VMSTATE_UINT8(event_int_idx, VMXNET3State),
2661             VMSTATE_BOOL(auto_int_masking, VMXNET3State),
2662             VMSTATE_UINT8(txq_num, VMXNET3State),
2663             VMSTATE_UINT8(rxq_num, VMXNET3State),
2664             VMSTATE_UINT32(device_active, VMXNET3State),
2665             VMSTATE_UINT32(last_command, VMXNET3State),
2666             VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
2667             VMSTATE_UINT32(temp_mac, VMXNET3State),
2668             VMSTATE_UINT64(drv_shmem, VMXNET3State),
2669             VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
2670 
2671             VMSTATE_ARRAY(txq_descr, VMXNET3State,
2672                 VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
2673                 Vmxnet3TxqDescr),
2674             VMSTATE_ARRAY(rxq_descr, VMXNET3State,
2675                 VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
2676                 Vmxnet3RxqDescr),
2677             VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
2678                 0, int_state_info, Vmxnet3IntState),
2679 
2680             VMSTATE_END_OF_LIST()
2681     },
2682     .subsections = (const VMStateDescription*[]) {
2683         &vmxstate_vmxnet3_mcast_list,
2684         &vmstate_vmxnet3_pcie_device,
2685         NULL
2686     }
2687 };
2688 
2689 static Property vmxnet3_properties[] = {
2690     DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
2691     DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
2692                     VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
2693     DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
2694                     VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
2695     DEFINE_PROP_END_OF_LIST(),
2696 };
2697 
2698 static void vmxnet3_realize(DeviceState *qdev, Error **errp)
2699 {
2700     VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
2701     PCIDevice *pci_dev = PCI_DEVICE(qdev);
2702     VMXNET3State *s = VMXNET3(qdev);
2703 
2704     if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
2705         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2706     }
2707 
2708     vc->parent_dc_realize(qdev, errp);
2709 }
2710 
2711 static void vmxnet3_class_init(ObjectClass *class, void *data)
2712 {
2713     DeviceClass *dc = DEVICE_CLASS(class);
2714     PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2715     VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
2716 
2717     c->realize = vmxnet3_pci_realize;
2718     c->exit = vmxnet3_pci_uninit;
2719     c->vendor_id = PCI_VENDOR_ID_VMWARE;
2720     c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2721     c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2722     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2723     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2724     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2725     vc->parent_dc_realize = dc->realize;
2726     dc->realize = vmxnet3_realize;
2727     dc->desc = "VMWare Paravirtualized Ethernet v3";
2728     dc->reset = vmxnet3_qdev_reset;
2729     dc->vmsd = &vmstate_vmxnet3;
2730     dc->props = vmxnet3_properties;
2731     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2732 }
2733 
2734 static const TypeInfo vmxnet3_info = {
2735     .name          = TYPE_VMXNET3,
2736     .parent        = TYPE_PCI_DEVICE,
2737     .class_size    = sizeof(VMXNET3Class),
2738     .instance_size = sizeof(VMXNET3State),
2739     .class_init    = vmxnet3_class_init,
2740     .instance_init = vmxnet3_instance_init,
2741 };
2742 
2743 static void vmxnet3_register_types(void)
2744 {
2745     VMW_CBPRN("vmxnet3_register_types called...");
2746     type_register_static(&vmxnet3_info);
2747 }
2748 
2749 type_init(vmxnet3_register_types)
2750