xref: /qemu/hw/net/e1000.c (revision dc293f60)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/net.h"
33 #include "net/checksum.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/module.h"
38 #include "qemu/range.h"
39 
40 #include "e1000x_common.h"
41 #include "trace.h"
42 #include "qom/object.h"
43 
/* Destination MAC address of an Ethernet broadcast frame (all bits set). */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/*
 * Define E1000_DEBUG (for instance by uncommenting the next line) to compile
 * in the DBGOUT() diagnostics.  Without it DBGOUT() is an empty statement.
 */
/* #define E1000_DEBUG */

#ifndef E1000_DEBUG
#define DBGOUT(what, fmt, ...) do {} while (0)
#else
/* Message categories; each enumerator is a bit position in debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are printed; TXERR and GENERAL by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a printf-style message to stderr if category 'what' is enabled. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* 14-byte Ethernet header plus a 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109     } tx;
110 
111     struct {
112         uint32_t val_in;    /* shifted in from guest driver */
113         uint16_t bitnum_in;
114         uint16_t bitnum_out;
115         uint16_t reading;
116         uint32_t old_eecd;
117     } eecd_state;
118 
119     QEMUTimer *autoneg_timer;
120 
121     QEMUTimer *mit_timer;      /* Mitigation timer. */
122     bool mit_timer_on;         /* Mitigation timer is running. */
123     bool mit_irq_level;        /* Tracks interrupt pin level. */
124     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
125 
126     QEMUTimer *flush_queue_timer;
127 
128 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
129 #define E1000_FLAG_AUTONEG_BIT 0
130 #define E1000_FLAG_MIT_BIT 1
131 #define E1000_FLAG_MAC_BIT 2
132 #define E1000_FLAG_TSO_BIT 3
133 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
134 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
135 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
136 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
137     uint32_t compat_flags;
138     bool received_tx_tso;
139     bool use_tso_for_migration;
140     e1000x_txd_props mig_props;
141 };
142 typedef struct E1000State_st E1000State;
143 
144 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
145 
146 struct E1000BaseClass {
147     PCIDeviceClass parent_class;
148     uint16_t phy_id2;
149 };
150 typedef struct E1000BaseClass E1000BaseClass;
151 
152 #define TYPE_E1000_BASE "e1000-base"
153 
154 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
155                      E1000, TYPE_E1000_BASE)
156 
157 
158 static void
159 e1000_link_up(E1000State *s)
160 {
161     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
162 
163     /* E1000_STATUS_LU is tested by e1000_can_receive() */
164     qemu_flush_queued_packets(qemu_get_queue(s->nic));
165 }
166 
167 static void
168 e1000_autoneg_done(E1000State *s)
169 {
170     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
171 
172     /* E1000_STATUS_LU is tested by e1000_can_receive() */
173     qemu_flush_queued_packets(qemu_get_queue(s->nic));
174 }
175 
176 static bool
177 have_autoneg(E1000State *s)
178 {
179     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
180 }
181 
182 static void
183 set_phy_ctrl(E1000State *s, int index, uint16_t val)
184 {
185     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
186     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
187                                    MII_CR_RESET |
188                                    MII_CR_RESTART_AUTO_NEG);
189 
190     /*
191      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
192      * migrate during auto negotiation, after migration the link will be
193      * down.
194      */
195     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
196         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
197     }
198 }
199 
200 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201     [PHY_CTRL] = set_phy_ctrl,
202 };
203 
204 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
206 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207 static const char phy_regcap[0x20] = {
208     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
210     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
211     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
212     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
213     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
214     [PHY_AUTONEG_EXP] = PHY_R,
215 };
216 
217 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
218 static const uint16_t phy_reg_init[] = {
219     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
220                    MII_CR_FULL_DUPLEX |
221                    MII_CR_AUTO_NEG_EN,
222 
223     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
224                    MII_SR_LINK_STATUS |   /* link initially up */
225                    MII_SR_AUTONEG_CAPS |
226                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
227                    MII_SR_PREAMBLE_SUPPRESS |
228                    MII_SR_EXTENDED_STATUS |
229                    MII_SR_10T_HD_CAPS |
230                    MII_SR_10T_FD_CAPS |
231                    MII_SR_100X_HD_CAPS |
232                    MII_SR_100X_FD_CAPS,
233 
234     [PHY_ID1] = 0x141,
235     /* [PHY_ID2] configured per DevId, from e1000_reset() */
236     [PHY_AUTONEG_ADV] = 0xde1,
237     [PHY_LP_ABILITY] = 0x1e0,
238     [PHY_1000T_CTRL] = 0x0e00,
239     [PHY_1000T_STATUS] = 0x3c00,
240     [M88E1000_PHY_SPEC_CTRL] = 0x360,
241     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
242     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
243 };
244 
245 static const uint32_t mac_reg_init[] = {
246     [PBA]     = 0x00100030,
247     [LEDCTL]  = 0x602,
248     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
249                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
250     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
251                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
252                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
253                 E1000_STATUS_LU,
254     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
255                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
256                 E1000_MANC_RMCP_EN,
257 };
258 
/*
 * Lower *curr to @value when @value is a smaller, non-zero delay.
 * A *curr of 0 means "not set yet" and a @value of 0 is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
267 
268 static void
269 set_interrupt_cause(E1000State *s, int index, uint32_t val)
270 {
271     PCIDevice *d = PCI_DEVICE(s);
272     uint32_t pending_ints;
273     uint32_t mit_delay;
274 
275     s->mac_reg[ICR] = val;
276 
277     /*
278      * Make sure ICR and ICS registers have the same value.
279      * The spec says that the ICS register is write-only.  However in practice,
280      * on real hardware ICS is readable, and for reads it has the same value as
281      * ICR (except that ICS does not have the clear on read behaviour of ICR).
282      *
283      * The VxWorks PRO/1000 driver uses this behaviour.
284      */
285     s->mac_reg[ICS] = val;
286 
287     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
288     if (!s->mit_irq_level && pending_ints) {
289         /*
290          * Here we detect a potential raising edge. We postpone raising the
291          * interrupt line if we are inside the mitigation delay window
292          * (s->mit_timer_on == 1).
293          * We provide a partial implementation of interrupt mitigation,
294          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
295          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
296          * RADV; relative timers based on TIDV and RDTR are not implemented.
297          */
298         if (s->mit_timer_on) {
299             return;
300         }
301         if (chkflag(MIT)) {
302             /* Compute the next mitigation delay according to pending
303              * interrupts and the current values of RADV (provided
304              * RDTR!=0), TADV and ITR.
305              * Then rearm the timer.
306              */
307             mit_delay = 0;
308             if (s->mit_ide &&
309                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
310                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
311             }
312             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
313                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
314             }
315             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
316 
317             /*
318              * According to e1000 SPEC, the Ethernet controller guarantees
319              * a maximum observable interrupt rate of 7813 interrupts/sec.
320              * Thus if mit_delay < 500 then the delay should be set to the
321              * minimum delay possible which is 500.
322              */
323             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
324 
325             s->mit_timer_on = 1;
326             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
327                       mit_delay * 256);
328             s->mit_ide = 0;
329         }
330     }
331 
332     s->mit_irq_level = (pending_ints != 0);
333     pci_set_irq(d, s->mit_irq_level);
334 }
335 
336 static void
337 e1000_mit_timer(void *opaque)
338 {
339     E1000State *s = opaque;
340 
341     s->mit_timer_on = 0;
342     /* Call set_interrupt_cause to update the irq level (if necessary). */
343     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
344 }
345 
346 static void
347 set_ics(E1000State *s, int index, uint32_t val)
348 {
349     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
350         s->mac_reg[IMS]);
351     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
352 }
353 
354 static void
355 e1000_autoneg_timer(void *opaque)
356 {
357     E1000State *s = opaque;
358     if (!qemu_get_queue(s->nic)->link_down) {
359         e1000_autoneg_done(s);
360         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
361     }
362 }
363 
364 static void e1000_reset(void *opaque)
365 {
366     E1000State *d = opaque;
367     E1000BaseClass *edc = E1000_GET_CLASS(d);
368     uint8_t *macaddr = d->conf.macaddr.a;
369 
370     timer_del(d->autoneg_timer);
371     timer_del(d->mit_timer);
372     timer_del(d->flush_queue_timer);
373     d->mit_timer_on = 0;
374     d->mit_irq_level = 0;
375     d->mit_ide = 0;
376     memset(d->phy_reg, 0, sizeof d->phy_reg);
377     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
378     d->phy_reg[PHY_ID2] = edc->phy_id2;
379     memset(d->mac_reg, 0, sizeof d->mac_reg);
380     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
381     d->rxbuf_min_shift = 1;
382     memset(&d->tx, 0, sizeof d->tx);
383 
384     if (qemu_get_queue(d->nic)->link_down) {
385         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
386     }
387 
388     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
389 }
390 
391 static void
392 set_ctrl(E1000State *s, int index, uint32_t val)
393 {
394     /* RST is self clearing */
395     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
396 }
397 
398 static void
399 e1000_flush_queue_timer(void *opaque)
400 {
401     E1000State *s = opaque;
402 
403     qemu_flush_queued_packets(qemu_get_queue(s->nic));
404 }
405 
406 static void
407 set_rx_control(E1000State *s, int index, uint32_t val)
408 {
409     s->mac_reg[RCTL] = val;
410     s->rxbuf_size = e1000x_rxbufsize(val);
411     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
412     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
413            s->mac_reg[RCTL]);
414     timer_mod(s->flush_queue_timer,
415               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
416 }
417 
418 static void
419 set_mdic(E1000State *s, int index, uint32_t val)
420 {
421     uint32_t data = val & E1000_MDIC_DATA_MASK;
422     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
423 
424     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
425         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
426     else if (val & E1000_MDIC_OP_READ) {
427         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
428         if (!(phy_regcap[addr] & PHY_R)) {
429             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
430             val |= E1000_MDIC_ERROR;
431         } else
432             val = (val ^ data) | s->phy_reg[addr];
433     } else if (val & E1000_MDIC_OP_WRITE) {
434         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
435         if (!(phy_regcap[addr] & PHY_W)) {
436             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
437             val |= E1000_MDIC_ERROR;
438         } else {
439             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
440                 phyreg_writeops[addr](s, index, data);
441             } else {
442                 s->phy_reg[addr] = data;
443             }
444         }
445     }
446     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
447 
448     if (val & E1000_MDIC_INT_EN) {
449         set_ics(s, 0, E1000_ICR_MDAC);
450     }
451 }
452 
453 static uint32_t
454 get_eecd(E1000State *s, int index)
455 {
456     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
457 
458     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
459            s->eecd_state.bitnum_out, s->eecd_state.reading);
460     if (!s->eecd_state.reading ||
461         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
462           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
463         ret |= E1000_EECD_DO;
464     return ret;
465 }
466 
467 static void
468 set_eecd(E1000State *s, int index, uint32_t val)
469 {
470     uint32_t oldval = s->eecd_state.old_eecd;
471 
472     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
473             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
474     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
475         return;
476     }
477     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
478         s->eecd_state.val_in = 0;
479         s->eecd_state.bitnum_in = 0;
480         s->eecd_state.bitnum_out = 0;
481         s->eecd_state.reading = 0;
482     }
483     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
484         return;
485     }
486     if (!(E1000_EECD_SK & val)) {               /* falling edge */
487         s->eecd_state.bitnum_out++;
488         return;
489     }
490     s->eecd_state.val_in <<= 1;
491     if (val & E1000_EECD_DI)
492         s->eecd_state.val_in |= 1;
493     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
494         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
495         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
496             EEPROM_READ_OPCODE_MICROWIRE);
497     }
498     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
499            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
500            s->eecd_state.reading);
501 }
502 
503 static uint32_t
504 flash_eerd_read(E1000State *s, int x)
505 {
506     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
507 
508     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
509         return (s->mac_reg[EERD]);
510 
511     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
512         return (E1000_EEPROM_RW_REG_DONE | r);
513 
514     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
515            E1000_EEPROM_RW_REG_DONE | r);
516 }
517 
/*
 * Compute an Internet checksum over part of a frame and store it in place.
 *
 * @data: start of the frame
 * @n:    frame length in bytes
 * @sloc: offset at which the 16-bit big-endian checksum is stored
 * @css:  offset of the first byte to sum
 * @cse:  offset of the last byte to sum; 0, or a value >= @n, means
 *        "through the end of the frame"
 *
 * Nothing is written unless both checksum bytes lie inside the summed
 * length.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;
    }

    /*
     * "n >= 2" keeps the unsigned "n - 1" from wrapping when n is 0, which
     * used to let a checksum be written into an empty frame.
     */
    if (n >= 2 && sloc < n - 1) {
        sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
530 
531 static inline void
532 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
533 {
534     if (!memcmp(arr, bcast, sizeof bcast)) {
535         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
536     } else if (arr[0] & 1) {
537         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
538     }
539 }
540 
541 static void
542 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
543 {
544     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
545                                     PTC1023, PTC1522 };
546 
547     NetClientState *nc = qemu_get_queue(s->nic);
548     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
549         qemu_receive_packet(nc, buf, size);
550     } else {
551         qemu_send_packet(nc, buf, size);
552     }
553     inc_tx_bcast_or_mcast_count(s, buf);
554     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
555 }
556 
557 static void
558 xmit_seg(E1000State *s)
559 {
560     uint16_t len;
561     unsigned int frames = s->tx.tso_frames, css, sofar;
562     struct e1000_tx *tp = &s->tx;
563     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
564 
565     if (tp->cptse) {
566         css = props->ipcss;
567         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
568                frames, tp->size, css);
569         if (props->ip) {    /* IPv4 */
570             stw_be_p(tp->data+css+2, tp->size - css);
571             stw_be_p(tp->data+css+4,
572                      lduw_be_p(tp->data + css + 4) + frames);
573         } else {         /* IPv6 */
574             stw_be_p(tp->data+css+4, tp->size - css);
575         }
576         css = props->tucss;
577         len = tp->size - css;
578         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
579         if (props->tcp) {
580             sofar = frames * props->mss;
581             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
582             if (props->paylen - sofar > props->mss) {
583                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
584             } else if (frames) {
585                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
586             }
587         } else {    /* UDP */
588             stw_be_p(tp->data+css+4, len);
589         }
590         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
591             unsigned int phsum;
592             // add pseudo-header length before checksum calculation
593             void *sp = tp->data + props->tucso;
594 
595             phsum = lduw_be_p(sp) + len;
596             phsum = (phsum >> 16) + (phsum & 0xffff);
597             stw_be_p(sp, phsum);
598         }
599         tp->tso_frames++;
600     }
601 
602     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
603         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
604     }
605     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
606         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
607     }
608     if (tp->vlan_needed) {
609         memmove(tp->vlan, tp->data, 4);
610         memmove(tp->data, tp->data + 4, 8);
611         memcpy(tp->data + 8, tp->vlan_header, 4);
612         e1000_send_packet(s, tp->vlan, tp->size + 4);
613     } else {
614         e1000_send_packet(s, tp->data, tp->size);
615     }
616 
617     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
618     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
619     s->mac_reg[GPTC] = s->mac_reg[TPT];
620     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
621     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
622 }
623 
624 static void
625 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
626 {
627     PCIDevice *d = PCI_DEVICE(s);
628     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
629     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
630     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
631     unsigned int msh = 0xfffff;
632     uint64_t addr;
633     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
634     struct e1000_tx *tp = &s->tx;
635 
636     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
637     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
638         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
639             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
640             s->use_tso_for_migration = 1;
641             tp->tso_frames = 0;
642         } else {
643             e1000x_read_tx_ctx_descr(xp, &tp->props);
644             s->use_tso_for_migration = 0;
645         }
646         return;
647     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
648         // data descriptor
649         if (tp->size == 0) {
650             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
651         }
652         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
653     } else {
654         // legacy descriptor
655         tp->cptse = 0;
656     }
657 
658     if (e1000x_vlan_enabled(s->mac_reg) &&
659         e1000x_is_vlan_txd(txd_lower) &&
660         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
661         tp->vlan_needed = 1;
662         stw_be_p(tp->vlan_header,
663                       le16_to_cpu(s->mac_reg[VET]));
664         stw_be_p(tp->vlan_header + 2,
665                       le16_to_cpu(dp->upper.fields.special));
666     }
667 
668     addr = le64_to_cpu(dp->buffer_addr);
669     if (tp->cptse) {
670         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
671         do {
672             bytes = split_size;
673             if (tp->size >= msh) {
674                 goto eop;
675             }
676             if (tp->size + bytes > msh)
677                 bytes = msh - tp->size;
678 
679             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
680             pci_dma_read(d, addr, tp->data + tp->size, bytes);
681             sz = tp->size + bytes;
682             if (sz >= tp->tso_props.hdr_len
683                 && tp->size < tp->tso_props.hdr_len) {
684                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
685             }
686             tp->size = sz;
687             addr += bytes;
688             if (sz == msh) {
689                 xmit_seg(s);
690                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
691                 tp->size = tp->tso_props.hdr_len;
692             }
693             split_size -= bytes;
694         } while (bytes && split_size);
695     } else {
696         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
697         pci_dma_read(d, addr, tp->data + tp->size, split_size);
698         tp->size += split_size;
699     }
700 
701 eop:
702     if (!(txd_lower & E1000_TXD_CMD_EOP))
703         return;
704     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
705         xmit_seg(s);
706     }
707     tp->tso_frames = 0;
708     tp->sum_needed = 0;
709     tp->vlan_needed = 0;
710     tp->size = 0;
711     tp->cptse = 0;
712 }
713 
714 static uint32_t
715 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
716 {
717     PCIDevice *d = PCI_DEVICE(s);
718     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
719 
720     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
721         return 0;
722     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
723                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
724     dp->upper.data = cpu_to_le32(txd_upper);
725     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
726                   &dp->upper, sizeof(dp->upper));
727     return E1000_ICR_TXDW;
728 }
729 
730 static uint64_t tx_desc_base(E1000State *s)
731 {
732     uint64_t bah = s->mac_reg[TDBAH];
733     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
734 
735     return (bah << 32) + bal;
736 }
737 
738 static void
739 start_xmit(E1000State *s)
740 {
741     PCIDevice *d = PCI_DEVICE(s);
742     dma_addr_t base;
743     struct e1000_tx_desc desc;
744     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
745 
746     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
747         DBGOUT(TX, "tx disabled\n");
748         return;
749     }
750 
751     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
752         base = tx_desc_base(s) +
753                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
754         pci_dma_read(d, base, &desc, sizeof(desc));
755 
756         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
757                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
758                desc.upper.data);
759 
760         process_tx_desc(s, &desc);
761         cause |= txdesc_writeback(s, base, &desc);
762 
763         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
764             s->mac_reg[TDH] = 0;
765         /*
766          * the following could happen only if guest sw assigns
767          * bogus values to TDT/TDLEN.
768          * there's nothing too intelligent we could do about this.
769          */
770         if (s->mac_reg[TDH] == tdh_start ||
771             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
772             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
773                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
774             break;
775         }
776     }
777     set_ics(s, 0, cause);
778 }
779 
780 static int
781 receive_filter(E1000State *s, const uint8_t *buf, int size)
782 {
783     uint32_t rctl = s->mac_reg[RCTL];
784     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
785 
786     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
787         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
788         uint16_t vid = lduw_be_p(buf + 14);
789         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
790                                  ((vid >> 5) & 0x7f));
791         if ((vfta & (1 << (vid & 0x1f))) == 0)
792             return 0;
793     }
794 
795     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
796         return 1;
797     }
798 
799     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
800         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
801         return 1;
802     }
803 
804     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
805         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
806         return 1;
807     }
808 
809     return e1000x_rx_group_filter(s->mac_reg, buf);
810 }
811 
812 static void
813 e1000_set_link_status(NetClientState *nc)
814 {
815     E1000State *s = qemu_get_nic_opaque(nc);
816     uint32_t old_status = s->mac_reg[STATUS];
817 
818     if (nc->link_down) {
819         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
820     } else {
821         if (have_autoneg(s) &&
822             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
823             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
824         } else {
825             e1000_link_up(s);
826         }
827     }
828 
829     if (s->mac_reg[STATUS] != old_status)
830         set_ics(s, 0, E1000_ICR_LSC);
831 }
832 
833 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
834 {
835     int bufs;
836     /* Fast-path short packets */
837     if (total_size <= s->rxbuf_size) {
838         return s->mac_reg[RDH] != s->mac_reg[RDT];
839     }
840     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
841         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
842     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
843         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
844             s->mac_reg[RDT] - s->mac_reg[RDH];
845     } else {
846         return false;
847     }
848     return total_size <= bufs * s->rxbuf_size;
849 }
850 
851 static bool
852 e1000_can_receive(NetClientState *nc)
853 {
854     E1000State *s = qemu_get_nic_opaque(nc);
855 
856     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
857         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
858 }
859 
860 static uint64_t rx_desc_base(E1000State *s)
861 {
862     uint64_t bah = s->mac_reg[RDBAH];
863     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
864 
865     return (bah << 32) + bal;
866 }
867 
868 static void
869 e1000_receiver_overrun(E1000State *s, size_t size)
870 {
871     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
872     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
873     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
874     set_ics(s, 0, E1000_ICS_RXO);
875 }
876 
/*
 * Receive one packet, given as a scatter/gather list, into the guest's
 * RX descriptor ring.
 *
 * Return value:
 *   -1    e1000x_hw_rx_enabled() reports false, or the ring cannot hold the
 *         packet (the overrun paths also update counters and raise RXO);
 *    0    the flush-queue timer is pending;
 *   size  otherwise: the (possibly padded) size for packets dropped as
 *         oversized or rejected by receive_filter(), or the size after
 *         VLAN tag removal for packets written to the ring.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    /* Header bytes inspected by the filter/VLAN code; may be redirected
     * to min_buf below. */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    /* Read offset inside the current iov element */
    size_t iov_ofs = 0;
    /* Bytes of total_size already assigned to descriptors */
    size_t desc_offset;
    size_t desc_size;
    /* Packet size plus e1000x_fcs_len() */
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /* Linearize into min_buf, zero-fill the tail, and continue with a
         * single-element iov that points at the padded copy. */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /* The first element is shorter than the header: gather the header
         * bytes into min_buf for filtering, leaving iov itself unchanged. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Remember the 16-bit value at offset 14 for the descriptor's
         * "special" field, then drop 4 bytes from the packet by starting
         * the copy at offset 4 with the first 12 header bytes moved up. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            /* Header lives in the first iov element: shift it in place */
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header was gathered into min_buf: write the 12 bytes back
             * into the iov at offset 4, then skip any leading elements
             * that lie entirely before the new start offset. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Fill descriptors starting at RDH until total_size bytes are covered */
    do {
        /* Portion of the packet accounted to this descriptor, capped at
         * the receive buffer size */
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            /* Only the first "size" bytes are copied from the iov; the
             * remainder of total_size is counted in the length but no
             * data is written for it. */
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy across iov element boundaries into guest memory */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back (also done for null descriptors) */
        pci_dma_write(d, base, &desc, sizeof(desc));

        /* Advance RDH, wrapping to 0 at the end of the ring */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        /* Bail out if RDH came back to where it started, or if the starting
         * RDH was already outside the ring described by RDLEN. */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Always signal RXT0; add RXDMT0 when the descriptor bytes between RDH
     * and RDT are at or below RDLEN >> rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1020 
1021 static ssize_t
1022 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1023 {
1024     const struct iovec iov = {
1025         .iov_base = (uint8_t *)buf,
1026         .iov_len = size
1027     };
1028 
1029     return e1000_receive_iov(nc, &iov, 1);
1030 }
1031 
1032 static uint32_t
1033 mac_readreg(E1000State *s, int index)
1034 {
1035     return s->mac_reg[index];
1036 }
1037 
1038 static uint32_t
1039 mac_low4_read(E1000State *s, int index)
1040 {
1041     return s->mac_reg[index] & 0xf;
1042 }
1043 
1044 static uint32_t
1045 mac_low11_read(E1000State *s, int index)
1046 {
1047     return s->mac_reg[index] & 0x7ff;
1048 }
1049 
1050 static uint32_t
1051 mac_low13_read(E1000State *s, int index)
1052 {
1053     return s->mac_reg[index] & 0x1fff;
1054 }
1055 
1056 static uint32_t
1057 mac_low16_read(E1000State *s, int index)
1058 {
1059     return s->mac_reg[index] & 0xffff;
1060 }
1061 
1062 static uint32_t
1063 mac_icr_read(E1000State *s, int index)
1064 {
1065     uint32_t ret = s->mac_reg[ICR];
1066 
1067     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1068     set_interrupt_cause(s, 0, 0);
1069     return ret;
1070 }
1071 
1072 static uint32_t
1073 mac_read_clr4(E1000State *s, int index)
1074 {
1075     uint32_t ret = s->mac_reg[index];
1076 
1077     s->mac_reg[index] = 0;
1078     return ret;
1079 }
1080 
1081 static uint32_t
1082 mac_read_clr8(E1000State *s, int index)
1083 {
1084     uint32_t ret = s->mac_reg[index];
1085 
1086     s->mac_reg[index] = 0;
1087     s->mac_reg[index-1] = 0;
1088     return ret;
1089 }
1090 
1091 static void
1092 mac_writereg(E1000State *s, int index, uint32_t val)
1093 {
1094     uint32_t macaddr[2];
1095 
1096     s->mac_reg[index] = val;
1097 
1098     if (index == RA + 1) {
1099         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1100         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1101         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1102     }
1103 }
1104 
1105 static void
1106 set_rdt(E1000State *s, int index, uint32_t val)
1107 {
1108     s->mac_reg[index] = val & 0xffff;
1109     if (e1000_has_rxbufs(s, 1)) {
1110         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1111     }
1112 }
1113 
1114 static void
1115 set_16bit(E1000State *s, int index, uint32_t val)
1116 {
1117     s->mac_reg[index] = val & 0xffff;
1118 }
1119 
1120 static void
1121 set_dlen(E1000State *s, int index, uint32_t val)
1122 {
1123     s->mac_reg[index] = val & 0xfff80;
1124 }
1125 
1126 static void
1127 set_tctl(E1000State *s, int index, uint32_t val)
1128 {
1129     s->mac_reg[index] = val;
1130     s->mac_reg[TDT] &= 0xffff;
1131     start_xmit(s);
1132 }
1133 
1134 static void
1135 set_icr(E1000State *s, int index, uint32_t val)
1136 {
1137     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1138     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1139 }
1140 
1141 static void
1142 set_imc(E1000State *s, int index, uint32_t val)
1143 {
1144     s->mac_reg[IMS] &= ~val;
1145     set_ics(s, 0, 0);
1146 }
1147 
1148 static void
1149 set_ims(E1000State *s, int index, uint32_t val)
1150 {
1151     s->mac_reg[IMS] |= val;
1152     set_ics(s, 0, 0);
1153 }
1154 
/* Shorthand: register x is read with the plain mac_readreg handler */
#define getreg(x)    [x] = mac_readreg
/* Signature of an MMIO read handler: (device state, register index) */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * MMIO read dispatch table, indexed by register index (the value
 * e1000_mmio_read() derives from the access address).  Entries that are
 * not listed stay NULL; e1000_mmio_read() reports those as unknown.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers: clr8 also zeroes the register at index - 1 */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers whose reads are masked to a limited bit width */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops, used to bounds-check register indexes */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1210 
/* Shorthand: register x is written with the plain mac_writereg handler */
#define putreg(x)    [x] = mac_writereg
/* Signature of an MMIO write handler: (device state, register index, value) */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * MMIO write dispatch table, indexed by register index (the value
 * e1000_mmio_write() derives from the access address).  Entries that are
 * not listed stay NULL; e1000_mmio_write() then treats the register as
 * read-only (if it has a read handler) or unknown.
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with dedicated write handlers */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators) */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops, used to bounds-check register indexes */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1241 
/* Low two bits of each mac_reg_access[] entry */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Entry value meaning "accessible only when compat flag x is set" */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO handlers compare (entry >> 2) against s->compat_flags when
 * the "flag needed" bit is set.  Registers without an entry are 0, i.e.
 * always accessible and not marked partial. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Gated by the MAC flag and additionally marked partially implemented */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1294 
1295 static void
1296 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1297                  unsigned size)
1298 {
1299     E1000State *s = opaque;
1300     unsigned int index = (addr & 0x1ffff) >> 2;
1301 
1302     if (index < NWRITEOPS && macreg_writeops[index]) {
1303         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1304             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1305             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1306                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1307                        "It is not fully implemented.\n", index<<2);
1308             }
1309             macreg_writeops[index](s, index, val);
1310         } else {    /* "flag needed" bit is set, but the flag is not active */
1311             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1312                    index<<2);
1313         }
1314     } else if (index < NREADOPS && macreg_readops[index]) {
1315         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1316                index<<2, val);
1317     } else {
1318         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1319                index<<2, val);
1320     }
1321 }
1322 
1323 static uint64_t
1324 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1325 {
1326     E1000State *s = opaque;
1327     unsigned int index = (addr & 0x1ffff) >> 2;
1328 
1329     if (index < NREADOPS && macreg_readops[index]) {
1330         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1331             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1332             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1333                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1334                        "It is not fully implemented.\n", index<<2);
1335             }
1336             return macreg_readops[index](s, index);
1337         } else {    /* "flag needed" bit is set, but the flag is not active */
1338             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1339                    index<<2);
1340         }
1341     } else {
1342         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1343     }
1344     return 0;
1345 }
1346 
1347 static const MemoryRegionOps e1000_mmio_ops = {
1348     .read = e1000_mmio_read,
1349     .write = e1000_mmio_write,
1350     .endianness = DEVICE_LITTLE_ENDIAN,
1351     .impl = {
1352         .min_access_size = 4,
1353         .max_access_size = 4,
1354     },
1355 };
1356 
1357 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1358                               unsigned size)
1359 {
1360     E1000State *s = opaque;
1361 
1362     (void)s;
1363     return 0;
1364 }
1365 
1366 static void e1000_io_write(void *opaque, hwaddr addr,
1367                            uint64_t val, unsigned size)
1368 {
1369     E1000State *s = opaque;
1370 
1371     (void)s;
1372 }
1373 
1374 static const MemoryRegionOps e1000_io_ops = {
1375     .read = e1000_io_read,
1376     .write = e1000_io_write,
1377     .endianness = DEVICE_LITTLE_ENDIAN,
1378 };
1379 
/*
 * VMState field test: true only when the incoming stream is version 1.
 * The opaque pointer is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    bool legacy_stream = (version_id == 1);

    return legacy_stream;
}
1384 
1385 static int e1000_pre_save(void *opaque)
1386 {
1387     E1000State *s = opaque;
1388     NetClientState *nc = qemu_get_queue(s->nic);
1389 
1390     /*
1391      * If link is down and auto-negotiation is supported and ongoing,
1392      * complete auto-negotiation immediately. This allows us to look
1393      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1394      */
1395     if (nc->link_down && have_autoneg(s)) {
1396         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1397     }
1398 
1399     /* Decide which set of props to migrate in the main structure */
1400     if (chkflag(TSO) || !s->use_tso_for_migration) {
1401         /* Either we're migrating with the extra subsection, in which
1402          * case the mig_props is always 'props' OR
1403          * we've not got the subsection, but 'props' was the last
1404          * updated.
1405          */
1406         s->mig_props = s->tx.props;
1407     } else {
1408         /* We're not using the subsection, and 'tso_props' was
1409          * the last updated.
1410          */
1411         s->mig_props = s->tx.tso_props;
1412     }
1413     return 0;
1414 }
1415 
1416 static int e1000_post_load(void *opaque, int version_id)
1417 {
1418     E1000State *s = opaque;
1419     NetClientState *nc = qemu_get_queue(s->nic);
1420 
1421     if (!chkflag(MIT)) {
1422         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1423             s->mac_reg[TADV] = 0;
1424         s->mit_irq_level = false;
1425     }
1426     s->mit_ide = 0;
1427     s->mit_timer_on = true;
1428     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1429 
1430     /* nc.link_down can't be migrated, so infer link_down according
1431      * to link status bit in mac_reg[STATUS].
1432      * Alternatively, restart link negotiation if it was in progress. */
1433     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1434 
1435     if (have_autoneg(s) &&
1436         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1437         nc->link_down = false;
1438         timer_mod(s->autoneg_timer,
1439                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1440     }
1441 
1442     s->tx.props = s->mig_props;
1443     if (!s->received_tx_tso) {
1444         /* We received only one set of offload data (tx.props)
1445          * and haven't got tx.tso_props.  The best we can do
1446          * is dupe the data.
1447          */
1448         s->tx.tso_props = s->mig_props;
1449     }
1450     return 0;
1451 }
1452 
1453 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1454 {
1455     E1000State *s = opaque;
1456     s->received_tx_tso = true;
1457     return 0;
1458 }
1459 
1460 static bool e1000_mit_state_needed(void *opaque)
1461 {
1462     E1000State *s = opaque;
1463 
1464     return chkflag(MIT);
1465 }
1466 
1467 static bool e1000_full_mac_needed(void *opaque)
1468 {
1469     E1000State *s = opaque;
1470 
1471     return chkflag(MAC);
1472 }
1473 
1474 static bool e1000_tso_state_needed(void *opaque)
1475 {
1476     E1000State *s = opaque;
1477 
1478     return chkflag(TSO);
1479 }
1480 
/*
 * Migration subsection with the interrupt-mitigation registers and the
 * mitigated IRQ level.  Sent only when e1000_mit_state_needed() is true.
 * The order of the fields defines the stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1495 
/*
 * Migration subsection carrying the whole mac_reg[] array (0x8000
 * entries).  Sent only when e1000_full_mac_needed() is true.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1506 
/*
 * Migration subsection carrying tx.tso_props.  Sent only when
 * e1000_tso_state_needed() is true; on load, e1000_tx_tso_post_load()
 * records that it was received.  The order of the fields defines the
 * stream layout; do not reorder.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1528 
/*
 * Main migration description of the device (current version 2, oldest
 * accepted version 1).  e1000_pre_save() prepares mig_props before saving
 * and e1000_post_load() rebuilds derived state after loading.
 *
 * The order of the fields defines the stream layout; do not reorder.
 * Only a fixed selection of MAC registers is sent here; the full register
 * array travels in the optional full_mac_state subsection.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties chosen by e1000_pre_save() */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register ranges: 32 from RA, 128 from MTA, 128 from VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional sections, each gated by its own .needed predicate */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1613 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * The table holds 64 16-bit words.  pci_e1000_realize() passes it to
 * e1000x_core_prepare_eeprom() together with the device id and the MAC
 * address to build the per-device eeprom_data.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1628 
1629 /* PCI interface */
1630 
1631 static void
1632 e1000_mmio_setup(E1000State *d)
1633 {
1634     int i;
1635     const uint32_t excluded_regs[] = {
1636         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1637         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1638     };
1639 
1640     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1641                           "e1000-mmio", PNPMMIO_SIZE);
1642     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1643     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1644         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1645                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1646     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1647 }
1648 
1649 static void
1650 pci_e1000_uninit(PCIDevice *dev)
1651 {
1652     E1000State *d = E1000(dev);
1653 
1654     timer_free(d->autoneg_timer);
1655     timer_free(d->mit_timer);
1656     timer_free(d->flush_queue_timer);
1657     qemu_del_nic(d->nic);
1658 }
1659 
1660 static NetClientInfo net_e1000_info = {
1661     .type = NET_CLIENT_DRIVER_NIC,
1662     .size = sizeof(NICState),
1663     .can_receive = e1000_can_receive,
1664     .receive = e1000_receive,
1665     .receive_iov = e1000_receive_iov,
1666     .link_status_changed = e1000_set_link_status,
1667 };
1668 
1669 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1670                                 uint32_t val, int len)
1671 {
1672     E1000State *s = E1000(pci_dev);
1673 
1674     pci_default_write_config(pci_dev, address, val, len);
1675 
1676     if (range_covers_byte(address, len, PCI_COMMAND) &&
1677         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1678         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1679     }
1680 }
1681 
1682 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1683 {
1684     DeviceState *dev = DEVICE(pci_dev);
1685     E1000State *d = E1000(pci_dev);
1686     uint8_t *pci_conf;
1687     uint8_t *macaddr;
1688 
1689     pci_dev->config_write = e1000_write_config;
1690 
1691     pci_conf = pci_dev->config;
1692 
1693     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1694     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1695 
1696     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1697 
1698     e1000_mmio_setup(d);
1699 
1700     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1701 
1702     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1703 
1704     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1705     macaddr = d->conf.macaddr.a;
1706 
1707     e1000x_core_prepare_eeprom(d->eeprom_data,
1708                                e1000_eeprom_template,
1709                                sizeof(e1000_eeprom_template),
1710                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1711                                macaddr);
1712 
1713     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1714                           object_get_typename(OBJECT(d)), dev->id, d);
1715 
1716     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1717 
1718     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1719     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1720     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1721                                         e1000_flush_queue_timer, d);
1722 }
1723 
1724 static void qdev_e1000_reset(DeviceState *dev)
1725 {
1726     E1000State *d = E1000(dev);
1727     e1000_reset(d);
1728 }
1729 
1730 static Property e1000_properties[] = {
1731     DEFINE_NIC_PROPERTIES(E1000State, conf),
1732     DEFINE_PROP_BIT("autonegotiation", E1000State,
1733                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1734     DEFINE_PROP_BIT("mitigation", E1000State,
1735                     compat_flags, E1000_FLAG_MIT_BIT, true),
1736     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1737                     compat_flags, E1000_FLAG_MAC_BIT, true),
1738     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1739                     compat_flags, E1000_FLAG_TSO_BIT, true),
1740     DEFINE_PROP_END_OF_LIST(),
1741 };
1742 
/*
 * Per-model parameters for one concrete e1000 type.  An entry is handed to
 * e1000_class_init() as class data when the type is registered.
 */
typedef struct E1000Info {
    /* QOM type name used when registering the model */
    const char *name;
    /* copied into PCIDeviceClass.device_id */
    uint16_t   device_id;
    /* copied into PCIDeviceClass.revision */
    uint8_t    revision;
    /* copied into E1000BaseClass.phy_id2 */
    uint16_t   phy_id2;
} E1000Info;
1749 
1750 static void e1000_class_init(ObjectClass *klass, void *data)
1751 {
1752     DeviceClass *dc = DEVICE_CLASS(klass);
1753     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1754     E1000BaseClass *e = E1000_CLASS(klass);
1755     const E1000Info *info = data;
1756 
1757     k->realize = pci_e1000_realize;
1758     k->exit = pci_e1000_uninit;
1759     k->romfile = "efi-e1000.rom";
1760     k->vendor_id = PCI_VENDOR_ID_INTEL;
1761     k->device_id = info->device_id;
1762     k->revision = info->revision;
1763     e->phy_id2 = info->phy_id2;
1764     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1765     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1766     dc->desc = "Intel Gigabit Ethernet";
1767     dc->reset = qdev_e1000_reset;
1768     dc->vmsd = &vmstate_e1000;
1769     device_class_set_props(dc, e1000_properties);
1770 }
1771 
1772 static void e1000_instance_init(Object *obj)
1773 {
1774     E1000State *n = E1000(obj);
1775     device_add_bootindex_property(obj, &n->conf.bootindex,
1776                                   "bootindex", "/ethernet-phy@0",
1777                                   DEVICE(n));
1778 }
1779 
1780 static const TypeInfo e1000_base_info = {
1781     .name          = TYPE_E1000_BASE,
1782     .parent        = TYPE_PCI_DEVICE,
1783     .instance_size = sizeof(E1000State),
1784     .instance_init = e1000_instance_init,
1785     .class_size    = sizeof(E1000BaseClass),
1786     .abstract      = true,
1787     .interfaces = (InterfaceInfo[]) {
1788         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1789         { },
1790     },
1791 };
1792 
1793 static const E1000Info e1000_devices[] = {
1794     {
1795         .name      = "e1000",
1796         .device_id = E1000_DEV_ID_82540EM,
1797         .revision  = 0x03,
1798         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1799     },
1800     {
1801         .name      = "e1000-82544gc",
1802         .device_id = E1000_DEV_ID_82544GC_COPPER,
1803         .revision  = 0x03,
1804         .phy_id2   = E1000_PHY_ID2_82544x,
1805     },
1806     {
1807         .name      = "e1000-82545em",
1808         .device_id = E1000_DEV_ID_82545EM_COPPER,
1809         .revision  = 0x03,
1810         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1811     },
1812 };
1813 
1814 static void e1000_register_types(void)
1815 {
1816     int i;
1817 
1818     type_register_static(&e1000_base_info);
1819     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1820         const E1000Info *info = &e1000_devices[i];
1821         TypeInfo type_info = {};
1822 
1823         type_info.name = info->name;
1824         type_info.parent = TYPE_E1000_BASE;
1825         type_info.class_data = (void *)info;
1826         type_info.class_init = e1000_class_init;
1827 
1828         type_register(&type_info);
1829     }
1830 }
1831 
1832 type_init(e1000_register_types)
1833