xref: /qemu/hw/net/e1000.c (revision c5955f4f)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/eth.h"
33 #include "net/net.h"
34 #include "net/checksum.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/dma.h"
37 #include "qemu/iov.h"
38 #include "qemu/module.h"
39 #include "qemu/range.h"
40 
41 #include "e1000x_common.h"
42 #include "trace.h"
43 #include "qom/object.h"
44 
/* Ethernet broadcast destination address (all ones). */
static const uint8_t bcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* Uncomment the next line to compile in the DBGOUT() tracing below. */
/* #define E1000_DEBUG */

#ifndef E1000_DEBUG
/* Tracing disabled: DBGOUT() expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#else
/* One bit position per trace category; the order fixes the bit values. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories that are printed; only TXERR and GENERAL by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr when category 'what' is enabled in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
/* Min. octets in an ethernet frame sans FCS */
#define MIN_BUF_SIZE      60

/* 14 + 4 octets; presumably Ethernet header plus one VLAN tag - confirm. */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
81 struct E1000State_st {
82     /*< private >*/
83     PCIDevice parent_obj;
84     /*< public >*/
85 
86     NICState *nic;
87     NICConf conf;
88     MemoryRegion mmio;
89     MemoryRegion io;
90 
91     uint32_t mac_reg[0x8000];
92     uint16_t phy_reg[0x20];
93     uint16_t eeprom_data[64];
94 
95     uint32_t rxbuf_size;
96     uint32_t rxbuf_min_shift;
97     struct e1000_tx {
98         unsigned char header[256];
99         unsigned char vlan_header[4];
100         /* Fields vlan and data must not be reordered or separated. */
101         unsigned char vlan[4];
102         unsigned char data[0x10000];
103         uint16_t size;
104         unsigned char vlan_needed;
105         unsigned char sum_needed;
106         bool cptse;
107         e1000x_txd_props props;
108         e1000x_txd_props tso_props;
109         uint16_t tso_frames;
110         bool busy;
111     } tx;
112 
113     struct {
114         uint32_t val_in;    /* shifted in from guest driver */
115         uint16_t bitnum_in;
116         uint16_t bitnum_out;
117         uint16_t reading;
118         uint32_t old_eecd;
119     } eecd_state;
120 
121     QEMUTimer *autoneg_timer;
122 
123     QEMUTimer *mit_timer;      /* Mitigation timer. */
124     bool mit_timer_on;         /* Mitigation timer is running. */
125     bool mit_irq_level;        /* Tracks interrupt pin level. */
126     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
127 
128     QEMUTimer *flush_queue_timer;
129 
130 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
131 #define E1000_FLAG_AUTONEG_BIT 0
132 #define E1000_FLAG_MIT_BIT 1
133 #define E1000_FLAG_MAC_BIT 2
134 #define E1000_FLAG_TSO_BIT 3
135 #define E1000_FLAG_VET_BIT 4
136 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
137 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
138 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
139 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
140 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
141 
142     uint32_t compat_flags;
143     bool received_tx_tso;
144     bool use_tso_for_migration;
145     e1000x_txd_props mig_props;
146 };
147 typedef struct E1000State_st E1000State;
148 
149 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
150 
151 struct E1000BaseClass {
152     PCIDeviceClass parent_class;
153     uint16_t phy_id2;
154 };
155 typedef struct E1000BaseClass E1000BaseClass;
156 
157 #define TYPE_E1000_BASE "e1000-base"
158 
159 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
160                      E1000, TYPE_E1000_BASE)
161 
162 
163 static void
164 e1000_link_up(E1000State *s)
165 {
166     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
167 
168     /* E1000_STATUS_LU is tested by e1000_can_receive() */
169     qemu_flush_queued_packets(qemu_get_queue(s->nic));
170 }
171 
172 static void
173 e1000_autoneg_done(E1000State *s)
174 {
175     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
176 
177     /* E1000_STATUS_LU is tested by e1000_can_receive() */
178     qemu_flush_queued_packets(qemu_get_queue(s->nic));
179 }
180 
181 static bool
182 have_autoneg(E1000State *s)
183 {
184     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
185 }
186 
187 static void
188 set_phy_ctrl(E1000State *s, int index, uint16_t val)
189 {
190     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
191     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
192                                    MII_CR_RESET |
193                                    MII_CR_RESTART_AUTO_NEG);
194 
195     /*
196      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
197      * migrate during auto negotiation, after migration the link will be
198      * down.
199      */
200     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
201         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
202     }
203 }
204 
205 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
206     [PHY_CTRL] = set_phy_ctrl,
207 };
208 
209 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
210 
211 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
212 static const char phy_regcap[0x20] = {
213     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
214     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
215     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
216     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
217     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
218     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
219     [PHY_AUTONEG_EXP] = PHY_R,
220 };
221 
222 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
223 static const uint16_t phy_reg_init[] = {
224     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
225                    MII_CR_FULL_DUPLEX |
226                    MII_CR_AUTO_NEG_EN,
227 
228     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
229                    MII_SR_LINK_STATUS |   /* link initially up */
230                    MII_SR_AUTONEG_CAPS |
231                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
232                    MII_SR_PREAMBLE_SUPPRESS |
233                    MII_SR_EXTENDED_STATUS |
234                    MII_SR_10T_HD_CAPS |
235                    MII_SR_10T_FD_CAPS |
236                    MII_SR_100X_HD_CAPS |
237                    MII_SR_100X_FD_CAPS,
238 
239     [PHY_ID1] = 0x141,
240     /* [PHY_ID2] configured per DevId, from e1000_reset() */
241     [PHY_AUTONEG_ADV] = 0xde1,
242     [PHY_LP_ABILITY] = 0x1e0,
243     [PHY_1000T_CTRL] = 0x0e00,
244     [PHY_1000T_STATUS] = 0x3c00,
245     [M88E1000_PHY_SPEC_CTRL] = 0x360,
246     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
247     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
248 };
249 
250 static const uint32_t mac_reg_init[] = {
251     [PBA]     = 0x00100030,
252     [LEDCTL]  = 0x602,
253     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
254                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
255     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
256                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
257                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
258                 E1000_STATUS_LU,
259     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
260                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
261                 E1000_MANC_RMCP_EN,
262 };
263 
/*
 * Keep the smallest non-zero delay seen so far in *curr.
 *
 * A *curr of 0 means "not set yet"; a 'value' of 0 is ignored.
 */
static inline void mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
272 
273 static void
274 set_interrupt_cause(E1000State *s, int index, uint32_t val)
275 {
276     PCIDevice *d = PCI_DEVICE(s);
277     uint32_t pending_ints;
278     uint32_t mit_delay;
279 
280     s->mac_reg[ICR] = val;
281 
282     /*
283      * Make sure ICR and ICS registers have the same value.
284      * The spec says that the ICS register is write-only.  However in practice,
285      * on real hardware ICS is readable, and for reads it has the same value as
286      * ICR (except that ICS does not have the clear on read behaviour of ICR).
287      *
288      * The VxWorks PRO/1000 driver uses this behaviour.
289      */
290     s->mac_reg[ICS] = val;
291 
292     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
293     if (!s->mit_irq_level && pending_ints) {
294         /*
295          * Here we detect a potential raising edge. We postpone raising the
296          * interrupt line if we are inside the mitigation delay window
297          * (s->mit_timer_on == 1).
298          * We provide a partial implementation of interrupt mitigation,
299          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
300          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
301          * RADV; relative timers based on TIDV and RDTR are not implemented.
302          */
303         if (s->mit_timer_on) {
304             return;
305         }
306         if (chkflag(MIT)) {
307             /* Compute the next mitigation delay according to pending
308              * interrupts and the current values of RADV (provided
309              * RDTR!=0), TADV and ITR.
310              * Then rearm the timer.
311              */
312             mit_delay = 0;
313             if (s->mit_ide &&
314                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
315                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
316             }
317             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
318                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
319             }
320             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
321 
322             /*
323              * According to e1000 SPEC, the Ethernet controller guarantees
324              * a maximum observable interrupt rate of 7813 interrupts/sec.
325              * Thus if mit_delay < 500 then the delay should be set to the
326              * minimum delay possible which is 500.
327              */
328             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
329 
330             s->mit_timer_on = 1;
331             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
332                       mit_delay * 256);
333             s->mit_ide = 0;
334         }
335     }
336 
337     s->mit_irq_level = (pending_ints != 0);
338     pci_set_irq(d, s->mit_irq_level);
339 }
340 
341 static void
342 e1000_mit_timer(void *opaque)
343 {
344     E1000State *s = opaque;
345 
346     s->mit_timer_on = 0;
347     /* Call set_interrupt_cause to update the irq level (if necessary). */
348     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
349 }
350 
351 static void
352 set_ics(E1000State *s, int index, uint32_t val)
353 {
354     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
355         s->mac_reg[IMS]);
356     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
357 }
358 
359 static void
360 e1000_autoneg_timer(void *opaque)
361 {
362     E1000State *s = opaque;
363     if (!qemu_get_queue(s->nic)->link_down) {
364         e1000_autoneg_done(s);
365         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
366     }
367 }
368 
369 static bool e1000_vet_init_need(void *opaque)
370 {
371     E1000State *s = opaque;
372 
373     return chkflag(VET);
374 }
375 
376 static void e1000_reset(void *opaque)
377 {
378     E1000State *d = opaque;
379     E1000BaseClass *edc = E1000_GET_CLASS(d);
380     uint8_t *macaddr = d->conf.macaddr.a;
381 
382     timer_del(d->autoneg_timer);
383     timer_del(d->mit_timer);
384     timer_del(d->flush_queue_timer);
385     d->mit_timer_on = 0;
386     d->mit_irq_level = 0;
387     d->mit_ide = 0;
388     memset(d->phy_reg, 0, sizeof d->phy_reg);
389     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
390     d->phy_reg[PHY_ID2] = edc->phy_id2;
391     memset(d->mac_reg, 0, sizeof d->mac_reg);
392     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
393     d->rxbuf_min_shift = 1;
394     memset(&d->tx, 0, sizeof d->tx);
395 
396     if (qemu_get_queue(d->nic)->link_down) {
397         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
398     }
399 
400     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
401 
402     if (e1000_vet_init_need(d)) {
403         d->mac_reg[VET] = ETH_P_VLAN;
404     }
405 }
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 e1000_flush_queue_timer(void *opaque)
416 {
417     E1000State *s = opaque;
418 
419     qemu_flush_queued_packets(qemu_get_queue(s->nic));
420 }
421 
422 static void
423 set_rx_control(E1000State *s, int index, uint32_t val)
424 {
425     s->mac_reg[RCTL] = val;
426     s->rxbuf_size = e1000x_rxbufsize(val);
427     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
428     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
429            s->mac_reg[RCTL]);
430     timer_mod(s->flush_queue_timer,
431               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
432 }
433 
434 static void
435 set_mdic(E1000State *s, int index, uint32_t val)
436 {
437     uint32_t data = val & E1000_MDIC_DATA_MASK;
438     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
439 
440     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
441         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
442     else if (val & E1000_MDIC_OP_READ) {
443         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
444         if (!(phy_regcap[addr] & PHY_R)) {
445             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
446             val |= E1000_MDIC_ERROR;
447         } else
448             val = (val ^ data) | s->phy_reg[addr];
449     } else if (val & E1000_MDIC_OP_WRITE) {
450         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
451         if (!(phy_regcap[addr] & PHY_W)) {
452             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
453             val |= E1000_MDIC_ERROR;
454         } else {
455             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
456                 phyreg_writeops[addr](s, index, data);
457             } else {
458                 s->phy_reg[addr] = data;
459             }
460         }
461     }
462     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
463 
464     if (val & E1000_MDIC_INT_EN) {
465         set_ics(s, 0, E1000_ICR_MDAC);
466     }
467 }
468 
469 static uint32_t
470 get_eecd(E1000State *s, int index)
471 {
472     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
473 
474     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
475            s->eecd_state.bitnum_out, s->eecd_state.reading);
476     if (!s->eecd_state.reading ||
477         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
478           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
479         ret |= E1000_EECD_DO;
480     return ret;
481 }
482 
483 static void
484 set_eecd(E1000State *s, int index, uint32_t val)
485 {
486     uint32_t oldval = s->eecd_state.old_eecd;
487 
488     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
489             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
490     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
491         return;
492     }
493     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
494         s->eecd_state.val_in = 0;
495         s->eecd_state.bitnum_in = 0;
496         s->eecd_state.bitnum_out = 0;
497         s->eecd_state.reading = 0;
498     }
499     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
500         return;
501     }
502     if (!(E1000_EECD_SK & val)) {               /* falling edge */
503         s->eecd_state.bitnum_out++;
504         return;
505     }
506     s->eecd_state.val_in <<= 1;
507     if (val & E1000_EECD_DI)
508         s->eecd_state.val_in |= 1;
509     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
510         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
511         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
512             EEPROM_READ_OPCODE_MICROWIRE);
513     }
514     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
515            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
516            s->eecd_state.reading);
517 }
518 
519 static uint32_t
520 flash_eerd_read(E1000State *s, int x)
521 {
522     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
523 
524     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
525         return (s->mac_reg[EERD]);
526 
527     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
528         return (E1000_EEPROM_RW_REG_DONE | r);
529 
530     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
531            E1000_EEPROM_RW_REG_DONE | r);
532 }
533 
/*
 * Compute an Internet checksum over part of a packet and store it in the
 * packet, big-endian.
 *
 * data: packet buffer
 * n:    number of valid bytes in data
 * sloc: offset at which the 16-bit checksum is stored
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 means "up to the end"
 *
 * Nothing is written unless sloc is below (end - 1), where end is the
 * effective length after applying cse.
 */
static void putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css,
                   uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
546 
547 static inline void
548 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
549 {
550     if (!memcmp(arr, bcast, sizeof bcast)) {
551         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
552     } else if (arr[0] & 1) {
553         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
554     }
555 }
556 
557 static void
558 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
559 {
560     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
561                                     PTC1023, PTC1522 };
562 
563     NetClientState *nc = qemu_get_queue(s->nic);
564     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
565         qemu_receive_packet(nc, buf, size);
566     } else {
567         qemu_send_packet(nc, buf, size);
568     }
569     inc_tx_bcast_or_mcast_count(s, buf);
570     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
571 }
572 
573 static void
574 xmit_seg(E1000State *s)
575 {
576     uint16_t len;
577     unsigned int frames = s->tx.tso_frames, css, sofar;
578     struct e1000_tx *tp = &s->tx;
579     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
580 
581     if (tp->cptse) {
582         css = props->ipcss;
583         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
584                frames, tp->size, css);
585         if (props->ip) {    /* IPv4 */
586             stw_be_p(tp->data+css+2, tp->size - css);
587             stw_be_p(tp->data+css+4,
588                      lduw_be_p(tp->data + css + 4) + frames);
589         } else {         /* IPv6 */
590             stw_be_p(tp->data+css+4, tp->size - css);
591         }
592         css = props->tucss;
593         len = tp->size - css;
594         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
595         if (props->tcp) {
596             sofar = frames * props->mss;
597             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
598             if (props->paylen - sofar > props->mss) {
599                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
600             } else if (frames) {
601                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
602             }
603         } else {    /* UDP */
604             stw_be_p(tp->data+css+4, len);
605         }
606         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
607             unsigned int phsum;
608             // add pseudo-header length before checksum calculation
609             void *sp = tp->data + props->tucso;
610 
611             phsum = lduw_be_p(sp) + len;
612             phsum = (phsum >> 16) + (phsum & 0xffff);
613             stw_be_p(sp, phsum);
614         }
615         tp->tso_frames++;
616     }
617 
618     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
619         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
620     }
621     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
622         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
623     }
624     if (tp->vlan_needed) {
625         memmove(tp->vlan, tp->data, 4);
626         memmove(tp->data, tp->data + 4, 8);
627         memcpy(tp->data + 8, tp->vlan_header, 4);
628         e1000_send_packet(s, tp->vlan, tp->size + 4);
629     } else {
630         e1000_send_packet(s, tp->data, tp->size);
631     }
632 
633     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
634     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
635     s->mac_reg[GPTC] = s->mac_reg[TPT];
636     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
637     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
638 }
639 
640 static void
641 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
642 {
643     PCIDevice *d = PCI_DEVICE(s);
644     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
645     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
646     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
647     unsigned int msh = 0xfffff;
648     uint64_t addr;
649     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
650     struct e1000_tx *tp = &s->tx;
651 
652     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
653     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
654         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
655             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
656             s->use_tso_for_migration = 1;
657             tp->tso_frames = 0;
658         } else {
659             e1000x_read_tx_ctx_descr(xp, &tp->props);
660             s->use_tso_for_migration = 0;
661         }
662         return;
663     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
664         // data descriptor
665         if (tp->size == 0) {
666             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
667         }
668         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
669     } else {
670         // legacy descriptor
671         tp->cptse = 0;
672     }
673 
674     if (e1000x_vlan_enabled(s->mac_reg) &&
675         e1000x_is_vlan_txd(txd_lower) &&
676         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
677         tp->vlan_needed = 1;
678         stw_be_p(tp->vlan_header,
679                       le16_to_cpu(s->mac_reg[VET]));
680         stw_be_p(tp->vlan_header + 2,
681                       le16_to_cpu(dp->upper.fields.special));
682     }
683 
684     addr = le64_to_cpu(dp->buffer_addr);
685     if (tp->cptse) {
686         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
687         do {
688             bytes = split_size;
689             if (tp->size >= msh) {
690                 goto eop;
691             }
692             if (tp->size + bytes > msh)
693                 bytes = msh - tp->size;
694 
695             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
696             pci_dma_read(d, addr, tp->data + tp->size, bytes);
697             sz = tp->size + bytes;
698             if (sz >= tp->tso_props.hdr_len
699                 && tp->size < tp->tso_props.hdr_len) {
700                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
701             }
702             tp->size = sz;
703             addr += bytes;
704             if (sz == msh) {
705                 xmit_seg(s);
706                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
707                 tp->size = tp->tso_props.hdr_len;
708             }
709             split_size -= bytes;
710         } while (bytes && split_size);
711     } else {
712         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
713         pci_dma_read(d, addr, tp->data + tp->size, split_size);
714         tp->size += split_size;
715     }
716 
717 eop:
718     if (!(txd_lower & E1000_TXD_CMD_EOP))
719         return;
720     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
721         xmit_seg(s);
722     }
723     tp->tso_frames = 0;
724     tp->sum_needed = 0;
725     tp->vlan_needed = 0;
726     tp->size = 0;
727     tp->cptse = 0;
728 }
729 
730 static uint32_t
731 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
732 {
733     PCIDevice *d = PCI_DEVICE(s);
734     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
735 
736     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
737         return 0;
738     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
739                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
740     dp->upper.data = cpu_to_le32(txd_upper);
741     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
742                   &dp->upper, sizeof(dp->upper));
743     return E1000_ICR_TXDW;
744 }
745 
746 static uint64_t tx_desc_base(E1000State *s)
747 {
748     uint64_t bah = s->mac_reg[TDBAH];
749     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
750 
751     return (bah << 32) + bal;
752 }
753 
754 static void
755 start_xmit(E1000State *s)
756 {
757     PCIDevice *d = PCI_DEVICE(s);
758     dma_addr_t base;
759     struct e1000_tx_desc desc;
760     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
761 
762     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
763         DBGOUT(TX, "tx disabled\n");
764         return;
765     }
766 
767     if (s->tx.busy) {
768         return;
769     }
770     s->tx.busy = true;
771 
772     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
773         base = tx_desc_base(s) +
774                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
775         pci_dma_read(d, base, &desc, sizeof(desc));
776 
777         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
778                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
779                desc.upper.data);
780 
781         process_tx_desc(s, &desc);
782         cause |= txdesc_writeback(s, base, &desc);
783 
784         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
785             s->mac_reg[TDH] = 0;
786         /*
787          * the following could happen only if guest sw assigns
788          * bogus values to TDT/TDLEN.
789          * there's nothing too intelligent we could do about this.
790          */
791         if (s->mac_reg[TDH] == tdh_start ||
792             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
793             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
794                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
795             break;
796         }
797     }
798     s->tx.busy = false;
799     set_ics(s, 0, cause);
800 }
801 
802 static int
803 receive_filter(E1000State *s, const uint8_t *buf, int size)
804 {
805     uint32_t rctl = s->mac_reg[RCTL];
806     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
807 
808     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
809         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
810         uint16_t vid = lduw_be_p(buf + 14);
811         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
812                                  ((vid >> 5) & 0x7f));
813         if ((vfta & (1 << (vid & 0x1f))) == 0)
814             return 0;
815     }
816 
817     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
818         return 1;
819     }
820 
821     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
822         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
823         return 1;
824     }
825 
826     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
827         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
828         return 1;
829     }
830 
831     return e1000x_rx_group_filter(s->mac_reg, buf);
832 }
833 
834 static void
835 e1000_set_link_status(NetClientState *nc)
836 {
837     E1000State *s = qemu_get_nic_opaque(nc);
838     uint32_t old_status = s->mac_reg[STATUS];
839 
840     if (nc->link_down) {
841         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
842     } else {
843         if (have_autoneg(s) &&
844             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
845             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
846         } else {
847             e1000_link_up(s);
848         }
849     }
850 
851     if (s->mac_reg[STATUS] != old_status)
852         set_ics(s, 0, E1000_ICR_LSC);
853 }
854 
855 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
856 {
857     int bufs;
858     /* Fast-path short packets */
859     if (total_size <= s->rxbuf_size) {
860         return s->mac_reg[RDH] != s->mac_reg[RDT];
861     }
862     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
863         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
864     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
865         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
866             s->mac_reg[RDT] - s->mac_reg[RDH];
867     } else {
868         return false;
869     }
870     return total_size <= bufs * s->rxbuf_size;
871 }
872 
873 static bool
874 e1000_can_receive(NetClientState *nc)
875 {
876     E1000State *s = qemu_get_nic_opaque(nc);
877 
878     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
879         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
880 }
881 
882 static uint64_t rx_desc_base(E1000State *s)
883 {
884     uint64_t bah = s->mac_reg[RDBAH];
885     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
886 
887     return (bah << 32) + bal;
888 }
889 
890 static void
891 e1000_receiver_overrun(E1000State *s, size_t size)
892 {
893     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
894     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
895     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
896     set_ics(s, 0, E1000_ICS_RXO);
897 }
898 
899 static ssize_t
900 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
901 {
902     E1000State *s = qemu_get_nic_opaque(nc);
903     PCIDevice *d = PCI_DEVICE(s);
904     struct e1000_rx_desc desc;
905     dma_addr_t base;
906     unsigned int n, rdt;
907     uint32_t rdh_start;
908     uint16_t vlan_special = 0;
909     uint8_t vlan_status = 0;
910     uint8_t min_buf[MIN_BUF_SIZE];
911     struct iovec min_iov;
912     uint8_t *filter_buf = iov->iov_base;
913     size_t size = iov_size(iov, iovcnt);
914     size_t iov_ofs = 0;
915     size_t desc_offset;
916     size_t desc_size;
917     size_t total_size;
918 
919     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
920         return -1;
921     }
922 
923     if (timer_pending(s->flush_queue_timer)) {
924         return 0;
925     }
926 
927     /* Pad to minimum Ethernet frame length */
928     if (size < sizeof(min_buf)) {
929         iov_to_buf(iov, iovcnt, 0, min_buf, size);
930         memset(&min_buf[size], 0, sizeof(min_buf) - size);
931         min_iov.iov_base = filter_buf = min_buf;
932         min_iov.iov_len = size = sizeof(min_buf);
933         iovcnt = 1;
934         iov = &min_iov;
935     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
936         /* This is very unlikely, but may happen. */
937         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
938         filter_buf = min_buf;
939     }
940 
941     /* Discard oversized packets if !LPE and !SBP. */
942     if (e1000x_is_oversized(s->mac_reg, size)) {
943         return size;
944     }
945 
946     if (!receive_filter(s, filter_buf, size)) {
947         return size;
948     }
949 
950     if (e1000x_vlan_enabled(s->mac_reg) &&
951         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
952         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
953         iov_ofs = 4;
954         if (filter_buf == iov->iov_base) {
955             memmove(filter_buf + 4, filter_buf, 12);
956         } else {
957             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
958             while (iov->iov_len <= iov_ofs) {
959                 iov_ofs -= iov->iov_len;
960                 iov++;
961             }
962         }
963         vlan_status = E1000_RXD_STAT_VP;
964         size -= 4;
965     }
966 
967     rdh_start = s->mac_reg[RDH];
968     desc_offset = 0;
969     total_size = size + e1000x_fcs_len(s->mac_reg);
970     if (!e1000_has_rxbufs(s, total_size)) {
971         e1000_receiver_overrun(s, total_size);
972         return -1;
973     }
974     do {
975         desc_size = total_size - desc_offset;
976         if (desc_size > s->rxbuf_size) {
977             desc_size = s->rxbuf_size;
978         }
979         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
980         pci_dma_read(d, base, &desc, sizeof(desc));
981         desc.special = vlan_special;
982         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
983         if (desc.buffer_addr) {
984             if (desc_offset < size) {
985                 size_t iov_copy;
986                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
987                 size_t copy_size = size - desc_offset;
988                 if (copy_size > s->rxbuf_size) {
989                     copy_size = s->rxbuf_size;
990                 }
991                 do {
992                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
993                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
994                     copy_size -= iov_copy;
995                     ba += iov_copy;
996                     iov_ofs += iov_copy;
997                     if (iov_ofs == iov->iov_len) {
998                         iov++;
999                         iov_ofs = 0;
1000                     }
1001                 } while (copy_size);
1002             }
1003             desc_offset += desc_size;
1004             desc.length = cpu_to_le16(desc_size);
1005             if (desc_offset >= total_size) {
1006                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1007             } else {
1008                 /* Guest zeroing out status is not a hardware requirement.
1009                    Clear EOP in case guest didn't do it. */
1010                 desc.status &= ~E1000_RXD_STAT_EOP;
1011             }
1012         } else { // as per intel docs; skip descriptors with null buf addr
1013             DBGOUT(RX, "Null RX descriptor!!\n");
1014         }
1015         pci_dma_write(d, base, &desc, sizeof(desc));
1016 
1017         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1018             s->mac_reg[RDH] = 0;
1019         /* see comment in start_xmit; same here */
1020         if (s->mac_reg[RDH] == rdh_start ||
1021             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1022             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1023                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1024             e1000_receiver_overrun(s, total_size);
1025             return -1;
1026         }
1027     } while (desc_offset < total_size);
1028 
1029     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1030 
1031     n = E1000_ICS_RXT0;
1032     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1033         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1034     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1035         s->rxbuf_min_shift)
1036         n |= E1000_ICS_RXDMT0;
1037 
1038     set_ics(s, 0, n);
1039 
1040     return size;
1041 }
1042 
1043 static ssize_t
1044 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1045 {
1046     const struct iovec iov = {
1047         .iov_base = (uint8_t *)buf,
1048         .iov_len = size
1049     };
1050 
1051     return e1000_receive_iov(nc, &iov, 1);
1052 }
1053 
1054 static uint32_t
1055 mac_readreg(E1000State *s, int index)
1056 {
1057     return s->mac_reg[index];
1058 }
1059 
1060 static uint32_t
1061 mac_low4_read(E1000State *s, int index)
1062 {
1063     return s->mac_reg[index] & 0xf;
1064 }
1065 
1066 static uint32_t
1067 mac_low11_read(E1000State *s, int index)
1068 {
1069     return s->mac_reg[index] & 0x7ff;
1070 }
1071 
1072 static uint32_t
1073 mac_low13_read(E1000State *s, int index)
1074 {
1075     return s->mac_reg[index] & 0x1fff;
1076 }
1077 
1078 static uint32_t
1079 mac_low16_read(E1000State *s, int index)
1080 {
1081     return s->mac_reg[index] & 0xffff;
1082 }
1083 
1084 static uint32_t
1085 mac_icr_read(E1000State *s, int index)
1086 {
1087     uint32_t ret = s->mac_reg[ICR];
1088 
1089     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1090     set_interrupt_cause(s, 0, 0);
1091     return ret;
1092 }
1093 
1094 static uint32_t
1095 mac_read_clr4(E1000State *s, int index)
1096 {
1097     uint32_t ret = s->mac_reg[index];
1098 
1099     s->mac_reg[index] = 0;
1100     return ret;
1101 }
1102 
1103 static uint32_t
1104 mac_read_clr8(E1000State *s, int index)
1105 {
1106     uint32_t ret = s->mac_reg[index];
1107 
1108     s->mac_reg[index] = 0;
1109     s->mac_reg[index-1] = 0;
1110     return ret;
1111 }
1112 
1113 static void
1114 mac_writereg(E1000State *s, int index, uint32_t val)
1115 {
1116     uint32_t macaddr[2];
1117 
1118     s->mac_reg[index] = val;
1119 
1120     if (index == RA + 1) {
1121         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1122         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1123         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1124     }
1125 }
1126 
1127 static void
1128 set_rdt(E1000State *s, int index, uint32_t val)
1129 {
1130     s->mac_reg[index] = val & 0xffff;
1131     if (e1000_has_rxbufs(s, 1)) {
1132         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1133     }
1134 }
1135 
1136 static void
1137 set_16bit(E1000State *s, int index, uint32_t val)
1138 {
1139     s->mac_reg[index] = val & 0xffff;
1140 }
1141 
1142 static void
1143 set_dlen(E1000State *s, int index, uint32_t val)
1144 {
1145     s->mac_reg[index] = val & 0xfff80;
1146 }
1147 
1148 static void
1149 set_tctl(E1000State *s, int index, uint32_t val)
1150 {
1151     s->mac_reg[index] = val;
1152     s->mac_reg[TDT] &= 0xffff;
1153     start_xmit(s);
1154 }
1155 
1156 static void
1157 set_icr(E1000State *s, int index, uint32_t val)
1158 {
1159     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1160     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1161 }
1162 
1163 static void
1164 set_imc(E1000State *s, int index, uint32_t val)
1165 {
1166     s->mac_reg[IMS] &= ~val;
1167     set_ics(s, 0, 0);
1168 }
1169 
1170 static void
1171 set_ims(E1000State *s, int index, uint32_t val)
1172 {
1173     s->mac_reg[IMS] |= val;
1174     set_ics(s, 0, 0);
1175 }
1176 
/*
 * MMIO read dispatch table.
 *
 * Indexed by the register index that e1000_mmio_read() derives from the
 * access address ((addr & 0x1ffff) >> 2).  A NULL entry means the
 * register has no read handler; e1000_mmio_read() then returns 0.
 *
 * getreg(x) installs the plain mac_readreg handler for register x.
 * Note that designated initializers later in the list override earlier
 * ones for the same index, so the order of entries is significant.
 */
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
static const readops macreg_readops[] = {
    /* Registers read back verbatim */
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers (clr8 also clears the register at index-1) */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers whose reads are masked to their low N bits */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (GNU range designators) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used to bounds-check the index. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1232 
/*
 * MMIO write dispatch table.
 *
 * Indexed the same way as the read table: by the register index that
 * e1000_mmio_write() derives from the access address.  A NULL entry
 * means the register has no write handler; e1000_mmio_write() then
 * only logs the access.
 *
 * putreg(x) installs the plain mac_writereg handler for register x.
 */
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
static const writeops macreg_writeops[] = {
    /* Registers stored verbatim */
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with dedicated write handlers (masking or side effects) */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (GNU range designators) */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used to bounds-check the index. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1263 
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/*
 * Build an access entry that requires compat flag E1000_FLAG_<x>:
 * the flag value is stored above the two low bits, together with
 * MAC_ACCESS_FLAG_NEEDED.
 */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 *
 * The MMIO read/write handlers consult this table per register index:
 * if 'n' is set, the access is only performed when one of the 'f' bits
 * is also set in s->compat_flags; if 'p' is set, a debug message is
 * emitted before the access.  Entries not listed are 0 (no restriction).
 */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated registers that are additionally only partially emulated */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1316 
1317 static void
1318 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1319                  unsigned size)
1320 {
1321     E1000State *s = opaque;
1322     unsigned int index = (addr & 0x1ffff) >> 2;
1323 
1324     if (index < NWRITEOPS && macreg_writeops[index]) {
1325         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1326             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1327             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1328                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1329                        "It is not fully implemented.\n", index<<2);
1330             }
1331             macreg_writeops[index](s, index, val);
1332         } else {    /* "flag needed" bit is set, but the flag is not active */
1333             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1334                    index<<2);
1335         }
1336     } else if (index < NREADOPS && macreg_readops[index]) {
1337         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1338                index<<2, val);
1339     } else {
1340         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1341                index<<2, val);
1342     }
1343 }
1344 
1345 static uint64_t
1346 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1347 {
1348     E1000State *s = opaque;
1349     unsigned int index = (addr & 0x1ffff) >> 2;
1350 
1351     if (index < NREADOPS && macreg_readops[index]) {
1352         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1353             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1354             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1355                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1356                        "It is not fully implemented.\n", index<<2);
1357             }
1358             return macreg_readops[index](s, index);
1359         } else {    /* "flag needed" bit is set, but the flag is not active */
1360             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1361                    index<<2);
1362         }
1363     } else {
1364         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1365     }
1366     return 0;
1367 }
1368 
/*
 * Callbacks for the register MMIO region (installed on d->mmio by
 * e1000_mmio_setup()).  The region is little-endian and the handlers
 * are declared as implementing 4-byte accesses only.
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1378 
1379 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1380                               unsigned size)
1381 {
1382     E1000State *s = opaque;
1383 
1384     (void)s;
1385     return 0;
1386 }
1387 
1388 static void e1000_io_write(void *opaque, hwaddr addr,
1389                            uint64_t val, unsigned size)
1390 {
1391     E1000State *s = opaque;
1392 
1393     (void)s;
1394 }
1395 
/*
 * Callbacks for the I/O port region (installed on d->io by
 * e1000_mmio_setup()).  Both handlers are stubs.
 */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1401 
/*
 * VMState field test: true only for version 1 of the migration stream.
 * 'opaque' is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1406 
1407 static int e1000_pre_save(void *opaque)
1408 {
1409     E1000State *s = opaque;
1410     NetClientState *nc = qemu_get_queue(s->nic);
1411 
1412     /*
1413      * If link is down and auto-negotiation is supported and ongoing,
1414      * complete auto-negotiation immediately. This allows us to look
1415      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1416      */
1417     if (nc->link_down && have_autoneg(s)) {
1418         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1419     }
1420 
1421     /* Decide which set of props to migrate in the main structure */
1422     if (chkflag(TSO) || !s->use_tso_for_migration) {
1423         /* Either we're migrating with the extra subsection, in which
1424          * case the mig_props is always 'props' OR
1425          * we've not got the subsection, but 'props' was the last
1426          * updated.
1427          */
1428         s->mig_props = s->tx.props;
1429     } else {
1430         /* We're not using the subsection, and 'tso_props' was
1431          * the last updated.
1432          */
1433         s->mig_props = s->tx.tso_props;
1434     }
1435     return 0;
1436 }
1437 
1438 static int e1000_post_load(void *opaque, int version_id)
1439 {
1440     E1000State *s = opaque;
1441     NetClientState *nc = qemu_get_queue(s->nic);
1442 
1443     if (!chkflag(MIT)) {
1444         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1445             s->mac_reg[TADV] = 0;
1446         s->mit_irq_level = false;
1447     }
1448     s->mit_ide = 0;
1449     s->mit_timer_on = true;
1450     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1451 
1452     /* nc.link_down can't be migrated, so infer link_down according
1453      * to link status bit in mac_reg[STATUS].
1454      * Alternatively, restart link negotiation if it was in progress. */
1455     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1456 
1457     if (have_autoneg(s) &&
1458         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1459         nc->link_down = false;
1460         timer_mod(s->autoneg_timer,
1461                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1462     }
1463 
1464     s->tx.props = s->mig_props;
1465     if (!s->received_tx_tso) {
1466         /* We received only one set of offload data (tx.props)
1467          * and haven't got tx.tso_props.  The best we can do
1468          * is dupe the data.
1469          */
1470         s->tx.tso_props = s->mig_props;
1471     }
1472     return 0;
1473 }
1474 
1475 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1476 {
1477     E1000State *s = opaque;
1478     s->received_tx_tso = true;
1479     return 0;
1480 }
1481 
1482 static bool e1000_mit_state_needed(void *opaque)
1483 {
1484     E1000State *s = opaque;
1485 
1486     return chkflag(MIT);
1487 }
1488 
1489 static bool e1000_full_mac_needed(void *opaque)
1490 {
1491     E1000State *s = opaque;
1492 
1493     return chkflag(MAC);
1494 }
1495 
1496 static bool e1000_tso_state_needed(void *opaque)
1497 {
1498     E1000State *s = opaque;
1499 
1500     return chkflag(TSO);
1501 }
1502 
/*
 * Migration subsection with the interrupt mitigation registers and the
 * mitigated IRQ level.  Gated by e1000_mit_state_needed(), i.e. by the
 * MIT flag.  The order of the fields defines the stream layout.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1517 
/*
 * Migration subsection carrying the complete mac_reg[] array (0x8000
 * entries).  Gated by e1000_full_mac_needed(), i.e. by the MAC flag.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1528 
/*
 * Migration subsection carrying tx.tso_props.  Gated by
 * e1000_tso_state_needed(), i.e. by the TSO flag.  Its post_load hook
 * sets received_tx_tso so that e1000_post_load() knows the subsection
 * was present.  The order of the fields defines the stream layout.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1550 
/*
 * Main migration description of the device (version 2, accepting
 * streams from version 1 on).
 *
 * The order of the entries in .fields defines the stream layout and
 * must not be changed.  The offload properties are transferred through
 * mig_props, which e1000_pre_save() fills in and e1000_post_load()
 * copies back into tx.props (and tx.tso_props when needed).
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        /* Placeholders for fields that are no longer part of the state */
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties (via mig_props) interleaved with TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: receive addresses, multicast and VLAN tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each gated by its own .needed predicate */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1635 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * This is a 64-word template; pci_e1000_realize() passes it, together
 * with the PCI device id and the MAC address, to
 * e1000x_core_prepare_eeprom(), which fills d->eeprom_data.  The two
 * words marked DevId (indices 11 and 13) are placeholders.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1650 
1651 /* PCI interface */
1652 
1653 static void
1654 e1000_mmio_setup(E1000State *d)
1655 {
1656     int i;
1657     const uint32_t excluded_regs[] = {
1658         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1659         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1660     };
1661 
1662     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1663                           "e1000-mmio", PNPMMIO_SIZE);
1664     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1665     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1666         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1667                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1668     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1669 }
1670 
1671 static void
1672 pci_e1000_uninit(PCIDevice *dev)
1673 {
1674     E1000State *d = E1000(dev);
1675 
1676     timer_free(d->autoneg_timer);
1677     timer_free(d->mit_timer);
1678     timer_free(d->flush_queue_timer);
1679     qemu_del_nic(d->nic);
1680 }
1681 
/*
 * Network client callbacks of the device; passed to qemu_new_nic() in
 * pci_e1000_realize().
 */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1690 
1691 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1692                                 uint32_t val, int len)
1693 {
1694     E1000State *s = E1000(pci_dev);
1695 
1696     pci_default_write_config(pci_dev, address, val, len);
1697 
1698     if (range_covers_byte(address, len, PCI_COMMAND) &&
1699         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1700         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1701     }
1702 }
1703 
1704 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1705 {
1706     DeviceState *dev = DEVICE(pci_dev);
1707     E1000State *d = E1000(pci_dev);
1708     uint8_t *pci_conf;
1709     uint8_t *macaddr;
1710 
1711     pci_dev->config_write = e1000_write_config;
1712 
1713     pci_conf = pci_dev->config;
1714 
1715     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1716     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1717 
1718     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1719 
1720     e1000_mmio_setup(d);
1721 
1722     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1723 
1724     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1725 
1726     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1727     macaddr = d->conf.macaddr.a;
1728 
1729     e1000x_core_prepare_eeprom(d->eeprom_data,
1730                                e1000_eeprom_template,
1731                                sizeof(e1000_eeprom_template),
1732                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1733                                macaddr);
1734 
1735     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1736                           object_get_typename(OBJECT(d)), dev->id, d);
1737 
1738     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1739 
1740     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1741     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1742     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1743                                         e1000_flush_queue_timer, d);
1744 }
1745 
1746 static void qdev_e1000_reset(DeviceState *dev)
1747 {
1748     E1000State *d = E1000(dev);
1749     e1000_reset(d);
1750 }
1751 
1752 static Property e1000_properties[] = {
1753     DEFINE_NIC_PROPERTIES(E1000State, conf),
1754     DEFINE_PROP_BIT("autonegotiation", E1000State,
1755                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1756     DEFINE_PROP_BIT("mitigation", E1000State,
1757                     compat_flags, E1000_FLAG_MIT_BIT, true),
1758     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1759                     compat_flags, E1000_FLAG_MAC_BIT, true),
1760     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1761                     compat_flags, E1000_FLAG_TSO_BIT, true),
1762     DEFINE_PROP_BIT("init-vet", E1000State,
1763                     compat_flags, E1000_FLAG_VET_BIT, true),
1764     DEFINE_PROP_END_OF_LIST(),
1765 };
1766 
/*
 * Per-variant description of an e1000 device type.  One entry is passed as
 * class data to e1000_class_init() for every registered variant.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name used at registration */
    uint16_t   device_id;   /* copied into the PCI class device_id */
    uint8_t    revision;    /* copied into the PCI class revision */
    uint16_t   phy_id2;     /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1773 
1774 static void e1000_class_init(ObjectClass *klass, void *data)
1775 {
1776     DeviceClass *dc = DEVICE_CLASS(klass);
1777     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1778     E1000BaseClass *e = E1000_CLASS(klass);
1779     const E1000Info *info = data;
1780 
1781     k->realize = pci_e1000_realize;
1782     k->exit = pci_e1000_uninit;
1783     k->romfile = "efi-e1000.rom";
1784     k->vendor_id = PCI_VENDOR_ID_INTEL;
1785     k->device_id = info->device_id;
1786     k->revision = info->revision;
1787     e->phy_id2 = info->phy_id2;
1788     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1789     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1790     dc->desc = "Intel Gigabit Ethernet";
1791     dc->reset = qdev_e1000_reset;
1792     dc->vmsd = &vmstate_e1000;
1793     device_class_set_props(dc, e1000_properties);
1794 }
1795 
1796 static void e1000_instance_init(Object *obj)
1797 {
1798     E1000State *n = E1000(obj);
1799     device_add_bootindex_property(obj, &n->conf.bootindex,
1800                                   "bootindex", "/ethernet-phy@0",
1801                                   DEVICE(n));
1802 }
1803 
1804 static const TypeInfo e1000_base_info = {
1805     .name          = TYPE_E1000_BASE,
1806     .parent        = TYPE_PCI_DEVICE,
1807     .instance_size = sizeof(E1000State),
1808     .instance_init = e1000_instance_init,
1809     .class_size    = sizeof(E1000BaseClass),
1810     .abstract      = true,
1811     .interfaces = (InterfaceInfo[]) {
1812         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1813         { },
1814     },
1815 };
1816 
1817 static const E1000Info e1000_devices[] = {
1818     {
1819         .name      = "e1000",
1820         .device_id = E1000_DEV_ID_82540EM,
1821         .revision  = 0x03,
1822         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1823     },
1824     {
1825         .name      = "e1000-82544gc",
1826         .device_id = E1000_DEV_ID_82544GC_COPPER,
1827         .revision  = 0x03,
1828         .phy_id2   = E1000_PHY_ID2_82544x,
1829     },
1830     {
1831         .name      = "e1000-82545em",
1832         .device_id = E1000_DEV_ID_82545EM_COPPER,
1833         .revision  = 0x03,
1834         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1835     },
1836 };
1837 
1838 static void e1000_register_types(void)
1839 {
1840     int i;
1841 
1842     type_register_static(&e1000_base_info);
1843     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1844         const E1000Info *info = &e1000_devices[i];
1845         TypeInfo type_info = {};
1846 
1847         type_info.name = info->name;
1848         type_info.parent = TYPE_E1000_BASE;
1849         type_info.class_data = (void *)info;
1850         type_info.class_init = e1000_class_init;
1851 
1852         type_register(&type_info);
1853     }
1854 }
1855 
1856 type_init(e1000_register_types)
1857