xref: /qemu/hw/net/e1000.c (revision 2e8f72ac)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/net.h"
33 #include "net/checksum.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/module.h"
38 #include "qemu/range.h"
39 
40 #include "e1000x_common.h"
41 #include "trace.h"
42 #include "qom/object.h"
43 
/* Ethernet broadcast destination address: six octets of 0xff. */
static const uint8_t bcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
45 
46 /* #define E1000_DEBUG */
47 
#ifdef E1000_DEBUG
/* Debug categories; each one owns a single bit in debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Bit mask for debug category x (given without the DEBUG_ prefix). */
#define DBGBIT(x)    (1 << DEBUG_##x)

/* Categories that are printed; edit to taste when debugging. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a message on stderr if debug category 'what' is enabled. */
#define DBGOUT(what, fmt, ...)                                  \
    do {                                                        \
        if (debugflags & DBGBIT(what)) {                        \
            fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__);     \
        }                                                       \
    } while (0)
#else
/* Debugging compiled out: DBGOUT() is an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
65 
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
/* Minimum number of octets in an Ethernet frame, not counting the FCS. */
#define MIN_BUF_SIZE      60

/* Presumably a 14-byte Ethernet header plus one 4-byte VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
80 struct E1000State_st {
81     /*< private >*/
82     PCIDevice parent_obj;
83     /*< public >*/
84 
85     NICState *nic;
86     NICConf conf;
87     MemoryRegion mmio;
88     MemoryRegion io;
89 
90     uint32_t mac_reg[0x8000];
91     uint16_t phy_reg[0x20];
92     uint16_t eeprom_data[64];
93 
94     uint32_t rxbuf_size;
95     uint32_t rxbuf_min_shift;
96     struct e1000_tx {
97         unsigned char header[256];
98         unsigned char vlan_header[4];
99         /* Fields vlan and data must not be reordered or separated. */
100         unsigned char vlan[4];
101         unsigned char data[0x10000];
102         uint16_t size;
103         unsigned char vlan_needed;
104         unsigned char sum_needed;
105         bool cptse;
106         e1000x_txd_props props;
107         e1000x_txd_props tso_props;
108         uint16_t tso_frames;
109     } tx;
110 
111     struct {
112         uint32_t val_in;    /* shifted in from guest driver */
113         uint16_t bitnum_in;
114         uint16_t bitnum_out;
115         uint16_t reading;
116         uint32_t old_eecd;
117     } eecd_state;
118 
119     QEMUTimer *autoneg_timer;
120 
121     QEMUTimer *mit_timer;      /* Mitigation timer. */
122     bool mit_timer_on;         /* Mitigation timer is running. */
123     bool mit_irq_level;        /* Tracks interrupt pin level. */
124     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
125 
126     QEMUTimer *flush_queue_timer;
127 
128 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
129 #define E1000_FLAG_AUTONEG_BIT 0
130 #define E1000_FLAG_MIT_BIT 1
131 #define E1000_FLAG_MAC_BIT 2
132 #define E1000_FLAG_TSO_BIT 3
133 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
134 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
135 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
136 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
137     uint32_t compat_flags;
138     bool received_tx_tso;
139     bool use_tso_for_migration;
140     e1000x_txd_props mig_props;
141 };
142 typedef struct E1000State_st E1000State;
143 
/*
 * Test compatibility flag x (AUTONEG, MIT, MAC or TSO).  Expects an
 * E1000State pointer named 's' to be in scope at the point of use.
 */
#define chkflag(x) (s->compat_flags & E1000_FLAG_##x)
145 
146 struct E1000BaseClass {
147     PCIDeviceClass parent_class;
148     uint16_t phy_id2;
149 };
150 typedef struct E1000BaseClass E1000BaseClass;
151 
152 #define TYPE_E1000_BASE "e1000-base"
153 
154 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
155                      E1000, TYPE_E1000_BASE)
156 
157 
158 static void
159 e1000_link_up(E1000State *s)
160 {
161     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
162 
163     /* E1000_STATUS_LU is tested by e1000_can_receive() */
164     qemu_flush_queued_packets(qemu_get_queue(s->nic));
165 }
166 
167 static void
168 e1000_autoneg_done(E1000State *s)
169 {
170     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
171 
172     /* E1000_STATUS_LU is tested by e1000_can_receive() */
173     qemu_flush_queued_packets(qemu_get_queue(s->nic));
174 }
175 
176 static bool
177 have_autoneg(E1000State *s)
178 {
179     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
180 }
181 
182 static void
183 set_phy_ctrl(E1000State *s, int index, uint16_t val)
184 {
185     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
186     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
187                                    MII_CR_RESET |
188                                    MII_CR_RESTART_AUTO_NEG);
189 
190     /*
191      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
192      * migrate during auto negotiation, after migration the link will be
193      * down.
194      */
195     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
196         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
197     }
198 }
199 
200 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
201     [PHY_CTRL] = set_phy_ctrl,
202 };
203 
204 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
205 
206 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
207 static const char phy_regcap[0x20] = {
208     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
209     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
210     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
211     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
212     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
213     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
214     [PHY_AUTONEG_EXP] = PHY_R,
215 };
216 
217 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
218 static const uint16_t phy_reg_init[] = {
219     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
220                    MII_CR_FULL_DUPLEX |
221                    MII_CR_AUTO_NEG_EN,
222 
223     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
224                    MII_SR_LINK_STATUS |   /* link initially up */
225                    MII_SR_AUTONEG_CAPS |
226                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
227                    MII_SR_PREAMBLE_SUPPRESS |
228                    MII_SR_EXTENDED_STATUS |
229                    MII_SR_10T_HD_CAPS |
230                    MII_SR_10T_FD_CAPS |
231                    MII_SR_100X_HD_CAPS |
232                    MII_SR_100X_FD_CAPS,
233 
234     [PHY_ID1] = 0x141,
235     /* [PHY_ID2] configured per DevId, from e1000_reset() */
236     [PHY_AUTONEG_ADV] = 0xde1,
237     [PHY_LP_ABILITY] = 0x1e0,
238     [PHY_1000T_CTRL] = 0x0e00,
239     [PHY_1000T_STATUS] = 0x3c00,
240     [M88E1000_PHY_SPEC_CTRL] = 0x360,
241     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
242     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
243 };
244 
245 static const uint32_t mac_reg_init[] = {
246     [PBA]     = 0x00100030,
247     [LEDCTL]  = 0x602,
248     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
249                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
250     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
251                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
252                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
253                 E1000_STATUS_LU,
254     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
255                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
256                 E1000_MANC_RMCP_EN,
257 };
258 
/*
 * Lower *curr to value when value is a non-zero delay that is smaller
 * than the current one.  *curr == 0 means "no delay chosen yet", and a
 * value of 0 is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
267 
268 static void
269 set_interrupt_cause(E1000State *s, int index, uint32_t val)
270 {
271     PCIDevice *d = PCI_DEVICE(s);
272     uint32_t pending_ints;
273     uint32_t mit_delay;
274 
275     s->mac_reg[ICR] = val;
276 
277     /*
278      * Make sure ICR and ICS registers have the same value.
279      * The spec says that the ICS register is write-only.  However in practice,
280      * on real hardware ICS is readable, and for reads it has the same value as
281      * ICR (except that ICS does not have the clear on read behaviour of ICR).
282      *
283      * The VxWorks PRO/1000 driver uses this behaviour.
284      */
285     s->mac_reg[ICS] = val;
286 
287     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
288     if (!s->mit_irq_level && pending_ints) {
289         /*
290          * Here we detect a potential raising edge. We postpone raising the
291          * interrupt line if we are inside the mitigation delay window
292          * (s->mit_timer_on == 1).
293          * We provide a partial implementation of interrupt mitigation,
294          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
295          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
296          * RADV; relative timers based on TIDV and RDTR are not implemented.
297          */
298         if (s->mit_timer_on) {
299             return;
300         }
301         if (chkflag(MIT)) {
302             /* Compute the next mitigation delay according to pending
303              * interrupts and the current values of RADV (provided
304              * RDTR!=0), TADV and ITR.
305              * Then rearm the timer.
306              */
307             mit_delay = 0;
308             if (s->mit_ide &&
309                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
310                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
311             }
312             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
313                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
314             }
315             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
316 
317             /*
318              * According to e1000 SPEC, the Ethernet controller guarantees
319              * a maximum observable interrupt rate of 7813 interrupts/sec.
320              * Thus if mit_delay < 500 then the delay should be set to the
321              * minimum delay possible which is 500.
322              */
323             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
324 
325             s->mit_timer_on = 1;
326             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
327                       mit_delay * 256);
328             s->mit_ide = 0;
329         }
330     }
331 
332     s->mit_irq_level = (pending_ints != 0);
333     pci_set_irq(d, s->mit_irq_level);
334 }
335 
336 static void
337 e1000_mit_timer(void *opaque)
338 {
339     E1000State *s = opaque;
340 
341     s->mit_timer_on = 0;
342     /* Call set_interrupt_cause to update the irq level (if necessary). */
343     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
344 }
345 
346 static void
347 set_ics(E1000State *s, int index, uint32_t val)
348 {
349     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
350         s->mac_reg[IMS]);
351     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
352 }
353 
354 static void
355 e1000_autoneg_timer(void *opaque)
356 {
357     E1000State *s = opaque;
358     if (!qemu_get_queue(s->nic)->link_down) {
359         e1000_autoneg_done(s);
360         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
361     }
362 }
363 
364 static void e1000_reset(void *opaque)
365 {
366     E1000State *d = opaque;
367     E1000BaseClass *edc = E1000_GET_CLASS(d);
368     uint8_t *macaddr = d->conf.macaddr.a;
369 
370     timer_del(d->autoneg_timer);
371     timer_del(d->mit_timer);
372     timer_del(d->flush_queue_timer);
373     d->mit_timer_on = 0;
374     d->mit_irq_level = 0;
375     d->mit_ide = 0;
376     memset(d->phy_reg, 0, sizeof d->phy_reg);
377     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
378     d->phy_reg[PHY_ID2] = edc->phy_id2;
379     memset(d->mac_reg, 0, sizeof d->mac_reg);
380     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
381     d->rxbuf_min_shift = 1;
382     memset(&d->tx, 0, sizeof d->tx);
383 
384     if (qemu_get_queue(d->nic)->link_down) {
385         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
386     }
387 
388     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
389 }
390 
391 static void
392 set_ctrl(E1000State *s, int index, uint32_t val)
393 {
394     /* RST is self clearing */
395     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
396 }
397 
398 static void
399 e1000_flush_queue_timer(void *opaque)
400 {
401     E1000State *s = opaque;
402 
403     qemu_flush_queued_packets(qemu_get_queue(s->nic));
404 }
405 
406 static void
407 set_rx_control(E1000State *s, int index, uint32_t val)
408 {
409     s->mac_reg[RCTL] = val;
410     s->rxbuf_size = e1000x_rxbufsize(val);
411     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
412     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
413            s->mac_reg[RCTL]);
414     timer_mod(s->flush_queue_timer,
415               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
416 }
417 
418 static void
419 set_mdic(E1000State *s, int index, uint32_t val)
420 {
421     uint32_t data = val & E1000_MDIC_DATA_MASK;
422     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
423 
424     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
425         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
426     else if (val & E1000_MDIC_OP_READ) {
427         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
428         if (!(phy_regcap[addr] & PHY_R)) {
429             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
430             val |= E1000_MDIC_ERROR;
431         } else
432             val = (val ^ data) | s->phy_reg[addr];
433     } else if (val & E1000_MDIC_OP_WRITE) {
434         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
435         if (!(phy_regcap[addr] & PHY_W)) {
436             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
437             val |= E1000_MDIC_ERROR;
438         } else {
439             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
440                 phyreg_writeops[addr](s, index, data);
441             } else {
442                 s->phy_reg[addr] = data;
443             }
444         }
445     }
446     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
447 
448     if (val & E1000_MDIC_INT_EN) {
449         set_ics(s, 0, E1000_ICR_MDAC);
450     }
451 }
452 
453 static uint32_t
454 get_eecd(E1000State *s, int index)
455 {
456     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
457 
458     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
459            s->eecd_state.bitnum_out, s->eecd_state.reading);
460     if (!s->eecd_state.reading ||
461         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
462           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
463         ret |= E1000_EECD_DO;
464     return ret;
465 }
466 
467 static void
468 set_eecd(E1000State *s, int index, uint32_t val)
469 {
470     uint32_t oldval = s->eecd_state.old_eecd;
471 
472     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
473             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
474     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
475         return;
476     }
477     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
478         s->eecd_state.val_in = 0;
479         s->eecd_state.bitnum_in = 0;
480         s->eecd_state.bitnum_out = 0;
481         s->eecd_state.reading = 0;
482     }
483     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
484         return;
485     }
486     if (!(E1000_EECD_SK & val)) {               /* falling edge */
487         s->eecd_state.bitnum_out++;
488         return;
489     }
490     s->eecd_state.val_in <<= 1;
491     if (val & E1000_EECD_DI)
492         s->eecd_state.val_in |= 1;
493     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
494         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
495         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
496             EEPROM_READ_OPCODE_MICROWIRE);
497     }
498     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
499            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
500            s->eecd_state.reading);
501 }
502 
503 static uint32_t
504 flash_eerd_read(E1000State *s, int x)
505 {
506     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
507 
508     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
509         return (s->mac_reg[EERD]);
510 
511     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
512         return (E1000_EEPROM_RW_REG_DONE | r);
513 
514     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
515            E1000_EEPROM_RW_REG_DONE | r);
516 }
517 
/*
 * Compute an Internet checksum over part of a packet and store it in
 * the packet in big-endian order.
 *
 * data: start of the packet
 * n:    packet length in bytes
 * sloc: offset at which the 16-bit checksum is stored
 * css:  offset of the first byte covered by the checksum
 * cse:  offset of the last byte covered; 0 (or a value >= n) means
 *       "up to the end of the packet"
 *
 * Nothing is written unless sloc is below the last covered offset.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
530 
531 static inline void
532 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
533 {
534     if (!memcmp(arr, bcast, sizeof bcast)) {
535         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
536     } else if (arr[0] & 1) {
537         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
538     }
539 }
540 
541 static void
542 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
543 {
544     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
545                                     PTC1023, PTC1522 };
546 
547     NetClientState *nc = qemu_get_queue(s->nic);
548     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
549         nc->info->receive(nc, buf, size);
550     } else {
551         qemu_send_packet(nc, buf, size);
552     }
553     inc_tx_bcast_or_mcast_count(s, buf);
554     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
555 }
556 
557 static void
558 xmit_seg(E1000State *s)
559 {
560     uint16_t len;
561     unsigned int frames = s->tx.tso_frames, css, sofar;
562     struct e1000_tx *tp = &s->tx;
563     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
564 
565     if (tp->cptse) {
566         css = props->ipcss;
567         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
568                frames, tp->size, css);
569         if (props->ip) {    /* IPv4 */
570             stw_be_p(tp->data+css+2, tp->size - css);
571             stw_be_p(tp->data+css+4,
572                      lduw_be_p(tp->data + css + 4) + frames);
573         } else {         /* IPv6 */
574             stw_be_p(tp->data+css+4, tp->size - css);
575         }
576         css = props->tucss;
577         len = tp->size - css;
578         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
579         if (props->tcp) {
580             sofar = frames * props->mss;
581             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
582             if (props->paylen - sofar > props->mss) {
583                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
584             } else if (frames) {
585                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
586             }
587         } else {    /* UDP */
588             stw_be_p(tp->data+css+4, len);
589         }
590         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
591             unsigned int phsum;
592             // add pseudo-header length before checksum calculation
593             void *sp = tp->data + props->tucso;
594 
595             phsum = lduw_be_p(sp) + len;
596             phsum = (phsum >> 16) + (phsum & 0xffff);
597             stw_be_p(sp, phsum);
598         }
599         tp->tso_frames++;
600     }
601 
602     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
603         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
604     }
605     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
606         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
607     }
608     if (tp->vlan_needed) {
609         memmove(tp->vlan, tp->data, 4);
610         memmove(tp->data, tp->data + 4, 8);
611         memcpy(tp->data + 8, tp->vlan_header, 4);
612         e1000_send_packet(s, tp->vlan, tp->size + 4);
613     } else {
614         e1000_send_packet(s, tp->data, tp->size);
615     }
616 
617     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
618     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
619     s->mac_reg[GPTC] = s->mac_reg[TPT];
620     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
621     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
622 }
623 
624 static void
625 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
626 {
627     PCIDevice *d = PCI_DEVICE(s);
628     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
629     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
630     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
631     unsigned int msh = 0xfffff;
632     uint64_t addr;
633     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
634     struct e1000_tx *tp = &s->tx;
635 
636     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
637     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
638         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
639             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
640             s->use_tso_for_migration = 1;
641             tp->tso_frames = 0;
642         } else {
643             e1000x_read_tx_ctx_descr(xp, &tp->props);
644             s->use_tso_for_migration = 0;
645         }
646         return;
647     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
648         // data descriptor
649         if (tp->size == 0) {
650             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
651         }
652         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
653     } else {
654         // legacy descriptor
655         tp->cptse = 0;
656     }
657 
658     if (e1000x_vlan_enabled(s->mac_reg) &&
659         e1000x_is_vlan_txd(txd_lower) &&
660         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
661         tp->vlan_needed = 1;
662         stw_be_p(tp->vlan_header,
663                       le16_to_cpu(s->mac_reg[VET]));
664         stw_be_p(tp->vlan_header + 2,
665                       le16_to_cpu(dp->upper.fields.special));
666     }
667 
668     addr = le64_to_cpu(dp->buffer_addr);
669     if (tp->cptse) {
670         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
671         do {
672             bytes = split_size;
673             if (tp->size + bytes > msh)
674                 bytes = msh - tp->size;
675 
676             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
677             pci_dma_read(d, addr, tp->data + tp->size, bytes);
678             sz = tp->size + bytes;
679             if (sz >= tp->tso_props.hdr_len
680                 && tp->size < tp->tso_props.hdr_len) {
681                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
682             }
683             tp->size = sz;
684             addr += bytes;
685             if (sz == msh) {
686                 xmit_seg(s);
687                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
688                 tp->size = tp->tso_props.hdr_len;
689             }
690             split_size -= bytes;
691         } while (bytes && split_size);
692     } else {
693         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
694         pci_dma_read(d, addr, tp->data + tp->size, split_size);
695         tp->size += split_size;
696     }
697 
698     if (!(txd_lower & E1000_TXD_CMD_EOP))
699         return;
700     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
701         xmit_seg(s);
702     }
703     tp->tso_frames = 0;
704     tp->sum_needed = 0;
705     tp->vlan_needed = 0;
706     tp->size = 0;
707     tp->cptse = 0;
708 }
709 
710 static uint32_t
711 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
712 {
713     PCIDevice *d = PCI_DEVICE(s);
714     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
715 
716     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
717         return 0;
718     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
719                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
720     dp->upper.data = cpu_to_le32(txd_upper);
721     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
722                   &dp->upper, sizeof(dp->upper));
723     return E1000_ICR_TXDW;
724 }
725 
726 static uint64_t tx_desc_base(E1000State *s)
727 {
728     uint64_t bah = s->mac_reg[TDBAH];
729     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
730 
731     return (bah << 32) + bal;
732 }
733 
734 static void
735 start_xmit(E1000State *s)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     dma_addr_t base;
739     struct e1000_tx_desc desc;
740     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
741 
742     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
743         DBGOUT(TX, "tx disabled\n");
744         return;
745     }
746 
747     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
748         base = tx_desc_base(s) +
749                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
750         pci_dma_read(d, base, &desc, sizeof(desc));
751 
752         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
753                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
754                desc.upper.data);
755 
756         process_tx_desc(s, &desc);
757         cause |= txdesc_writeback(s, base, &desc);
758 
759         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
760             s->mac_reg[TDH] = 0;
761         /*
762          * the following could happen only if guest sw assigns
763          * bogus values to TDT/TDLEN.
764          * there's nothing too intelligent we could do about this.
765          */
766         if (s->mac_reg[TDH] == tdh_start ||
767             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
768             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
769                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
770             break;
771         }
772     }
773     set_ics(s, 0, cause);
774 }
775 
776 static int
777 receive_filter(E1000State *s, const uint8_t *buf, int size)
778 {
779     uint32_t rctl = s->mac_reg[RCTL];
780     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
781 
782     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
783         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
784         uint16_t vid = lduw_be_p(buf + 14);
785         uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
786                                  ((vid >> 5) & 0x7f));
787         if ((vfta & (1 << (vid & 0x1f))) == 0)
788             return 0;
789     }
790 
791     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
792         return 1;
793     }
794 
795     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
796         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
797         return 1;
798     }
799 
800     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
801         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
802         return 1;
803     }
804 
805     return e1000x_rx_group_filter(s->mac_reg, buf);
806 }
807 
808 static void
809 e1000_set_link_status(NetClientState *nc)
810 {
811     E1000State *s = qemu_get_nic_opaque(nc);
812     uint32_t old_status = s->mac_reg[STATUS];
813 
814     if (nc->link_down) {
815         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
816     } else {
817         if (have_autoneg(s) &&
818             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
819             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
820         } else {
821             e1000_link_up(s);
822         }
823     }
824 
825     if (s->mac_reg[STATUS] != old_status)
826         set_ics(s, 0, E1000_ICR_LSC);
827 }
828 
829 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
830 {
831     int bufs;
832     /* Fast-path short packets */
833     if (total_size <= s->rxbuf_size) {
834         return s->mac_reg[RDH] != s->mac_reg[RDT];
835     }
836     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
837         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
838     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
839         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
840             s->mac_reg[RDT] - s->mac_reg[RDH];
841     } else {
842         return false;
843     }
844     return total_size <= bufs * s->rxbuf_size;
845 }
846 
847 static bool
848 e1000_can_receive(NetClientState *nc)
849 {
850     E1000State *s = qemu_get_nic_opaque(nc);
851 
852     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
853         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
854 }
855 
856 static uint64_t rx_desc_base(E1000State *s)
857 {
858     uint64_t bah = s->mac_reg[RDBAH];
859     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
860 
861     return (bah << 32) + bal;
862 }
863 
864 static void
865 e1000_receiver_overrun(E1000State *s, size_t size)
866 {
867     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
868     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
869     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
870     set_ics(s, 0, E1000_ICS_RXO);
871 }
872 
873 static ssize_t
874 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
875 {
876     E1000State *s = qemu_get_nic_opaque(nc);
877     PCIDevice *d = PCI_DEVICE(s);
878     struct e1000_rx_desc desc;
879     dma_addr_t base;
880     unsigned int n, rdt;
881     uint32_t rdh_start;
882     uint16_t vlan_special = 0;
883     uint8_t vlan_status = 0;
884     uint8_t min_buf[MIN_BUF_SIZE];
885     struct iovec min_iov;
886     uint8_t *filter_buf = iov->iov_base;
887     size_t size = iov_size(iov, iovcnt);
888     size_t iov_ofs = 0;
889     size_t desc_offset;
890     size_t desc_size;
891     size_t total_size;
892 
893     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
894         return -1;
895     }
896 
897     if (timer_pending(s->flush_queue_timer)) {
898         return 0;
899     }
900 
901     /* Pad to minimum Ethernet frame length */
902     if (size < sizeof(min_buf)) {
903         iov_to_buf(iov, iovcnt, 0, min_buf, size);
904         memset(&min_buf[size], 0, sizeof(min_buf) - size);
905         min_iov.iov_base = filter_buf = min_buf;
906         min_iov.iov_len = size = sizeof(min_buf);
907         iovcnt = 1;
908         iov = &min_iov;
909     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
910         /* This is very unlikely, but may happen. */
911         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
912         filter_buf = min_buf;
913     }
914 
915     /* Discard oversized packets if !LPE and !SBP. */
916     if (e1000x_is_oversized(s->mac_reg, size)) {
917         return size;
918     }
919 
920     if (!receive_filter(s, filter_buf, size)) {
921         return size;
922     }
923 
924     if (e1000x_vlan_enabled(s->mac_reg) &&
925         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
926         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
927         iov_ofs = 4;
928         if (filter_buf == iov->iov_base) {
929             memmove(filter_buf + 4, filter_buf, 12);
930         } else {
931             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
932             while (iov->iov_len <= iov_ofs) {
933                 iov_ofs -= iov->iov_len;
934                 iov++;
935             }
936         }
937         vlan_status = E1000_RXD_STAT_VP;
938         size -= 4;
939     }
940 
941     rdh_start = s->mac_reg[RDH];
942     desc_offset = 0;
943     total_size = size + e1000x_fcs_len(s->mac_reg);
944     if (!e1000_has_rxbufs(s, total_size)) {
945         e1000_receiver_overrun(s, total_size);
946         return -1;
947     }
948     do {
949         desc_size = total_size - desc_offset;
950         if (desc_size > s->rxbuf_size) {
951             desc_size = s->rxbuf_size;
952         }
953         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
954         pci_dma_read(d, base, &desc, sizeof(desc));
955         desc.special = vlan_special;
956         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
957         if (desc.buffer_addr) {
958             if (desc_offset < size) {
959                 size_t iov_copy;
960                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
961                 size_t copy_size = size - desc_offset;
962                 if (copy_size > s->rxbuf_size) {
963                     copy_size = s->rxbuf_size;
964                 }
965                 do {
966                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
967                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
968                     copy_size -= iov_copy;
969                     ba += iov_copy;
970                     iov_ofs += iov_copy;
971                     if (iov_ofs == iov->iov_len) {
972                         iov++;
973                         iov_ofs = 0;
974                     }
975                 } while (copy_size);
976             }
977             desc_offset += desc_size;
978             desc.length = cpu_to_le16(desc_size);
979             if (desc_offset >= total_size) {
980                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
981             } else {
982                 /* Guest zeroing out status is not a hardware requirement.
983                    Clear EOP in case guest didn't do it. */
984                 desc.status &= ~E1000_RXD_STAT_EOP;
985             }
986         } else { // as per intel docs; skip descriptors with null buf addr
987             DBGOUT(RX, "Null RX descriptor!!\n");
988         }
989         pci_dma_write(d, base, &desc, sizeof(desc));
990 
991         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
992             s->mac_reg[RDH] = 0;
993         /* see comment in start_xmit; same here */
994         if (s->mac_reg[RDH] == rdh_start ||
995             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
996             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
997                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
998             e1000_receiver_overrun(s, total_size);
999             return -1;
1000         }
1001     } while (desc_offset < total_size);
1002 
1003     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1004 
1005     n = E1000_ICS_RXT0;
1006     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1007         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1008     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1009         s->rxbuf_min_shift)
1010         n |= E1000_ICS_RXDMT0;
1011 
1012     set_ics(s, 0, n);
1013 
1014     return size;
1015 }
1016 
1017 static ssize_t
1018 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1019 {
1020     const struct iovec iov = {
1021         .iov_base = (uint8_t *)buf,
1022         .iov_len = size
1023     };
1024 
1025     return e1000_receive_iov(nc, &iov, 1);
1026 }
1027 
1028 static uint32_t
1029 mac_readreg(E1000State *s, int index)
1030 {
1031     return s->mac_reg[index];
1032 }
1033 
1034 static uint32_t
1035 mac_low4_read(E1000State *s, int index)
1036 {
1037     return s->mac_reg[index] & 0xf;
1038 }
1039 
1040 static uint32_t
1041 mac_low11_read(E1000State *s, int index)
1042 {
1043     return s->mac_reg[index] & 0x7ff;
1044 }
1045 
1046 static uint32_t
1047 mac_low13_read(E1000State *s, int index)
1048 {
1049     return s->mac_reg[index] & 0x1fff;
1050 }
1051 
1052 static uint32_t
1053 mac_low16_read(E1000State *s, int index)
1054 {
1055     return s->mac_reg[index] & 0xffff;
1056 }
1057 
1058 static uint32_t
1059 mac_icr_read(E1000State *s, int index)
1060 {
1061     uint32_t ret = s->mac_reg[ICR];
1062 
1063     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1064     set_interrupt_cause(s, 0, 0);
1065     return ret;
1066 }
1067 
1068 static uint32_t
1069 mac_read_clr4(E1000State *s, int index)
1070 {
1071     uint32_t ret = s->mac_reg[index];
1072 
1073     s->mac_reg[index] = 0;
1074     return ret;
1075 }
1076 
1077 static uint32_t
1078 mac_read_clr8(E1000State *s, int index)
1079 {
1080     uint32_t ret = s->mac_reg[index];
1081 
1082     s->mac_reg[index] = 0;
1083     s->mac_reg[index-1] = 0;
1084     return ret;
1085 }
1086 
1087 static void
1088 mac_writereg(E1000State *s, int index, uint32_t val)
1089 {
1090     uint32_t macaddr[2];
1091 
1092     s->mac_reg[index] = val;
1093 
1094     if (index == RA + 1) {
1095         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1096         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1097         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1098     }
1099 }
1100 
1101 static void
1102 set_rdt(E1000State *s, int index, uint32_t val)
1103 {
1104     s->mac_reg[index] = val & 0xffff;
1105     if (e1000_has_rxbufs(s, 1)) {
1106         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1107     }
1108 }
1109 
1110 static void
1111 set_16bit(E1000State *s, int index, uint32_t val)
1112 {
1113     s->mac_reg[index] = val & 0xffff;
1114 }
1115 
1116 static void
1117 set_dlen(E1000State *s, int index, uint32_t val)
1118 {
1119     s->mac_reg[index] = val & 0xfff80;
1120 }
1121 
1122 static void
1123 set_tctl(E1000State *s, int index, uint32_t val)
1124 {
1125     s->mac_reg[index] = val;
1126     s->mac_reg[TDT] &= 0xffff;
1127     start_xmit(s);
1128 }
1129 
1130 static void
1131 set_icr(E1000State *s, int index, uint32_t val)
1132 {
1133     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1134     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1135 }
1136 
1137 static void
1138 set_imc(E1000State *s, int index, uint32_t val)
1139 {
1140     s->mac_reg[IMS] &= ~val;
1141     set_ics(s, 0, 0);
1142 }
1143 
1144 static void
1145 set_ims(E1000State *s, int index, uint32_t val)
1146 {
1147     s->mac_reg[IMS] |= val;
1148     set_ics(s, 0, 0);
1149 }
1150 
/* Shorthand for a table entry that reads the register back unmodified. */
#define getreg(x)    [x] = mac_readreg
/* Read handler signature: (device state, register index) -> value read. */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * Read dispatch table, indexed by register index (the MMIO handlers use
 * the byte offset shifted right by 2).  Entries left NULL have no read
 * handler.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Read-to-clear registers (clr8 also clears the preceding register) */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers or a masked read */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (array-style registers) */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of slots in macreg_readops; used to bounds-check the index. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1206 
/* Shorthand for a table entry that stores the written value unmodified. */
#define putreg(x)    [x] = mac_writereg
/* Write handler signature: (device state, register index, value). */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * Write dispatch table, indexed by register index (the MMIO handlers use
 * the byte offset shifted right by 2).  Entries left NULL have no write
 * handler.
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes are masked or have side effects */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (array-style registers) */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of slots in macreg_writeops; used to bounds-check the index. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1237 
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* Encode "register is accessible only when compat flag x is set". */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Flag-gated registers that are additionally only partly implemented */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1290 
1291 static void
1292 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1293                  unsigned size)
1294 {
1295     E1000State *s = opaque;
1296     unsigned int index = (addr & 0x1ffff) >> 2;
1297 
1298     if (index < NWRITEOPS && macreg_writeops[index]) {
1299         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1300             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1301             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1302                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1303                        "It is not fully implemented.\n", index<<2);
1304             }
1305             macreg_writeops[index](s, index, val);
1306         } else {    /* "flag needed" bit is set, but the flag is not active */
1307             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1308                    index<<2);
1309         }
1310     } else if (index < NREADOPS && macreg_readops[index]) {
1311         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1312                index<<2, val);
1313     } else {
1314         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1315                index<<2, val);
1316     }
1317 }
1318 
1319 static uint64_t
1320 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1321 {
1322     E1000State *s = opaque;
1323     unsigned int index = (addr & 0x1ffff) >> 2;
1324 
1325     if (index < NREADOPS && macreg_readops[index]) {
1326         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1327             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1328             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1329                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1330                        "It is not fully implemented.\n", index<<2);
1331             }
1332             return macreg_readops[index](s, index);
1333         } else {    /* "flag needed" bit is set, but the flag is not active */
1334             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1335                    index<<2);
1336         }
1337     } else {
1338         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1339     }
1340     return 0;
1341 }
1342 
/*
 * Access callbacks for the register MMIO region: little-endian, with the
 * implementation restricted to 4-byte accesses.
 */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1352 
1353 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1354                               unsigned size)
1355 {
1356     E1000State *s = opaque;
1357 
1358     (void)s;
1359     return 0;
1360 }
1361 
1362 static void e1000_io_write(void *opaque, hwaddr addr,
1363                            uint64_t val, unsigned size)
1364 {
1365     E1000State *s = opaque;
1366 
1367     (void)s;
1368 }
1369 
/* Access callbacks for the I/O port region (little-endian stub handlers). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1375 
/* VMState field test: true only for migration stream version 1. */
static bool is_version_1(void *opaque, int version_id)
{
    const bool matches = (version_id == 1);

    return matches;
}
1380 
1381 static int e1000_pre_save(void *opaque)
1382 {
1383     E1000State *s = opaque;
1384     NetClientState *nc = qemu_get_queue(s->nic);
1385 
1386     /*
1387      * If link is down and auto-negotiation is supported and ongoing,
1388      * complete auto-negotiation immediately. This allows us to look
1389      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1390      */
1391     if (nc->link_down && have_autoneg(s)) {
1392         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1393     }
1394 
1395     /* Decide which set of props to migrate in the main structure */
1396     if (chkflag(TSO) || !s->use_tso_for_migration) {
1397         /* Either we're migrating with the extra subsection, in which
1398          * case the mig_props is always 'props' OR
1399          * we've not got the subsection, but 'props' was the last
1400          * updated.
1401          */
1402         s->mig_props = s->tx.props;
1403     } else {
1404         /* We're not using the subsection, and 'tso_props' was
1405          * the last updated.
1406          */
1407         s->mig_props = s->tx.tso_props;
1408     }
1409     return 0;
1410 }
1411 
1412 static int e1000_post_load(void *opaque, int version_id)
1413 {
1414     E1000State *s = opaque;
1415     NetClientState *nc = qemu_get_queue(s->nic);
1416 
1417     if (!chkflag(MIT)) {
1418         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1419             s->mac_reg[TADV] = 0;
1420         s->mit_irq_level = false;
1421     }
1422     s->mit_ide = 0;
1423     s->mit_timer_on = true;
1424     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1425 
1426     /* nc.link_down can't be migrated, so infer link_down according
1427      * to link status bit in mac_reg[STATUS].
1428      * Alternatively, restart link negotiation if it was in progress. */
1429     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1430 
1431     if (have_autoneg(s) &&
1432         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1433         nc->link_down = false;
1434         timer_mod(s->autoneg_timer,
1435                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1436     }
1437 
1438     s->tx.props = s->mig_props;
1439     if (!s->received_tx_tso) {
1440         /* We received only one set of offload data (tx.props)
1441          * and haven't got tx.tso_props.  The best we can do
1442          * is dupe the data.
1443          */
1444         s->tx.tso_props = s->mig_props;
1445     }
1446     return 0;
1447 }
1448 
1449 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1450 {
1451     E1000State *s = opaque;
1452     s->received_tx_tso = true;
1453     return 0;
1454 }
1455 
1456 static bool e1000_mit_state_needed(void *opaque)
1457 {
1458     E1000State *s = opaque;
1459 
1460     return chkflag(MIT);
1461 }
1462 
1463 static bool e1000_full_mac_needed(void *opaque)
1464 {
1465     E1000State *s = opaque;
1466 
1467     return chkflag(MAC);
1468 }
1469 
1470 static bool e1000_tso_state_needed(void *opaque)
1471 {
1472     E1000State *s = opaque;
1473 
1474     return chkflag(TSO);
1475 }
1476 
/*
 * Migration subsection holding the interrupt mitigation registers and
 * mit_irq_level; gated by e1000_mit_state_needed().
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1491 
/*
 * Migration subsection holding the whole mac_reg[] array (0x8000 entries);
 * gated by e1000_full_mac_needed().
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1502 
/*
 * Migration subsection holding tx.tso_props; gated by
 * e1000_tso_state_needed().  Its post_load hook marks the properties as
 * received.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1524 
/*
 * Main migration description of the device (stream version 2, accepting
 * version 1 and later).  The field list is the stream layout, so the order
 * of the entries below must not change.  Optional state is carried by the
 * three subsections listed at the end.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* TX offload properties (chosen in e1000_pre_save) and TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: RA, MTA and VFTA */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1609 
/*
 * Template for the 64-word EEPROM image; pci_e1000_realize() passes it to
 * e1000x_core_prepare_eeprom() together with the device ID and MAC address.
 *
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1624 
1625 /* PCI interface */
1626 
1627 static void
1628 e1000_mmio_setup(E1000State *d)
1629 {
1630     int i;
1631     const uint32_t excluded_regs[] = {
1632         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1633         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1634     };
1635 
1636     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1637                           "e1000-mmio", PNPMMIO_SIZE);
1638     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1639     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1640         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1641                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1642     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1643 }
1644 
1645 static void
1646 pci_e1000_uninit(PCIDevice *dev)
1647 {
1648     E1000State *d = E1000(dev);
1649 
1650     timer_free(d->autoneg_timer);
1651     timer_free(d->mit_timer);
1652     timer_free(d->flush_queue_timer);
1653     qemu_del_nic(d->nic);
1654 }
1655 
/* Net client callbacks registered for the NIC in pci_e1000_realize(). */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1664 
1665 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1666                                 uint32_t val, int len)
1667 {
1668     E1000State *s = E1000(pci_dev);
1669 
1670     pci_default_write_config(pci_dev, address, val, len);
1671 
1672     if (range_covers_byte(address, len, PCI_COMMAND) &&
1673         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1674         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1675     }
1676 }
1677 
1678 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1679 {
1680     DeviceState *dev = DEVICE(pci_dev);
1681     E1000State *d = E1000(pci_dev);
1682     uint8_t *pci_conf;
1683     uint8_t *macaddr;
1684 
1685     pci_dev->config_write = e1000_write_config;
1686 
1687     pci_conf = pci_dev->config;
1688 
1689     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1690     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1691 
1692     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1693 
1694     e1000_mmio_setup(d);
1695 
1696     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1697 
1698     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1699 
1700     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1701     macaddr = d->conf.macaddr.a;
1702 
1703     e1000x_core_prepare_eeprom(d->eeprom_data,
1704                                e1000_eeprom_template,
1705                                sizeof(e1000_eeprom_template),
1706                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1707                                macaddr);
1708 
1709     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1710                           object_get_typename(OBJECT(d)), dev->id, d);
1711 
1712     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1713 
1714     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1715     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1716     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1717                                         e1000_flush_queue_timer, d);
1718 }
1719 
1720 static void qdev_e1000_reset(DeviceState *dev)
1721 {
1722     E1000State *d = E1000(dev);
1723     e1000_reset(d);
1724 }
1725 
1726 static Property e1000_properties[] = {
1727     DEFINE_NIC_PROPERTIES(E1000State, conf),
1728     DEFINE_PROP_BIT("autonegotiation", E1000State,
1729                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1730     DEFINE_PROP_BIT("mitigation", E1000State,
1731                     compat_flags, E1000_FLAG_MIT_BIT, true),
1732     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1733                     compat_flags, E1000_FLAG_MAC_BIT, true),
1734     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1735                     compat_flags, E1000_FLAG_TSO_BIT, true),
1736     DEFINE_PROP_END_OF_LIST(),
1737 };
1738 
/*
 * Static description of one e1000 device variant.  An entry is passed
 * as class_data to e1000_class_init, which copies the ids into the
 * PCI and e1000 class structures.
 */
typedef struct E1000Info {
    /* QOM type name the variant is registered under */
    const char *name;
    /* value copied to the PCI class device_id */
    uint16_t device_id;
    /* value copied to the PCI class revision */
    uint8_t revision;
    /* value copied to the e1000 class phy_id2 */
    uint16_t phy_id2;
} E1000Info;
1745 
1746 static void e1000_class_init(ObjectClass *klass, void *data)
1747 {
1748     DeviceClass *dc = DEVICE_CLASS(klass);
1749     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1750     E1000BaseClass *e = E1000_CLASS(klass);
1751     const E1000Info *info = data;
1752 
1753     k->realize = pci_e1000_realize;
1754     k->exit = pci_e1000_uninit;
1755     k->romfile = "efi-e1000.rom";
1756     k->vendor_id = PCI_VENDOR_ID_INTEL;
1757     k->device_id = info->device_id;
1758     k->revision = info->revision;
1759     e->phy_id2 = info->phy_id2;
1760     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1761     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1762     dc->desc = "Intel Gigabit Ethernet";
1763     dc->reset = qdev_e1000_reset;
1764     dc->vmsd = &vmstate_e1000;
1765     device_class_set_props(dc, e1000_properties);
1766 }
1767 
1768 static void e1000_instance_init(Object *obj)
1769 {
1770     E1000State *n = E1000(obj);
1771     device_add_bootindex_property(obj, &n->conf.bootindex,
1772                                   "bootindex", "/ethernet-phy@0",
1773                                   DEVICE(n));
1774 }
1775 
1776 static const TypeInfo e1000_base_info = {
1777     .name          = TYPE_E1000_BASE,
1778     .parent        = TYPE_PCI_DEVICE,
1779     .instance_size = sizeof(E1000State),
1780     .instance_init = e1000_instance_init,
1781     .class_size    = sizeof(E1000BaseClass),
1782     .abstract      = true,
1783     .interfaces = (InterfaceInfo[]) {
1784         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1785         { },
1786     },
1787 };
1788 
1789 static const E1000Info e1000_devices[] = {
1790     {
1791         .name      = "e1000",
1792         .device_id = E1000_DEV_ID_82540EM,
1793         .revision  = 0x03,
1794         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1795     },
1796     {
1797         .name      = "e1000-82544gc",
1798         .device_id = E1000_DEV_ID_82544GC_COPPER,
1799         .revision  = 0x03,
1800         .phy_id2   = E1000_PHY_ID2_82544x,
1801     },
1802     {
1803         .name      = "e1000-82545em",
1804         .device_id = E1000_DEV_ID_82545EM_COPPER,
1805         .revision  = 0x03,
1806         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1807     },
1808 };
1809 
1810 static void e1000_register_types(void)
1811 {
1812     int i;
1813 
1814     type_register_static(&e1000_base_info);
1815     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1816         const E1000Info *info = &e1000_devices[i];
1817         TypeInfo type_info = {};
1818 
1819         type_info.name = info->name;
1820         type_info.parent = TYPE_E1000_BASE;
1821         type_info.class_data = (void *)info;
1822         type_info.class_init = e1000_class_init;
1823 
1824         type_register(&type_info);
1825     }
1826 }
1827 
1828 type_init(e1000_register_types)
1829