xref: /qemu/hw/net/e1000.c (revision 651ccdfa)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000_common.h"
43 #include "e1000x_common.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
/*
 * Uncomment the next line to compile in the DBGOUT() tracing below.
 */
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/*
 * Debug categories.  Each enumerator is a bit position in debugflags;
 * DBGBIT(x) turns the category name (without the DEBUG_ prefix) into the
 * corresponding mask.
 */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories that are printed; only TXERR and GENERAL are on by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/*
 * DBGOUT(category, fmt, ...): printf-style message to stderr, prefixed
 * with "e1000: ", emitted only when the category's bit is set in
 * debugflags.
 */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Without E1000_DEBUG, DBGOUT() expands to nothing; its arguments are not evaluated. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
66 
/*
 * Region sizes in bytes.
 * NOTE(review): presumably the sizes of the I/O-port and MMIO regions
 * (the `io` and `mmio` members of E1000State); their users are outside
 * this part of the file -- confirm.
 */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/*
 * Ethernet header length plus 4 bytes (presumably room for one VLAN tag).
 * e1000_receive_iov() linearises this many bytes when the first iovec
 * element is shorter than this.
 */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
71 
72 /*
73  * HW models:
74  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
75  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
76  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
77  *  Others never tested
78  */
79 
/*
 * State of one emulated e1000 device.
 */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    /* MAC registers, indexed by register name (CTRL, ICR, IMS, RCTL, ...) */
    uint32_t mac_reg[0x8000];
    /* PHY registers, indexed by MII_xxx / M88E1000_xxx register number */
    uint16_t phy_reg[0x20];
    /* EEPROM contents; read through get_eecd() and flash_eerd_read() */
    uint16_t eeprom_data[64];

    /* Derived from RCTL every time set_rx_control() runs */
    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    /* Packet currently being assembled from TX descriptors */
    struct e1000_tx {
        /* Copy of the first hdr_len bytes, re-used for every TSO segment */
        unsigned char header[256];
        /* VLAN tag (VET + "special" field) staged by process_tx_desc() */
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        /* (xmit_seg() sends a tagged frame starting at vlan[], size + 4.) */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        /* Number of bytes currently held in data[] */
        uint16_t size;
        /* Non-zero when xmit_seg() must insert vlan_header into the frame */
        unsigned char vlan_needed;
        /* E1000_TXD_POPTS_* bits taken from the first data descriptor */
        unsigned char sum_needed;
        /* True while the current descriptor requests TSO (TSE set) */
        bool cptse;
        /* Offload parameters from the last non-TSE context descriptor */
        e1000x_txd_props props;
        /* Offload parameters from the last TSE context descriptor */
        e1000x_txd_props tso_props;
        /* Segments already emitted for the current TSO packet */
        uint16_t tso_frames;
        /* Set while start_xmit() is walking the ring; blocks re-entry */
        bool busy;
    } tx;

    /* State of the bit-banged EEPROM interface (see set_eecd()/get_eecd()) */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;     /* number of bits clocked in so far */
        uint16_t bitnum_out;    /* index of the EEPROM bit shown on DO */
        uint16_t reading;       /* non-zero once a read opcode was decoded */
        uint32_t old_eecd;      /* last guest-written EECD control bits */
    } eecd_state;

    /* Armed through e1000x_restart_autoneg(); fires e1000_autoneg_timer() */
    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    /* While pending, reception is held off (see e1000_can_receive()) */
    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    /* Bitmask of E1000_FLAG_xxx; tested with chkflag() */
    uint32_t compat_flags;
    /* NOTE(review): migration helper state; its users are not in this chunk */
    bool received_tx_tso;
    /* Set/cleared by process_tx_desc(): was the last context descriptor TSE? */
    bool use_tso_for_migration;
    /* NOTE(review): presumably the props image used by migration -- confirm */
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;
147 
/*
 * Test one compatibility flag, e.g. chkflag(MIT).  Expands to an
 * expression that uses a variable named `s` (an E1000State pointer),
 * which must be in scope wherever the macro is used.
 */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data shared by the e1000 device models. */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    /* Value loaded into the MII_PHYID2 PHY register by e1000_reset_hold() */
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

/* Provides the E1000() and E1000_GET_CLASS() casts used in this file. */
DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
160 
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
190     s->phy_reg[MII_BMCR] = val & ~(0x3f |
191                                    MII_BMCR_RESET |
192                                    MII_BMCR_ANRESTART);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
/*
 * PHY write handlers, indexed by PHY register number.  set_mdic() calls
 * the handler when the slot is non-NULL and otherwise stores the written
 * value straight into phy_reg[].
 */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops[]; used as the bound in set_mdic(). */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
209 
/*
 * Access rights of each PHY register as seen through MDIC.  set_mdic()
 * reports E1000_MDIC_ERROR for a read of a register lacking PHY_R and for
 * a write of a register lacking PHY_W; registers not listed here have
 * neither.
 */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
    [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
    [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [MII_ANER]   = PHY_R,
};
220 
/*
 * Reset values of the PHY registers; e1000_reset_hold() copies this table
 * over the zeroed phy_reg[] array.
 *
 * MII_PHYID2 is documented in 8254x_GBe_SDM.pdf, p. 250.
 */
static const uint16_t phy_reg_init[] = {
    [MII_BMCR] = MII_BMCR_SPEED1000 |
                 MII_BMCR_FD |
                 MII_BMCR_AUTOEN,

    [MII_BMSR] = MII_BMSR_EXTCAP |
                 MII_BMSR_LINK_ST |   /* link initially up */
                 MII_BMSR_AUTONEG |
                 /* MII_BMSR_AN_COMP: initially NOT completed */
                 MII_BMSR_MFPS |
                 MII_BMSR_EXTSTAT |
                 MII_BMSR_10T_HD |
                 MII_BMSR_10T_FD |
                 MII_BMSR_100TX_HD |
                 MII_BMSR_100TX_FD,

    [MII_PHYID1] = 0x141,
    /* [MII_PHYID2] configured per DevId, from e1000_reset() */
    [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
                 MII_ANAR_10FD | MII_ANAR_TX |
                 MII_ANAR_TXFD | MII_ANAR_PAUSE |
                 MII_ANAR_PAUSE_ASYM,
    [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
                   MII_ANLPAR_TX | MII_ANLPAR_TXFD,
    [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
                     MII_CTRL1000_MASTER,
    [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
                     MII_STAT1000_ROK | MII_STAT1000_LOK,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
254 
/*
 * Reset values of the MAC registers that are not zero; e1000_reset_hold()
 * copies this table over the zeroed mac_reg[] array.  The initial STATUS
 * value has E1000_STATUS_LU set, i.e. the link starts out up.
 */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
268 
/*
 * Keep in *curr the smallest non-zero delay seen so far.
 *
 * A @value of 0 is ignored, and *curr == 0 means "nothing recorded yet",
 * so the first non-zero @value is always taken.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}
277 
/*
 * Set the interrupt cause register to @val and update the PCI interrupt
 * line accordingly.
 *
 * @index is unused.  The line level is (IMS & ICR) != 0, except that a
 * would-be rising edge is suppressed while the mitigation timer is
 * running; e1000_mit_timer() calls back in here when the timer expires.
 * When the MIT compatibility flag is set, a rising edge that is let
 * through also arms the mitigation timer.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            /* Line stays low; mit_irq_level is deliberately left unchanged. */
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            /* mit_delay is kept in 256ns units; hence the "* 4" below. */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            /* Convert 256ns units to nanoseconds for the virtual clock. */
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    /* Drive the pin; this also lowers it once no enabled cause is left. */
    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
345 
346 static void
347 e1000_mit_timer(void *opaque)
348 {
349     E1000State *s = opaque;
350 
351     s->mit_timer_on = 0;
352     /* Call set_interrupt_cause to update the irq level (if necessary). */
353     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
354 }
355 
356 static void
357 set_ics(E1000State *s, int index, uint32_t val)
358 {
359     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
360         s->mac_reg[IMS]);
361     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
362 }
363 
364 static void
365 e1000_autoneg_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368     if (!qemu_get_queue(s->nic)->link_down) {
369         e1000_autoneg_done(s);
370         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
371     }
372 }
373 
374 static bool e1000_vet_init_need(void *opaque)
375 {
376     E1000State *s = opaque;
377 
378     return chkflag(VET);
379 }
380 
381 static void e1000_reset_hold(Object *obj)
382 {
383     E1000State *d = E1000(obj);
384     E1000BaseClass *edc = E1000_GET_CLASS(d);
385     uint8_t *macaddr = d->conf.macaddr.a;
386 
387     timer_del(d->autoneg_timer);
388     timer_del(d->mit_timer);
389     timer_del(d->flush_queue_timer);
390     d->mit_timer_on = 0;
391     d->mit_irq_level = 0;
392     d->mit_ide = 0;
393     memset(d->phy_reg, 0, sizeof d->phy_reg);
394     memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
395     d->phy_reg[MII_PHYID2] = edc->phy_id2;
396     memset(d->mac_reg, 0, sizeof d->mac_reg);
397     memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
398     d->rxbuf_min_shift = 1;
399     memset(&d->tx, 0, sizeof d->tx);
400 
401     if (qemu_get_queue(d->nic)->link_down) {
402         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
403     }
404 
405     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
406 
407     if (e1000_vet_init_need(d)) {
408         d->mac_reg[VET] = ETH_P_VLAN;
409     }
410 }
411 
412 static void
413 set_ctrl(E1000State *s, int index, uint32_t val)
414 {
415     /* RST is self clearing */
416     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
417 }
418 
419 static void
420 e1000_flush_queue_timer(void *opaque)
421 {
422     E1000State *s = opaque;
423 
424     qemu_flush_queued_packets(qemu_get_queue(s->nic));
425 }
426 
427 static void
428 set_rx_control(E1000State *s, int index, uint32_t val)
429 {
430     s->mac_reg[RCTL] = val;
431     s->rxbuf_size = e1000x_rxbufsize(val);
432     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
433     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
434            s->mac_reg[RCTL]);
435     timer_mod(s->flush_queue_timer,
436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
437 }
438 
439 static void
440 set_mdic(E1000State *s, int index, uint32_t val)
441 {
442     uint32_t data = val & E1000_MDIC_DATA_MASK;
443     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
444 
445     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
446         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
447     else if (val & E1000_MDIC_OP_READ) {
448         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
449         if (!(phy_regcap[addr] & PHY_R)) {
450             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
451             val |= E1000_MDIC_ERROR;
452         } else
453             val = (val ^ data) | s->phy_reg[addr];
454     } else if (val & E1000_MDIC_OP_WRITE) {
455         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
456         if (!(phy_regcap[addr] & PHY_W)) {
457             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
458             val |= E1000_MDIC_ERROR;
459         } else {
460             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
461                 phyreg_writeops[addr](s, index, data);
462             } else {
463                 s->phy_reg[addr] = data;
464             }
465         }
466     }
467     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
468 
469     if (val & E1000_MDIC_INT_EN) {
470         set_ics(s, 0, E1000_ICR_MDAC);
471     }
472 }
473 
474 static uint32_t
475 get_eecd(E1000State *s, int index)
476 {
477     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
478 
479     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
480            s->eecd_state.bitnum_out, s->eecd_state.reading);
481     if (!s->eecd_state.reading ||
482         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
483           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
484         ret |= E1000_EECD_DO;
485     return ret;
486 }
487 
/*
 * Write handler for the EECD register: emulates the bit-banged serial
 * EEPROM interface.  @index is unused.
 *
 * The guest toggles SK (clock) while CS (chip select) is set.  Bits on DI
 * are shifted into val_in on rising clock edges; bitnum_out, which selects
 * the bit returned by get_eecd(), advances on falling edges.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Remember only the control bits; get_eecd() echoes them back. */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising edge: clock in one bit from DI. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    /*
     * After 9 bits the command is complete: bits 6..8 of val_in are the
     * opcode and bits 0..5 the word address.  bitnum_out is set to one
     * less than the first bit of that word (16 bits per word), so that the
     * next falling edge moves it onto that bit.
     */
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
523 
524 static uint32_t
525 flash_eerd_read(E1000State *s, int x)
526 {
527     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
528 
529     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
530         return (s->mac_reg[EERD]);
531 
532     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
533         return (E1000_EEPROM_RW_REG_DONE | r);
534 
535     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
536            E1000_EEPROM_RW_REG_DONE | r);
537 }
538 
/*
 * Compute an Internet checksum over part of a packet and store it,
 * big-endian, in the packet.
 *
 * @data: packet buffer
 * @n:    number of bytes in the packet
 * @sloc: offset at which the 16-bit checksum is stored
 * @css:  offset of the first byte that is summed
 * @cse:  offset of the last byte that is summed; 0, or a value not below
 *        @n, means "up to the end of the packet"
 *
 * Nothing is written unless @sloc lies below the last summed byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }
    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
551 
552 static inline void
553 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
554 {
555     if (is_broadcast_ether_addr(arr)) {
556         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
557     } else if (is_multicast_ether_addr(arr)) {
558         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
559     }
560 }
561 
562 static void
563 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
564 {
565     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
566                                     PTC1023, PTC1522 };
567 
568     NetClientState *nc = qemu_get_queue(s->nic);
569     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
570         qemu_receive_packet(nc, buf, size);
571     } else {
572         qemu_send_packet(nc, buf, size);
573     }
574     inc_tx_bcast_or_mcast_count(s, buf);
575     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
576 }
577 
/*
 * Transmit the frame currently held in s->tx.data (s->tx.size bytes).
 *
 * For a TSO segment (tp->cptse) the IP and TCP/UDP headers are first
 * patched in place for this segment.  Then the requested checksums are
 * inserted, the VLAN tag is added if needed, the frame is sent and the
 * transmit statistics are updated.
 *
 * Note that tp->data is modified in place; process_tx_desc() restores the
 * pristine header from tp->header before building the next TSO segment.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    /* TSO uses the parameters of the last TSE context descriptor. */
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* total length (offset 2), then identification (offset 4) */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            /* 16-bit length field at offset 4 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* payload bytes already sent in earlier segments */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                /* last segment of a packet that needed more than one */
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            /* add pseudo-header length before checksum calculation */
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            /* fold the carry back into the low 16 bits */
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /*
         * Slide the first 12 bytes of the frame back by 4 into tp->vlan
         * (which directly precedes tp->data) and put the 4-byte tag into
         * the gap, so the tagged frame starts at tp->vlan.
         */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Packet and octet counters; the "good" counters mirror the totals. */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
644 
/*
 * Process one transmit descriptor (@dp, already read from guest memory).
 *
 * Context descriptors only update the stored offload parameters.  Data
 * and legacy descriptors append their buffer to s->tx.data; with TSO the
 * data is cut into segments of header + MSS bytes, each sent through
 * xmit_seg() as soon as it is complete.  On a descriptor with EOP set the
 * remaining frame is sent and the per-packet state is cleared.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    /* low 16 bits of the descriptor: length of its buffer */
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* Remember an IDE request; set_interrupt_cause() consumes it. */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        /* data descriptor */
        if (tp->size == 0) {
            /* only the first descriptor of a packet supplies the options */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        /* legacy descriptor */
        tp->cptse = 0;
    }

    /* Stage the VLAN tag: VET followed by the descriptor's special field. */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* size of one full segment: headers plus MSS bytes of payload */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size >= msh) {
                goto eop;
            }
            /* take no more than what completes the current segment */
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* never read past the end of tp->data */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            /* the header just became complete: keep a pristine copy */
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                /* full segment: send it, then start the next from the header */
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        /* no segmentation: append the buffer, clamped to the space left */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

eop:
    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* Send what is left, unless a TSO packet holds less than its header. */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    /* End of packet: clear the per-packet state. */
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
734 
735 static uint32_t
736 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
737 {
738     PCIDevice *d = PCI_DEVICE(s);
739     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
740 
741     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
742         return 0;
743     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
744                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
745     dp->upper.data = cpu_to_le32(txd_upper);
746     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
747                   &dp->upper, sizeof(dp->upper));
748     return E1000_ICR_TXDW;
749 }
750 
751 static uint64_t tx_desc_base(E1000State *s)
752 {
753     uint64_t bah = s->mac_reg[TDBAH];
754     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
755 
756     return (bah << 32) + bal;
757 }
758 
/*
 * Process the transmit ring from TDH up to TDT.
 *
 * Does nothing when transmission is disabled in TCTL or when a call is
 * already in progress (s->tx.busy).  Each descriptor is read from guest
 * memory, processed and, if requested, written back.  Afterwards an
 * interrupt is raised with TXQE plus TXDW if any descriptor was written
 * back.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    /* Guard against re-entry while the ring is being walked. */
    if (s->tx.busy) {
        return;
    }
    s->tx.busy = true;

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance the head, wrapping at the end of the ring (TDLEN bytes). */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    s->tx.busy = false;
    set_ics(s, 0, cause);
}
806 
807 static int
808 receive_filter(E1000State *s, const uint8_t *buf, int size)
809 {
810     uint32_t rctl = s->mac_reg[RCTL];
811     int isbcast = is_broadcast_ether_addr(buf);
812     int ismcast = is_multicast_ether_addr(buf);
813 
814     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
815         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
816         uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
817         uint32_t vfta =
818             ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
819                      ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
820         if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
821             return 0;
822         }
823     }
824 
825     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
826         return 1;
827     }
828 
829     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
830         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
831         return 1;
832     }
833 
834     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
835         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
836         return 1;
837     }
838 
839     return e1000x_rx_group_filter(s->mac_reg, buf);
840 }
841 
842 static void
843 e1000_set_link_status(NetClientState *nc)
844 {
845     E1000State *s = qemu_get_nic_opaque(nc);
846     uint32_t old_status = s->mac_reg[STATUS];
847 
848     if (nc->link_down) {
849         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
850     } else {
851         if (have_autoneg(s) &&
852             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
853             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
854         } else {
855             e1000_link_up(s);
856         }
857     }
858 
859     if (s->mac_reg[STATUS] != old_status)
860         set_ics(s, 0, E1000_ICR_LSC);
861 }
862 
863 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
864 {
865     int bufs;
866     /* Fast-path short packets */
867     if (total_size <= s->rxbuf_size) {
868         return s->mac_reg[RDH] != s->mac_reg[RDT];
869     }
870     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
871         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
872     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
873         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
874             s->mac_reg[RDT] - s->mac_reg[RDH];
875     } else {
876         return false;
877     }
878     return total_size <= bufs * s->rxbuf_size;
879 }
880 
881 static bool
882 e1000_can_receive(NetClientState *nc)
883 {
884     E1000State *s = qemu_get_nic_opaque(nc);
885 
886     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
887         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
888 }
889 
890 static uint64_t rx_desc_base(E1000State *s)
891 {
892     uint64_t bah = s->mac_reg[RDBAH];
893     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
894 
895     return (bah << 32) + bal;
896 }
897 
898 static void
899 e1000_receiver_overrun(E1000State *s, size_t size)
900 {
901     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
902     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
903     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
904     set_ics(s, 0, E1000_ICS_RXO);
905 }
906 
907 static ssize_t
908 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
909 {
910     E1000State *s = qemu_get_nic_opaque(nc);
911     PCIDevice *d = PCI_DEVICE(s);
912     struct e1000_rx_desc desc;
913     dma_addr_t base;
914     unsigned int n, rdt;
915     uint32_t rdh_start;
916     uint16_t vlan_special = 0;
917     uint8_t vlan_status = 0;
918     uint8_t min_buf[ETH_ZLEN];
919     struct iovec min_iov;
920     uint8_t *filter_buf = iov->iov_base;
921     size_t size = iov_size(iov, iovcnt);
922     size_t iov_ofs = 0;
923     size_t desc_offset;
924     size_t desc_size;
925     size_t total_size;
926 
927     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
928         return -1;
929     }
930 
931     if (timer_pending(s->flush_queue_timer)) {
932         return 0;
933     }
934 
935     /* Pad to minimum Ethernet frame length */
936     if (size < sizeof(min_buf)) {
937         iov_to_buf(iov, iovcnt, 0, min_buf, size);
938         memset(&min_buf[size], 0, sizeof(min_buf) - size);
939         min_iov.iov_base = filter_buf = min_buf;
940         min_iov.iov_len = size = sizeof(min_buf);
941         iovcnt = 1;
942         iov = &min_iov;
943     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
944         /* This is very unlikely, but may happen. */
945         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
946         filter_buf = min_buf;
947     }
948 
949     /* Discard oversized packets if !LPE and !SBP. */
950     if (e1000x_is_oversized(s->mac_reg, size)) {
951         return size;
952     }
953 
954     if (!receive_filter(s, filter_buf, size)) {
955         return size;
956     }
957 
958     if (e1000x_vlan_enabled(s->mac_reg) &&
959         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
960         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
961         iov_ofs = 4;
962         if (filter_buf == iov->iov_base) {
963             memmove(filter_buf + 4, filter_buf, 12);
964         } else {
965             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
966             while (iov->iov_len <= iov_ofs) {
967                 iov_ofs -= iov->iov_len;
968                 iov++;
969             }
970         }
971         vlan_status = E1000_RXD_STAT_VP;
972         size -= 4;
973     }
974 
975     rdh_start = s->mac_reg[RDH];
976     desc_offset = 0;
977     total_size = size + e1000x_fcs_len(s->mac_reg);
978     if (!e1000_has_rxbufs(s, total_size)) {
979         e1000_receiver_overrun(s, total_size);
980         return -1;
981     }
982     do {
983         desc_size = total_size - desc_offset;
984         if (desc_size > s->rxbuf_size) {
985             desc_size = s->rxbuf_size;
986         }
987         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
988         pci_dma_read(d, base, &desc, sizeof(desc));
989         desc.special = vlan_special;
990         desc.status &= ~E1000_RXD_STAT_DD;
991         if (desc.buffer_addr) {
992             if (desc_offset < size) {
993                 size_t iov_copy;
994                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
995                 size_t copy_size = size - desc_offset;
996                 if (copy_size > s->rxbuf_size) {
997                     copy_size = s->rxbuf_size;
998                 }
999                 do {
1000                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1001                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1002                     copy_size -= iov_copy;
1003                     ba += iov_copy;
1004                     iov_ofs += iov_copy;
1005                     if (iov_ofs == iov->iov_len) {
1006                         iov++;
1007                         iov_ofs = 0;
1008                     }
1009                 } while (copy_size);
1010             }
1011             desc_offset += desc_size;
1012             desc.length = cpu_to_le16(desc_size);
1013             if (desc_offset >= total_size) {
1014                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1015             } else {
1016                 /* Guest zeroing out status is not a hardware requirement.
1017                    Clear EOP in case guest didn't do it. */
1018                 desc.status &= ~E1000_RXD_STAT_EOP;
1019             }
1020         } else { // as per intel docs; skip descriptors with null buf addr
1021             DBGOUT(RX, "Null RX descriptor!!\n");
1022         }
1023         pci_dma_write(d, base, &desc, sizeof(desc));
1024         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1025         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
1026                       &desc.status, sizeof(desc.status));
1027 
1028         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1029             s->mac_reg[RDH] = 0;
1030         /* see comment in start_xmit; same here */
1031         if (s->mac_reg[RDH] == rdh_start ||
1032             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1033             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1034                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1035             e1000_receiver_overrun(s, total_size);
1036             return -1;
1037         }
1038     } while (desc_offset < total_size);
1039 
1040     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1041 
1042     n = E1000_ICS_RXT0;
1043     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1044         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1045     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1046         s->rxbuf_min_shift)
1047         n |= E1000_ICS_RXDMT0;
1048 
1049     set_ics(s, 0, n);
1050 
1051     return size;
1052 }
1053 
1054 static ssize_t
1055 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1056 {
1057     const struct iovec iov = {
1058         .iov_base = (uint8_t *)buf,
1059         .iov_len = size
1060     };
1061 
1062     return e1000_receive_iov(nc, &iov, 1);
1063 }
1064 
1065 static uint32_t
1066 mac_readreg(E1000State *s, int index)
1067 {
1068     return s->mac_reg[index];
1069 }
1070 
1071 static uint32_t
1072 mac_icr_read(E1000State *s, int index)
1073 {
1074     uint32_t ret = s->mac_reg[ICR];
1075 
1076     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1077     set_interrupt_cause(s, 0, 0);
1078     return ret;
1079 }
1080 
1081 static uint32_t
1082 mac_read_clr4(E1000State *s, int index)
1083 {
1084     uint32_t ret = s->mac_reg[index];
1085 
1086     s->mac_reg[index] = 0;
1087     return ret;
1088 }
1089 
1090 static uint32_t
1091 mac_read_clr8(E1000State *s, int index)
1092 {
1093     uint32_t ret = s->mac_reg[index];
1094 
1095     s->mac_reg[index] = 0;
1096     s->mac_reg[index-1] = 0;
1097     return ret;
1098 }
1099 
1100 static void
1101 mac_writereg(E1000State *s, int index, uint32_t val)
1102 {
1103     uint32_t macaddr[2];
1104 
1105     s->mac_reg[index] = val;
1106 
1107     if (index == RA + 1) {
1108         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1109         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1110         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1111     }
1112 }
1113 
1114 static void
1115 set_rdt(E1000State *s, int index, uint32_t val)
1116 {
1117     s->mac_reg[index] = val & 0xffff;
1118     if (e1000_has_rxbufs(s, 1)) {
1119         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1120     }
1121 }
1122 
1123 #define LOW_BITS_SET_FUNC(num)                             \
1124     static void                                            \
1125     set_##num##bit(E1000State *s, int index, uint32_t val) \
1126     {                                                      \
1127         s->mac_reg[index] = val & (BIT(num) - 1);          \
1128     }
1129 
1130 LOW_BITS_SET_FUNC(4)
1131 LOW_BITS_SET_FUNC(11)
1132 LOW_BITS_SET_FUNC(13)
1133 LOW_BITS_SET_FUNC(16)
1134 
1135 static void
1136 set_dlen(E1000State *s, int index, uint32_t val)
1137 {
1138     s->mac_reg[index] = val & 0xfff80;
1139 }
1140 
1141 static void
1142 set_tctl(E1000State *s, int index, uint32_t val)
1143 {
1144     s->mac_reg[index] = val;
1145     s->mac_reg[TDT] &= 0xffff;
1146     start_xmit(s);
1147 }
1148 
1149 static void
1150 set_icr(E1000State *s, int index, uint32_t val)
1151 {
1152     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1153     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1154 }
1155 
1156 static void
1157 set_imc(E1000State *s, int index, uint32_t val)
1158 {
1159     s->mac_reg[IMS] &= ~val;
1160     set_ics(s, 0, 0);
1161 }
1162 
1163 static void
1164 set_ims(E1000State *s, int index, uint32_t val)
1165 {
1166     s->mac_reg[IMS] |= val;
1167     set_ics(s, 0, 0);
1168 }
1169 
1170 #define getreg(x)    [x] = mac_readreg
1171 typedef uint32_t (*readops)(E1000State *, int);
1172 static const readops macreg_readops[] = {
1173     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1174     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1175     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1176     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1177     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1178     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1179     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1180     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1181     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1182     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1183     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1184     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1185     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1186     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1187     getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
1188     getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
1189     getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),
1190 
1191     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1192     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1193     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1194     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1195     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1196     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1197     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1198     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1199     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1200     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1201     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1202     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1203     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1204     [MPTC]    = mac_read_clr4,
1205     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1206     [EERD]    = flash_eerd_read,
1207 
1208     [CRCERRS ... MPC]     = &mac_readreg,
1209     [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
1210     [FFLT ... FFLT + 6]   = &mac_readreg,
1211     [RA ... RA + 31]      = &mac_readreg,
1212     [WUPM ... WUPM + 31]  = &mac_readreg,
1213     [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
1214     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
1215     [FFMT ... FFMT + 254] = &mac_readreg,
1216     [FFVT ... FFVT + 254] = &mac_readreg,
1217     [PBM ... PBM + 16383] = &mac_readreg,
1218 };
1219 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1220 
1221 #define putreg(x)    [x] = mac_writereg
1222 typedef void (*writeops)(E1000State *, int, uint32_t);
1223 static const writeops macreg_writeops[] = {
1224     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1225     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1226     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1227     putreg(IPAV),     putreg(WUC),
1228     putreg(WUS),
1229 
1230     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
1231     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
1232     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
1233     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
1234     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
1235     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
1236     [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
1237     [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
1238     [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
1239     [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,
1240 
1241     [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
1242     [FFLT ... FFLT + 6]   = &set_11bit,
1243     [RA ... RA + 31]      = &mac_writereg,
1244     [WUPM ... WUPM + 31]  = &mac_writereg,
1245     [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
1246     [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
1247     [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
1248     [PBM ... PBM + 16383] = &mac_writereg,
1249 };
1250 
1251 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1252 
1253 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1254 
1255 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1256 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1257  * f - flag bits (up to 6 possible flags)
1258  * n - flag needed
1259  * p - partially implenented */
1260 static const uint8_t mac_reg_access[0x8000] = {
1261     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1262     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1263 
1264     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1265     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1266     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1267     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1268     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1269     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1270     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1271     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1272     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1273     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1274     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1275     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1276     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1277     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1278     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1279     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1280     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1281     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1282     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1283     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1284     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1285     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1286     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1287     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1288     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1289     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1290     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1291     [BPTC]    = markflag(MAC),
1292 
1293     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1294     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1295     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1296     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1297     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1298     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1299     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1300     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1301     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1302     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1303     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1304 };
1305 
1306 static void
1307 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1308                  unsigned size)
1309 {
1310     E1000State *s = opaque;
1311     unsigned int index = (addr & 0x1ffff) >> 2;
1312 
1313     if (index < NWRITEOPS && macreg_writeops[index]) {
1314         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1315             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1316             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1317                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1318                        "It is not fully implemented.\n", index<<2);
1319             }
1320             macreg_writeops[index](s, index, val);
1321         } else {    /* "flag needed" bit is set, but the flag is not active */
1322             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1323                    index<<2);
1324         }
1325     } else if (index < NREADOPS && macreg_readops[index]) {
1326         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1327                index<<2, val);
1328     } else {
1329         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1330                index<<2, val);
1331     }
1332 }
1333 
1334 static uint64_t
1335 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1336 {
1337     E1000State *s = opaque;
1338     unsigned int index = (addr & 0x1ffff) >> 2;
1339 
1340     if (index < NREADOPS && macreg_readops[index]) {
1341         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1342             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1343             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1344                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1345                        "It is not fully implemented.\n", index<<2);
1346             }
1347             return macreg_readops[index](s, index);
1348         } else {    /* "flag needed" bit is set, but the flag is not active */
1349             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1350                    index<<2);
1351         }
1352     } else {
1353         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1354     }
1355     return 0;
1356 }
1357 
1358 static const MemoryRegionOps e1000_mmio_ops = {
1359     .read = e1000_mmio_read,
1360     .write = e1000_mmio_write,
1361     .endianness = DEVICE_LITTLE_ENDIAN,
1362     .impl = {
1363         .min_access_size = 4,
1364         .max_access_size = 4,
1365     },
1366 };
1367 
1368 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1369                               unsigned size)
1370 {
1371     E1000State *s = opaque;
1372 
1373     (void)s;
1374     return 0;
1375 }
1376 
1377 static void e1000_io_write(void *opaque, hwaddr addr,
1378                            uint64_t val, unsigned size)
1379 {
1380     E1000State *s = opaque;
1381 
1382     (void)s;
1383 }
1384 
1385 static const MemoryRegionOps e1000_io_ops = {
1386     .read = e1000_io_read,
1387     .write = e1000_io_write,
1388     .endianness = DEVICE_LITTLE_ENDIAN,
1389 };
1390 
/*
 * VMState field test: true only when the incoming stream uses version 1.
 * @opaque is ignored.
 */
static bool is_version_1(void *opaque, int version_id)
{
    const bool is_v1 = (version_id == 1);

    return is_v1;
}
1395 
1396 static int e1000_pre_save(void *opaque)
1397 {
1398     E1000State *s = opaque;
1399     NetClientState *nc = qemu_get_queue(s->nic);
1400 
1401     /*
1402      * If link is down and auto-negotiation is supported and ongoing,
1403      * complete auto-negotiation immediately. This allows us to look
1404      * at MII_BMSR_AN_COMP to infer link status on load.
1405      */
1406     if (nc->link_down && have_autoneg(s)) {
1407         s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
1408     }
1409 
1410     /* Decide which set of props to migrate in the main structure */
1411     if (chkflag(TSO) || !s->use_tso_for_migration) {
1412         /* Either we're migrating with the extra subsection, in which
1413          * case the mig_props is always 'props' OR
1414          * we've not got the subsection, but 'props' was the last
1415          * updated.
1416          */
1417         s->mig_props = s->tx.props;
1418     } else {
1419         /* We're not using the subsection, and 'tso_props' was
1420          * the last updated.
1421          */
1422         s->mig_props = s->tx.tso_props;
1423     }
1424     return 0;
1425 }
1426 
1427 static int e1000_post_load(void *opaque, int version_id)
1428 {
1429     E1000State *s = opaque;
1430     NetClientState *nc = qemu_get_queue(s->nic);
1431 
1432     if (!chkflag(MIT)) {
1433         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1434             s->mac_reg[TADV] = 0;
1435         s->mit_irq_level = false;
1436     }
1437     s->mit_ide = 0;
1438     s->mit_timer_on = true;
1439     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1440 
1441     /* nc.link_down can't be migrated, so infer link_down according
1442      * to link status bit in mac_reg[STATUS].
1443      * Alternatively, restart link negotiation if it was in progress. */
1444     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1445 
1446     if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
1447         nc->link_down = false;
1448         timer_mod(s->autoneg_timer,
1449                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1450     }
1451 
1452     s->tx.props = s->mig_props;
1453     if (!s->received_tx_tso) {
1454         /* We received only one set of offload data (tx.props)
1455          * and haven't got tx.tso_props.  The best we can do
1456          * is dupe the data.
1457          */
1458         s->tx.tso_props = s->mig_props;
1459     }
1460     return 0;
1461 }
1462 
1463 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1464 {
1465     E1000State *s = opaque;
1466     s->received_tx_tso = true;
1467     return 0;
1468 }
1469 
1470 static bool e1000_mit_state_needed(void *opaque)
1471 {
1472     E1000State *s = opaque;
1473 
1474     return chkflag(MIT);
1475 }
1476 
1477 static bool e1000_full_mac_needed(void *opaque)
1478 {
1479     E1000State *s = opaque;
1480 
1481     return chkflag(MAC);
1482 }
1483 
1484 static bool e1000_tso_state_needed(void *opaque)
1485 {
1486     E1000State *s = opaque;
1487 
1488     return chkflag(TSO);
1489 }
1490 
/*
 * Optional migration subsection carrying the four registers gated by the
 * MIT flag (RDTR, RADV, TADV, ITR) plus mit_irq_level.  It is only sent
 * when e1000_mit_state_needed() returns true.
 * The order of the fields is part of the migration stream format.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1505 
/*
 * Optional migration subsection carrying the complete mac_reg[] array
 * (0x8000 32-bit entries).  It is only sent when e1000_full_mac_needed()
 * returns true, i.e. when the MAC flag is set.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1516 
/*
 * Optional migration subsection carrying tx.tso_props.  It is only sent
 * when e1000_tso_state_needed() returns true; on the receiving side
 * e1000_tx_tso_post_load() notes that it arrived.
 * The order of the fields is part of the migration stream format.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1538 
/*
 * Main migration description of the device (current version 2, oldest
 * accepted version 1).
 *
 * e1000_pre_save() fills mig_props before saving and e1000_post_load()
 * redistributes it after loading.  The order of the fields is part of the
 * migration stream format and must not be changed.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        /* Placeholders for fields that are no longer in use. */
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Transmit offload properties (chosen in e1000_pre_save()) and
         * transmit bookkeeping, interleaved as the stream format requires */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* MAC register ranges: RA, MTA and VFTA tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
                                 E1000_VLAN_FILTER_TBL_SIZE),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections; each one has its own .needed predicate. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1624 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * The template holds 64 16-bit words, laid out below as eight rows of
 * eight words.  The two DevId placeholders are words 11 and 13 (second
 * row).  pci_e1000_realize() passes this template, together with the
 * device id and MAC address, to e1000x_core_prepare_eeprom() to fill the
 * per-device eeprom_data.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1639 
1640 /* PCI interface */
1641 
1642 static void
1643 e1000_mmio_setup(E1000State *d)
1644 {
1645     int i;
1646     const uint32_t excluded_regs[] = {
1647         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1648         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1649     };
1650 
1651     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1652                           "e1000-mmio", PNPMMIO_SIZE);
1653     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1654     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1655         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1656                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1657     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1658 }
1659 
1660 static void
1661 pci_e1000_uninit(PCIDevice *dev)
1662 {
1663     E1000State *d = E1000(dev);
1664 
1665     timer_free(d->autoneg_timer);
1666     timer_free(d->mit_timer);
1667     timer_free(d->flush_queue_timer);
1668     qemu_del_nic(d->nic);
1669 }
1670 
1671 static NetClientInfo net_e1000_info = {
1672     .type = NET_CLIENT_DRIVER_NIC,
1673     .size = sizeof(NICState),
1674     .can_receive = e1000_can_receive,
1675     .receive = e1000_receive,
1676     .receive_iov = e1000_receive_iov,
1677     .link_status_changed = e1000_set_link_status,
1678 };
1679 
1680 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1681                                 uint32_t val, int len)
1682 {
1683     E1000State *s = E1000(pci_dev);
1684 
1685     pci_default_write_config(pci_dev, address, val, len);
1686 
1687     if (range_covers_byte(address, len, PCI_COMMAND) &&
1688         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1689         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1690     }
1691 }
1692 
1693 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1694 {
1695     DeviceState *dev = DEVICE(pci_dev);
1696     E1000State *d = E1000(pci_dev);
1697     uint8_t *pci_conf;
1698     uint8_t *macaddr;
1699 
1700     pci_dev->config_write = e1000_write_config;
1701 
1702     pci_conf = pci_dev->config;
1703 
1704     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1705     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1706 
1707     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1708 
1709     e1000_mmio_setup(d);
1710 
1711     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1712 
1713     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1714 
1715     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1716     macaddr = d->conf.macaddr.a;
1717 
1718     e1000x_core_prepare_eeprom(d->eeprom_data,
1719                                e1000_eeprom_template,
1720                                sizeof(e1000_eeprom_template),
1721                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1722                                macaddr);
1723 
1724     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1725                           object_get_typename(OBJECT(d)), dev->id, d);
1726 
1727     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1728 
1729     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1730     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1731     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1732                                         e1000_flush_queue_timer, d);
1733 }
1734 
1735 static Property e1000_properties[] = {
1736     DEFINE_NIC_PROPERTIES(E1000State, conf),
1737     DEFINE_PROP_BIT("autonegotiation", E1000State,
1738                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1739     DEFINE_PROP_BIT("mitigation", E1000State,
1740                     compat_flags, E1000_FLAG_MIT_BIT, true),
1741     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1742                     compat_flags, E1000_FLAG_MAC_BIT, true),
1743     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1744                     compat_flags, E1000_FLAG_TSO_BIT, true),
1745     DEFINE_PROP_BIT("init-vet", E1000State,
1746                     compat_flags, E1000_FLAG_VET_BIT, true),
1747     DEFINE_PROP_END_OF_LIST(),
1748 };
1749 
/*
 * Per-variant parameters of an e1000 device model.  One instance is passed
 * as class data to e1000_class_init() for every registered type.
 */
typedef struct E1000Info {
    const char *name;   /* QOM type name of the variant */
    uint16_t device_id; /* copied to PCIDeviceClass::device_id */
    uint8_t revision;   /* copied to PCIDeviceClass::revision */
    uint16_t phy_id2;   /* copied to E1000BaseClass::phy_id2 */
} E1000Info;
1756 
1757 static void e1000_class_init(ObjectClass *klass, void *data)
1758 {
1759     DeviceClass *dc = DEVICE_CLASS(klass);
1760     ResettableClass *rc = RESETTABLE_CLASS(klass);
1761     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1762     E1000BaseClass *e = E1000_CLASS(klass);
1763     const E1000Info *info = data;
1764 
1765     k->realize = pci_e1000_realize;
1766     k->exit = pci_e1000_uninit;
1767     k->romfile = "efi-e1000.rom";
1768     k->vendor_id = PCI_VENDOR_ID_INTEL;
1769     k->device_id = info->device_id;
1770     k->revision = info->revision;
1771     e->phy_id2 = info->phy_id2;
1772     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1773     rc->phases.hold = e1000_reset_hold;
1774     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1775     dc->desc = "Intel Gigabit Ethernet";
1776     dc->vmsd = &vmstate_e1000;
1777     device_class_set_props(dc, e1000_properties);
1778 }
1779 
1780 static void e1000_instance_init(Object *obj)
1781 {
1782     E1000State *n = E1000(obj);
1783     device_add_bootindex_property(obj, &n->conf.bootindex,
1784                                   "bootindex", "/ethernet-phy@0",
1785                                   DEVICE(n));
1786 }
1787 
1788 static const TypeInfo e1000_base_info = {
1789     .name          = TYPE_E1000_BASE,
1790     .parent        = TYPE_PCI_DEVICE,
1791     .instance_size = sizeof(E1000State),
1792     .instance_init = e1000_instance_init,
1793     .class_size    = sizeof(E1000BaseClass),
1794     .abstract      = true,
1795     .interfaces = (InterfaceInfo[]) {
1796         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1797         { },
1798     },
1799 };
1800 
1801 static const E1000Info e1000_devices[] = {
1802     {
1803         .name      = "e1000",
1804         .device_id = E1000_DEV_ID_82540EM,
1805         .revision  = 0x03,
1806         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1807     },
1808     {
1809         .name      = "e1000-82544gc",
1810         .device_id = E1000_DEV_ID_82544GC_COPPER,
1811         .revision  = 0x03,
1812         .phy_id2   = E1000_PHY_ID2_82544x,
1813     },
1814     {
1815         .name      = "e1000-82545em",
1816         .device_id = E1000_DEV_ID_82545EM_COPPER,
1817         .revision  = 0x03,
1818         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1819     },
1820 };
1821 
1822 static void e1000_register_types(void)
1823 {
1824     int i;
1825 
1826     type_register_static(&e1000_base_info);
1827     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1828         const E1000Info *info = &e1000_devices[i];
1829         TypeInfo type_info = {};
1830 
1831         type_info.name = info->name;
1832         type_info.parent = TYPE_E1000_BASE;
1833         type_info.class_data = (void *)info;
1834         type_info.class_init = e1000_class_init;
1835 
1836         type_register(&type_info);
1837     }
1838 }
1839 
1840 type_init(e1000_register_types)
1841