xref: /qemu/hw/net/e1000.c (revision 0eadd56bf53ab196a16d492d7dd31c62e1c24c32)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci_device.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/eth.h"
33 #include "net/net.h"
34 #include "net/checksum.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/dma.h"
37 #include "qemu/iov.h"
38 #include "qemu/module.h"
39 #include "qemu/range.h"
40 
41 #include "e1000x_common.h"
42 #include "trace.h"
43 #include "qom/object.h"
44 
/* Six octets of 0xff, compared against a frame's first six bytes. */
static const uint8_t bcast[] = {
    0xff, 0xff, 0xff,
    0xff, 0xff, 0xff,
};
46 
47 /* #define E1000_DEBUG */
48 
#ifndef E1000_DEBUG
/* Debug output compiled out: DBGOUT() is an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#else
/* Debug categories; each enumerator is a bit position used by DBGBIT(). */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Bit mask for the debug category named by the suffix x. */
#define DBGBIT(x)    (1<<DEBUG_##x)

/* Categories that are printed; TXERR and GENERAL are on by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr with an "e1000: " prefix when category 'what' is enabled. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#endif
66 
/* Region sizes; presumably for the io and mmio MemoryRegions - confirm at use. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/* Min. octets in an ethernet frame sans FCS */
#define MIN_BUF_SIZE      60

/* 14 + 4 octets; presumably Ethernet header plus one VLAN tag - confirm. */
#define MAXIMUM_ETHERNET_HDR_LEN (14 + 4)
72 
73 /*
74  * HW models:
75  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78  *  Others never tested
79  */
80 
81 struct E1000State_st {
82     /*< private >*/
83     PCIDevice parent_obj;
84     /*< public >*/
85 
86     NICState *nic;
87     NICConf conf;
88     MemoryRegion mmio;
89     MemoryRegion io;
90 
91     uint32_t mac_reg[0x8000];
92     uint16_t phy_reg[0x20];
93     uint16_t eeprom_data[64];
94 
95     uint32_t rxbuf_size;
96     uint32_t rxbuf_min_shift;
97     struct e1000_tx {
98         unsigned char header[256];
99         unsigned char vlan_header[4];
100         /* Fields vlan and data must not be reordered or separated. */
101         unsigned char vlan[4];
102         unsigned char data[0x10000];
103         uint16_t size;
104         unsigned char vlan_needed;
105         unsigned char sum_needed;
106         bool cptse;
107         e1000x_txd_props props;
108         e1000x_txd_props tso_props;
109         uint16_t tso_frames;
110         bool busy;
111     } tx;
112 
113     struct {
114         uint32_t val_in;    /* shifted in from guest driver */
115         uint16_t bitnum_in;
116         uint16_t bitnum_out;
117         uint16_t reading;
118         uint32_t old_eecd;
119     } eecd_state;
120 
121     QEMUTimer *autoneg_timer;
122 
123     QEMUTimer *mit_timer;      /* Mitigation timer. */
124     bool mit_timer_on;         /* Mitigation timer is running. */
125     bool mit_irq_level;        /* Tracks interrupt pin level. */
126     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
127 
128     QEMUTimer *flush_queue_timer;
129 
130 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
131 #define E1000_FLAG_AUTONEG_BIT 0
132 #define E1000_FLAG_MIT_BIT 1
133 #define E1000_FLAG_MAC_BIT 2
134 #define E1000_FLAG_TSO_BIT 3
135 #define E1000_FLAG_VET_BIT 4
136 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
137 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
138 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
139 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
140 #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
141 
142     uint32_t compat_flags;
143     bool received_tx_tso;
144     bool use_tso_for_migration;
145     e1000x_txd_props mig_props;
146 };
147 typedef struct E1000State_st E1000State;
148 
/*
 * Test the compat flag E1000_FLAG_<x>; non-zero when it is set.
 * Relies on an E1000State pointer named 's' being in scope at the call site.
 */
#define chkflag(x)     (s->compat_flags & (E1000_FLAG_##x))
150 
151 struct E1000BaseClass {
152     PCIDeviceClass parent_class;
153     uint16_t phy_id2;
154 };
155 typedef struct E1000BaseClass E1000BaseClass;
156 
157 #define TYPE_E1000_BASE "e1000-base"
158 
159 DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
160                      E1000, TYPE_E1000_BASE)
161 
162 
163 static void
164 e1000_link_up(E1000State *s)
165 {
166     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
167 
168     /* E1000_STATUS_LU is tested by e1000_can_receive() */
169     qemu_flush_queued_packets(qemu_get_queue(s->nic));
170 }
171 
172 static void
173 e1000_autoneg_done(E1000State *s)
174 {
175     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
176 
177     /* E1000_STATUS_LU is tested by e1000_can_receive() */
178     qemu_flush_queued_packets(qemu_get_queue(s->nic));
179 }
180 
181 static bool
182 have_autoneg(E1000State *s)
183 {
184     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
185 }
186 
187 static void
188 set_phy_ctrl(E1000State *s, int index, uint16_t val)
189 {
190     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
191     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
192                                    MII_CR_RESET |
193                                    MII_CR_RESTART_AUTO_NEG);
194 
195     /*
196      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
197      * migrate during auto negotiation, after migration the link will be
198      * down.
199      */
200     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
201         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
202     }
203 }
204 
205 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
206     [PHY_CTRL] = set_phy_ctrl,
207 };
208 
209 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
210 
211 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
212 static const char phy_regcap[0x20] = {
213     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
214     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
215     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
216     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
217     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
218     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
219     [PHY_AUTONEG_EXP] = PHY_R,
220 };
221 
222 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
223 static const uint16_t phy_reg_init[] = {
224     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
225                    MII_CR_FULL_DUPLEX |
226                    MII_CR_AUTO_NEG_EN,
227 
228     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
229                    MII_SR_LINK_STATUS |   /* link initially up */
230                    MII_SR_AUTONEG_CAPS |
231                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
232                    MII_SR_PREAMBLE_SUPPRESS |
233                    MII_SR_EXTENDED_STATUS |
234                    MII_SR_10T_HD_CAPS |
235                    MII_SR_10T_FD_CAPS |
236                    MII_SR_100X_HD_CAPS |
237                    MII_SR_100X_FD_CAPS,
238 
239     [PHY_ID1] = 0x141,
240     /* [PHY_ID2] configured per DevId, from e1000_reset() */
241     [PHY_AUTONEG_ADV] = 0xde1,
242     [PHY_LP_ABILITY] = 0x1e0,
243     [PHY_1000T_CTRL] = 0x0e00,
244     [PHY_1000T_STATUS] = 0x3c00,
245     [M88E1000_PHY_SPEC_CTRL] = 0x360,
246     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
247     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
248 };
249 
250 static const uint32_t mac_reg_init[] = {
251     [PBA]     = 0x00100030,
252     [LEDCTL]  = 0x602,
253     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
254                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
255     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
256                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
257                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
258                 E1000_STATUS_LU,
259     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
260                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
261                 E1000_MANC_RMCP_EN,
262 };
263 
/*
 * Lower *curr to @value when @value is a usable (non-zero) delay that is
 * smaller than the current one.  *curr == 0 means "not set yet", so any
 * non-zero @value replaces it.
 */
static inline void mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero is not a delay */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* existing delay is already as short */
    }
    *curr = value;
}
272 
273 static void
274 set_interrupt_cause(E1000State *s, int index, uint32_t val)
275 {
276     PCIDevice *d = PCI_DEVICE(s);
277     uint32_t pending_ints;
278     uint32_t mit_delay;
279 
280     s->mac_reg[ICR] = val;
281 
282     /*
283      * Make sure ICR and ICS registers have the same value.
284      * The spec says that the ICS register is write-only.  However in practice,
285      * on real hardware ICS is readable, and for reads it has the same value as
286      * ICR (except that ICS does not have the clear on read behaviour of ICR).
287      *
288      * The VxWorks PRO/1000 driver uses this behaviour.
289      */
290     s->mac_reg[ICS] = val;
291 
292     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
293     if (!s->mit_irq_level && pending_ints) {
294         /*
295          * Here we detect a potential raising edge. We postpone raising the
296          * interrupt line if we are inside the mitigation delay window
297          * (s->mit_timer_on == 1).
298          * We provide a partial implementation of interrupt mitigation,
299          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
300          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
301          * RADV; relative timers based on TIDV and RDTR are not implemented.
302          */
303         if (s->mit_timer_on) {
304             return;
305         }
306         if (chkflag(MIT)) {
307             /* Compute the next mitigation delay according to pending
308              * interrupts and the current values of RADV (provided
309              * RDTR!=0), TADV and ITR.
310              * Then rearm the timer.
311              */
312             mit_delay = 0;
313             if (s->mit_ide &&
314                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
315                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
316             }
317             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
318                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
319             }
320             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
321 
322             /*
323              * According to e1000 SPEC, the Ethernet controller guarantees
324              * a maximum observable interrupt rate of 7813 interrupts/sec.
325              * Thus if mit_delay < 500 then the delay should be set to the
326              * minimum delay possible which is 500.
327              */
328             mit_delay = (mit_delay < 500) ? 500 : mit_delay;
329 
330             s->mit_timer_on = 1;
331             timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
332                       mit_delay * 256);
333             s->mit_ide = 0;
334         }
335     }
336 
337     s->mit_irq_level = (pending_ints != 0);
338     pci_set_irq(d, s->mit_irq_level);
339 }
340 
341 static void
342 e1000_mit_timer(void *opaque)
343 {
344     E1000State *s = opaque;
345 
346     s->mit_timer_on = 0;
347     /* Call set_interrupt_cause to update the irq level (if necessary). */
348     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
349 }
350 
351 static void
352 set_ics(E1000State *s, int index, uint32_t val)
353 {
354     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
355         s->mac_reg[IMS]);
356     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
357 }
358 
359 static void
360 e1000_autoneg_timer(void *opaque)
361 {
362     E1000State *s = opaque;
363     if (!qemu_get_queue(s->nic)->link_down) {
364         e1000_autoneg_done(s);
365         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
366     }
367 }
368 
369 static bool e1000_vet_init_need(void *opaque)
370 {
371     E1000State *s = opaque;
372 
373     return chkflag(VET);
374 }
375 
376 static void e1000_reset(void *opaque)
377 {
378     E1000State *d = opaque;
379     E1000BaseClass *edc = E1000_GET_CLASS(d);
380     uint8_t *macaddr = d->conf.macaddr.a;
381 
382     timer_del(d->autoneg_timer);
383     timer_del(d->mit_timer);
384     timer_del(d->flush_queue_timer);
385     d->mit_timer_on = 0;
386     d->mit_irq_level = 0;
387     d->mit_ide = 0;
388     memset(d->phy_reg, 0, sizeof d->phy_reg);
389     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
390     d->phy_reg[PHY_ID2] = edc->phy_id2;
391     memset(d->mac_reg, 0, sizeof d->mac_reg);
392     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
393     d->rxbuf_min_shift = 1;
394     memset(&d->tx, 0, sizeof d->tx);
395 
396     if (qemu_get_queue(d->nic)->link_down) {
397         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
398     }
399 
400     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
401 
402     if (e1000_vet_init_need(d)) {
403         d->mac_reg[VET] = ETH_P_VLAN;
404     }
405 }
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 e1000_flush_queue_timer(void *opaque)
416 {
417     E1000State *s = opaque;
418 
419     qemu_flush_queued_packets(qemu_get_queue(s->nic));
420 }
421 
422 static void
423 set_rx_control(E1000State *s, int index, uint32_t val)
424 {
425     s->mac_reg[RCTL] = val;
426     s->rxbuf_size = e1000x_rxbufsize(val);
427     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
428     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
429            s->mac_reg[RCTL]);
430     timer_mod(s->flush_queue_timer,
431               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
432 }
433 
434 static void
435 set_mdic(E1000State *s, int index, uint32_t val)
436 {
437     uint32_t data = val & E1000_MDIC_DATA_MASK;
438     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
439 
440     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
441         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
442     else if (val & E1000_MDIC_OP_READ) {
443         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
444         if (!(phy_regcap[addr] & PHY_R)) {
445             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
446             val |= E1000_MDIC_ERROR;
447         } else
448             val = (val ^ data) | s->phy_reg[addr];
449     } else if (val & E1000_MDIC_OP_WRITE) {
450         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
451         if (!(phy_regcap[addr] & PHY_W)) {
452             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
453             val |= E1000_MDIC_ERROR;
454         } else {
455             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
456                 phyreg_writeops[addr](s, index, data);
457             } else {
458                 s->phy_reg[addr] = data;
459             }
460         }
461     }
462     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
463 
464     if (val & E1000_MDIC_INT_EN) {
465         set_ics(s, 0, E1000_ICR_MDAC);
466     }
467 }
468 
469 static uint32_t
470 get_eecd(E1000State *s, int index)
471 {
472     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
473 
474     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
475            s->eecd_state.bitnum_out, s->eecd_state.reading);
476     if (!s->eecd_state.reading ||
477         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
478           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
479         ret |= E1000_EECD_DO;
480     return ret;
481 }
482 
483 static void
484 set_eecd(E1000State *s, int index, uint32_t val)
485 {
486     uint32_t oldval = s->eecd_state.old_eecd;
487 
488     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
489             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
490     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
491         return;
492     }
493     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
494         s->eecd_state.val_in = 0;
495         s->eecd_state.bitnum_in = 0;
496         s->eecd_state.bitnum_out = 0;
497         s->eecd_state.reading = 0;
498     }
499     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
500         return;
501     }
502     if (!(E1000_EECD_SK & val)) {               /* falling edge */
503         s->eecd_state.bitnum_out++;
504         return;
505     }
506     s->eecd_state.val_in <<= 1;
507     if (val & E1000_EECD_DI)
508         s->eecd_state.val_in |= 1;
509     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
510         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
511         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
512             EEPROM_READ_OPCODE_MICROWIRE);
513     }
514     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
515            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
516            s->eecd_state.reading);
517 }
518 
519 static uint32_t
520 flash_eerd_read(E1000State *s, int x)
521 {
522     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
523 
524     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
525         return (s->mac_reg[EERD]);
526 
527     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
528         return (E1000_EEPROM_RW_REG_DONE | r);
529 
530     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
531            E1000_EEPROM_RW_REG_DONE | r);
532 }
533 
/*
 * Compute a checksum over data[css .. end) and store it big-endian at
 * data[sloc].
 *
 * @n is the buffer length.  A non-zero @cse below @n shortens the range to
 * end at cse + 1.  Nothing is written unless sloc is below end - 1.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse != 0 && cse < n) {
        end = cse + 1;
    }

    if (sloc >= end - 1) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
}
546 
547 static inline void
548 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
549 {
550     if (!memcmp(arr, bcast, sizeof bcast)) {
551         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
552     } else if (arr[0] & 1) {
553         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
554     }
555 }
556 
557 static void
558 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
559 {
560     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
561                                     PTC1023, PTC1522 };
562 
563     NetClientState *nc = qemu_get_queue(s->nic);
564     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
565         qemu_receive_packet(nc, buf, size);
566     } else {
567         qemu_send_packet(nc, buf, size);
568     }
569     inc_tx_bcast_or_mcast_count(s, buf);
570     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
571 }
572 
573 static void
574 xmit_seg(E1000State *s)
575 {
576     uint16_t len;
577     unsigned int frames = s->tx.tso_frames, css, sofar;
578     struct e1000_tx *tp = &s->tx;
579     struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
580 
581     if (tp->cptse) {
582         css = props->ipcss;
583         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
584                frames, tp->size, css);
585         if (props->ip) {    /* IPv4 */
586             stw_be_p(tp->data+css+2, tp->size - css);
587             stw_be_p(tp->data+css+4,
588                      lduw_be_p(tp->data + css + 4) + frames);
589         } else {         /* IPv6 */
590             stw_be_p(tp->data+css+4, tp->size - css);
591         }
592         css = props->tucss;
593         len = tp->size - css;
594         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
595         if (props->tcp) {
596             sofar = frames * props->mss;
597             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
598             if (props->paylen - sofar > props->mss) {
599                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
600             } else if (frames) {
601                 e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
602             }
603         } else {    /* UDP */
604             stw_be_p(tp->data+css+4, len);
605         }
606         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
607             unsigned int phsum;
608             // add pseudo-header length before checksum calculation
609             void *sp = tp->data + props->tucso;
610 
611             phsum = lduw_be_p(sp) + len;
612             phsum = (phsum >> 16) + (phsum & 0xffff);
613             stw_be_p(sp, phsum);
614         }
615         tp->tso_frames++;
616     }
617 
618     if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
619         putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
620     }
621     if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
622         putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
623     }
624     if (tp->vlan_needed) {
625         memmove(tp->vlan, tp->data, 4);
626         memmove(tp->data, tp->data + 4, 8);
627         memcpy(tp->data + 8, tp->vlan_header, 4);
628         e1000_send_packet(s, tp->vlan, tp->size + 4);
629     } else {
630         e1000_send_packet(s, tp->data, tp->size);
631     }
632 
633     e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
634     e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
635     s->mac_reg[GPTC] = s->mac_reg[TPT];
636     s->mac_reg[GOTCL] = s->mac_reg[TOTL];
637     s->mac_reg[GOTCH] = s->mac_reg[TOTH];
638 }
639 
640 static void
641 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
642 {
643     PCIDevice *d = PCI_DEVICE(s);
644     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
645     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
646     unsigned int split_size = txd_lower & 0xffff, bytes, sz;
647     unsigned int msh = 0xfffff;
648     uint64_t addr;
649     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
650     struct e1000_tx *tp = &s->tx;
651 
652     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
653     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
654         if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
655             e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
656             s->use_tso_for_migration = 1;
657             tp->tso_frames = 0;
658         } else {
659             e1000x_read_tx_ctx_descr(xp, &tp->props);
660             s->use_tso_for_migration = 0;
661         }
662         return;
663     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
664         // data descriptor
665         if (tp->size == 0) {
666             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
667         }
668         tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
669     } else {
670         // legacy descriptor
671         tp->cptse = 0;
672     }
673 
674     if (e1000x_vlan_enabled(s->mac_reg) &&
675         e1000x_is_vlan_txd(txd_lower) &&
676         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
677         tp->vlan_needed = 1;
678         stw_be_p(tp->vlan_header,
679                       le16_to_cpu(s->mac_reg[VET]));
680         stw_be_p(tp->vlan_header + 2,
681                       le16_to_cpu(dp->upper.fields.special));
682     }
683 
684     addr = le64_to_cpu(dp->buffer_addr);
685     if (tp->cptse) {
686         msh = tp->tso_props.hdr_len + tp->tso_props.mss;
687         do {
688             bytes = split_size;
689             if (tp->size >= msh) {
690                 goto eop;
691             }
692             if (tp->size + bytes > msh)
693                 bytes = msh - tp->size;
694 
695             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
696             pci_dma_read(d, addr, tp->data + tp->size, bytes);
697             sz = tp->size + bytes;
698             if (sz >= tp->tso_props.hdr_len
699                 && tp->size < tp->tso_props.hdr_len) {
700                 memmove(tp->header, tp->data, tp->tso_props.hdr_len);
701             }
702             tp->size = sz;
703             addr += bytes;
704             if (sz == msh) {
705                 xmit_seg(s);
706                 memmove(tp->data, tp->header, tp->tso_props.hdr_len);
707                 tp->size = tp->tso_props.hdr_len;
708             }
709             split_size -= bytes;
710         } while (bytes && split_size);
711     } else {
712         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
713         pci_dma_read(d, addr, tp->data + tp->size, split_size);
714         tp->size += split_size;
715     }
716 
717 eop:
718     if (!(txd_lower & E1000_TXD_CMD_EOP))
719         return;
720     if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
721         xmit_seg(s);
722     }
723     tp->tso_frames = 0;
724     tp->sum_needed = 0;
725     tp->vlan_needed = 0;
726     tp->size = 0;
727     tp->cptse = 0;
728 }
729 
730 static uint32_t
731 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
732 {
733     PCIDevice *d = PCI_DEVICE(s);
734     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
735 
736     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
737         return 0;
738     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
739                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
740     dp->upper.data = cpu_to_le32(txd_upper);
741     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
742                   &dp->upper, sizeof(dp->upper));
743     return E1000_ICR_TXDW;
744 }
745 
746 static uint64_t tx_desc_base(E1000State *s)
747 {
748     uint64_t bah = s->mac_reg[TDBAH];
749     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
750 
751     return (bah << 32) + bal;
752 }
753 
754 static void
755 start_xmit(E1000State *s)
756 {
757     PCIDevice *d = PCI_DEVICE(s);
758     dma_addr_t base;
759     struct e1000_tx_desc desc;
760     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
761 
762     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
763         DBGOUT(TX, "tx disabled\n");
764         return;
765     }
766 
767     if (s->tx.busy) {
768         return;
769     }
770     s->tx.busy = true;
771 
772     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
773         base = tx_desc_base(s) +
774                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
775         pci_dma_read(d, base, &desc, sizeof(desc));
776 
777         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
778                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
779                desc.upper.data);
780 
781         process_tx_desc(s, &desc);
782         cause |= txdesc_writeback(s, base, &desc);
783 
784         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
785             s->mac_reg[TDH] = 0;
786         /*
787          * the following could happen only if guest sw assigns
788          * bogus values to TDT/TDLEN.
789          * there's nothing too intelligent we could do about this.
790          */
791         if (s->mac_reg[TDH] == tdh_start ||
792             tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
793             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
794                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
795             break;
796         }
797     }
798     s->tx.busy = false;
799     set_ics(s, 0, cause);
800 }
801 
802 static int
803 receive_filter(E1000State *s, const uint8_t *buf, int size)
804 {
805     uint32_t rctl = s->mac_reg[RCTL];
806     int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
807 
808     if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
809         e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
810         uint16_t vid = lduw_be_p(buf + 14);
811         uint32_t vfta = ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
812                                  ((vid >> 5) & 0x7f));
813         if ((vfta & (1 << (vid & 0x1f))) == 0) {
814             return 0;
815         }
816     }
817 
818     if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
819         return 1;
820     }
821 
822     if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
823         e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
824         return 1;
825     }
826 
827     if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
828         e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
829         return 1;
830     }
831 
832     return e1000x_rx_group_filter(s->mac_reg, buf);
833 }
834 
835 static void
836 e1000_set_link_status(NetClientState *nc)
837 {
838     E1000State *s = qemu_get_nic_opaque(nc);
839     uint32_t old_status = s->mac_reg[STATUS];
840 
841     if (nc->link_down) {
842         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
843     } else {
844         if (have_autoneg(s) &&
845             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
846             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
847         } else {
848             e1000_link_up(s);
849         }
850     }
851 
852     if (s->mac_reg[STATUS] != old_status)
853         set_ics(s, 0, E1000_ICR_LSC);
854 }
855 
856 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
857 {
858     int bufs;
859     /* Fast-path short packets */
860     if (total_size <= s->rxbuf_size) {
861         return s->mac_reg[RDH] != s->mac_reg[RDT];
862     }
863     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
864         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
865     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
866         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
867             s->mac_reg[RDT] - s->mac_reg[RDH];
868     } else {
869         return false;
870     }
871     return total_size <= bufs * s->rxbuf_size;
872 }
873 
874 static bool
875 e1000_can_receive(NetClientState *nc)
876 {
877     E1000State *s = qemu_get_nic_opaque(nc);
878 
879     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
880         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
881 }
882 
883 static uint64_t rx_desc_base(E1000State *s)
884 {
885     uint64_t bah = s->mac_reg[RDBAH];
886     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
887 
888     return (bah << 32) + bal;
889 }
890 
891 static void
892 e1000_receiver_overrun(E1000State *s, size_t size)
893 {
894     trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
895     e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
896     e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
897     set_ics(s, 0, E1000_ICS_RXO);
898 }
899 
900 static ssize_t
901 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
902 {
903     E1000State *s = qemu_get_nic_opaque(nc);
904     PCIDevice *d = PCI_DEVICE(s);
905     struct e1000_rx_desc desc;
906     dma_addr_t base;
907     unsigned int n, rdt;
908     uint32_t rdh_start;
909     uint16_t vlan_special = 0;
910     uint8_t vlan_status = 0;
911     uint8_t min_buf[MIN_BUF_SIZE];
912     struct iovec min_iov;
913     uint8_t *filter_buf = iov->iov_base;
914     size_t size = iov_size(iov, iovcnt);
915     size_t iov_ofs = 0;
916     size_t desc_offset;
917     size_t desc_size;
918     size_t total_size;
919 
920     if (!e1000x_hw_rx_enabled(s->mac_reg)) {
921         return -1;
922     }
923 
924     if (timer_pending(s->flush_queue_timer)) {
925         return 0;
926     }
927 
928     /* Pad to minimum Ethernet frame length */
929     if (size < sizeof(min_buf)) {
930         iov_to_buf(iov, iovcnt, 0, min_buf, size);
931         memset(&min_buf[size], 0, sizeof(min_buf) - size);
932         min_iov.iov_base = filter_buf = min_buf;
933         min_iov.iov_len = size = sizeof(min_buf);
934         iovcnt = 1;
935         iov = &min_iov;
936     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
937         /* This is very unlikely, but may happen. */
938         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
939         filter_buf = min_buf;
940     }
941 
942     /* Discard oversized packets if !LPE and !SBP. */
943     if (e1000x_is_oversized(s->mac_reg, size)) {
944         return size;
945     }
946 
947     if (!receive_filter(s, filter_buf, size)) {
948         return size;
949     }
950 
951     if (e1000x_vlan_enabled(s->mac_reg) &&
952         e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
953         vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
954         iov_ofs = 4;
955         if (filter_buf == iov->iov_base) {
956             memmove(filter_buf + 4, filter_buf, 12);
957         } else {
958             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
959             while (iov->iov_len <= iov_ofs) {
960                 iov_ofs -= iov->iov_len;
961                 iov++;
962             }
963         }
964         vlan_status = E1000_RXD_STAT_VP;
965         size -= 4;
966     }
967 
968     rdh_start = s->mac_reg[RDH];
969     desc_offset = 0;
970     total_size = size + e1000x_fcs_len(s->mac_reg);
971     if (!e1000_has_rxbufs(s, total_size)) {
972         e1000_receiver_overrun(s, total_size);
973         return -1;
974     }
975     do {
976         desc_size = total_size - desc_offset;
977         if (desc_size > s->rxbuf_size) {
978             desc_size = s->rxbuf_size;
979         }
980         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
981         pci_dma_read(d, base, &desc, sizeof(desc));
982         desc.special = vlan_special;
983         desc.status &= ~E1000_RXD_STAT_DD;
984         if (desc.buffer_addr) {
985             if (desc_offset < size) {
986                 size_t iov_copy;
987                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
988                 size_t copy_size = size - desc_offset;
989                 if (copy_size > s->rxbuf_size) {
990                     copy_size = s->rxbuf_size;
991                 }
992                 do {
993                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
994                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
995                     copy_size -= iov_copy;
996                     ba += iov_copy;
997                     iov_ofs += iov_copy;
998                     if (iov_ofs == iov->iov_len) {
999                         iov++;
1000                         iov_ofs = 0;
1001                     }
1002                 } while (copy_size);
1003             }
1004             desc_offset += desc_size;
1005             desc.length = cpu_to_le16(desc_size);
1006             if (desc_offset >= total_size) {
1007                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1008             } else {
1009                 /* Guest zeroing out status is not a hardware requirement.
1010                    Clear EOP in case guest didn't do it. */
1011                 desc.status &= ~E1000_RXD_STAT_EOP;
1012             }
1013         } else { // as per intel docs; skip descriptors with null buf addr
1014             DBGOUT(RX, "Null RX descriptor!!\n");
1015         }
1016         pci_dma_write(d, base, &desc, sizeof(desc));
1017         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1018         pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
1019                       &desc.status, sizeof(desc.status));
1020 
1021         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1022             s->mac_reg[RDH] = 0;
1023         /* see comment in start_xmit; same here */
1024         if (s->mac_reg[RDH] == rdh_start ||
1025             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1026             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1027                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1028             e1000_receiver_overrun(s, total_size);
1029             return -1;
1030         }
1031     } while (desc_offset < total_size);
1032 
1033     e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1034 
1035     n = E1000_ICS_RXT0;
1036     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1037         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1038     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1039         s->rxbuf_min_shift)
1040         n |= E1000_ICS_RXDMT0;
1041 
1042     set_ics(s, 0, n);
1043 
1044     return size;
1045 }
1046 
1047 static ssize_t
1048 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1049 {
1050     const struct iovec iov = {
1051         .iov_base = (uint8_t *)buf,
1052         .iov_len = size
1053     };
1054 
1055     return e1000_receive_iov(nc, &iov, 1);
1056 }
1057 
1058 static uint32_t
1059 mac_readreg(E1000State *s, int index)
1060 {
1061     return s->mac_reg[index];
1062 }
1063 
1064 static uint32_t
1065 mac_low4_read(E1000State *s, int index)
1066 {
1067     return s->mac_reg[index] & 0xf;
1068 }
1069 
1070 static uint32_t
1071 mac_low11_read(E1000State *s, int index)
1072 {
1073     return s->mac_reg[index] & 0x7ff;
1074 }
1075 
1076 static uint32_t
1077 mac_low13_read(E1000State *s, int index)
1078 {
1079     return s->mac_reg[index] & 0x1fff;
1080 }
1081 
1082 static uint32_t
1083 mac_low16_read(E1000State *s, int index)
1084 {
1085     return s->mac_reg[index] & 0xffff;
1086 }
1087 
1088 static uint32_t
1089 mac_icr_read(E1000State *s, int index)
1090 {
1091     uint32_t ret = s->mac_reg[ICR];
1092 
1093     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1094     set_interrupt_cause(s, 0, 0);
1095     return ret;
1096 }
1097 
1098 static uint32_t
1099 mac_read_clr4(E1000State *s, int index)
1100 {
1101     uint32_t ret = s->mac_reg[index];
1102 
1103     s->mac_reg[index] = 0;
1104     return ret;
1105 }
1106 
1107 static uint32_t
1108 mac_read_clr8(E1000State *s, int index)
1109 {
1110     uint32_t ret = s->mac_reg[index];
1111 
1112     s->mac_reg[index] = 0;
1113     s->mac_reg[index-1] = 0;
1114     return ret;
1115 }
1116 
1117 static void
1118 mac_writereg(E1000State *s, int index, uint32_t val)
1119 {
1120     uint32_t macaddr[2];
1121 
1122     s->mac_reg[index] = val;
1123 
1124     if (index == RA + 1) {
1125         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1126         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1127         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1128     }
1129 }
1130 
1131 static void
1132 set_rdt(E1000State *s, int index, uint32_t val)
1133 {
1134     s->mac_reg[index] = val & 0xffff;
1135     if (e1000_has_rxbufs(s, 1)) {
1136         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1137     }
1138 }
1139 
1140 static void
1141 set_16bit(E1000State *s, int index, uint32_t val)
1142 {
1143     s->mac_reg[index] = val & 0xffff;
1144 }
1145 
1146 static void
1147 set_dlen(E1000State *s, int index, uint32_t val)
1148 {
1149     s->mac_reg[index] = val & 0xfff80;
1150 }
1151 
1152 static void
1153 set_tctl(E1000State *s, int index, uint32_t val)
1154 {
1155     s->mac_reg[index] = val;
1156     s->mac_reg[TDT] &= 0xffff;
1157     start_xmit(s);
1158 }
1159 
1160 static void
1161 set_icr(E1000State *s, int index, uint32_t val)
1162 {
1163     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1164     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1165 }
1166 
1167 static void
1168 set_imc(E1000State *s, int index, uint32_t val)
1169 {
1170     s->mac_reg[IMS] &= ~val;
1171     set_ics(s, 0, 0);
1172 }
1173 
1174 static void
1175 set_ims(E1000State *s, int index, uint32_t val)
1176 {
1177     s->mac_reg[IMS] |= val;
1178     set_ics(s, 0, 0);
1179 }
1180 
1181 #define getreg(x)    [x] = mac_readreg
1182 typedef uint32_t (*readops)(E1000State *, int);
1183 static const readops macreg_readops[] = {
1184     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1185     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1186     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1187     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1188     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1189     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1190     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1191     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1192     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1193     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1194     getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1195     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1196     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1197     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1198     getreg(GOTCL),
1199 
1200     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1201     [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1202     [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1203     [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1204     [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1205     [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1206     [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1207     [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1208     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1209     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1210     [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1211     [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1212     [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1213     [MPTC]    = mac_read_clr4,
1214     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1215     [EERD]    = flash_eerd_read,
1216     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1217     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1218     [RDFPC]   = mac_low13_read,
1219     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1220     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1221     [TDFPC]   = mac_low13_read,
1222     [AIT]     = mac_low16_read,
1223 
1224     [CRCERRS ... MPC]     = &mac_readreg,
1225     [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
1226     [FFLT ... FFLT + 6]   = &mac_low11_read,
1227     [RA ... RA + 31]      = &mac_readreg,
1228     [WUPM ... WUPM + 31]  = &mac_readreg,
1229     [MTA ... MTA + 127]   = &mac_readreg,
1230     [VFTA ... VFTA + 127] = &mac_readreg,
1231     [FFMT ... FFMT + 254] = &mac_low4_read,
1232     [FFVT ... FFVT + 254] = &mac_readreg,
1233     [PBM ... PBM + 16383] = &mac_readreg,
1234 };
1235 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1236 
1237 #define putreg(x)    [x] = mac_writereg
1238 typedef void (*writeops)(E1000State *, int, uint32_t);
1239 static const writeops macreg_writeops[] = {
1240     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1241     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1242     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1243     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1244     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1245     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1246     putreg(WUS),      putreg(AIT),
1247 
1248     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1249     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1250     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1251     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1252     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1253     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1254     [ITR]    = set_16bit,
1255 
1256     [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
1257     [FFLT ... FFLT + 6]   = &mac_writereg,
1258     [RA ... RA + 31]      = &mac_writereg,
1259     [WUPM ... WUPM + 31]  = &mac_writereg,
1260     [MTA ... MTA + 127]   = &mac_writereg,
1261     [VFTA ... VFTA + 127] = &mac_writereg,
1262     [FFMT ... FFMT + 254] = &mac_writereg, [FFVT ... FFVT + 254] = &mac_writereg,
1263     [PBM ... PBM + 16383] = &mac_writereg,
1264 };
1265 
1266 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1267 
1268 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1269 
1270 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1271 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1272  * f - flag bits (up to 6 possible flags)
1273  * n - flag needed
1274  * p - partially implenented */
1275 static const uint8_t mac_reg_access[0x8000] = {
1276     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1277     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1278 
1279     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1280     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1281     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1282     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1283     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1284     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1285     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1286     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1287     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1288     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1289     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1290     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1291     [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1292     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1293     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1294     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1295     [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1296     [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1297     [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1298     [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1299     [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1300     [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1301     [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1302     [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1303     [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1304     [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1305     [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1306     [BPTC]    = markflag(MAC),
1307 
1308     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1309     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1310     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1311     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1312     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1313     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1314     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1315     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1316     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1317     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1318     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1319 };
1320 
1321 static void
1322 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1323                  unsigned size)
1324 {
1325     E1000State *s = opaque;
1326     unsigned int index = (addr & 0x1ffff) >> 2;
1327 
1328     if (index < NWRITEOPS && macreg_writeops[index]) {
1329         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1330             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1331             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1332                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1333                        "It is not fully implemented.\n", index<<2);
1334             }
1335             macreg_writeops[index](s, index, val);
1336         } else {    /* "flag needed" bit is set, but the flag is not active */
1337             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1338                    index<<2);
1339         }
1340     } else if (index < NREADOPS && macreg_readops[index]) {
1341         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1342                index<<2, val);
1343     } else {
1344         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1345                index<<2, val);
1346     }
1347 }
1348 
1349 static uint64_t
1350 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1351 {
1352     E1000State *s = opaque;
1353     unsigned int index = (addr & 0x1ffff) >> 2;
1354 
1355     if (index < NREADOPS && macreg_readops[index]) {
1356         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1357             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1358             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1359                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1360                        "It is not fully implemented.\n", index<<2);
1361             }
1362             return macreg_readops[index](s, index);
1363         } else {    /* "flag needed" bit is set, but the flag is not active */
1364             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1365                    index<<2);
1366         }
1367     } else {
1368         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1369     }
1370     return 0;
1371 }
1372 
1373 static const MemoryRegionOps e1000_mmio_ops = {
1374     .read = e1000_mmio_read,
1375     .write = e1000_mmio_write,
1376     .endianness = DEVICE_LITTLE_ENDIAN,
1377     .impl = {
1378         .min_access_size = 4,
1379         .max_access_size = 4,
1380     },
1381 };
1382 
1383 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1384                               unsigned size)
1385 {
1386     E1000State *s = opaque;
1387 
1388     (void)s;
1389     return 0;
1390 }
1391 
1392 static void e1000_io_write(void *opaque, hwaddr addr,
1393                            uint64_t val, unsigned size)
1394 {
1395     E1000State *s = opaque;
1396 
1397     (void)s;
1398 }
1399 
1400 static const MemoryRegionOps e1000_io_ops = {
1401     .read = e1000_io_read,
1402     .write = e1000_io_write,
1403     .endianness = DEVICE_LITTLE_ENDIAN,
1404 };
1405 
/*
 * VMState field test: true only for version 1 of the migration stream.
 * @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1410 
1411 static int e1000_pre_save(void *opaque)
1412 {
1413     E1000State *s = opaque;
1414     NetClientState *nc = qemu_get_queue(s->nic);
1415 
1416     /*
1417      * If link is down and auto-negotiation is supported and ongoing,
1418      * complete auto-negotiation immediately. This allows us to look
1419      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1420      */
1421     if (nc->link_down && have_autoneg(s)) {
1422         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1423     }
1424 
1425     /* Decide which set of props to migrate in the main structure */
1426     if (chkflag(TSO) || !s->use_tso_for_migration) {
1427         /* Either we're migrating with the extra subsection, in which
1428          * case the mig_props is always 'props' OR
1429          * we've not got the subsection, but 'props' was the last
1430          * updated.
1431          */
1432         s->mig_props = s->tx.props;
1433     } else {
1434         /* We're not using the subsection, and 'tso_props' was
1435          * the last updated.
1436          */
1437         s->mig_props = s->tx.tso_props;
1438     }
1439     return 0;
1440 }
1441 
1442 static int e1000_post_load(void *opaque, int version_id)
1443 {
1444     E1000State *s = opaque;
1445     NetClientState *nc = qemu_get_queue(s->nic);
1446 
1447     if (!chkflag(MIT)) {
1448         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1449             s->mac_reg[TADV] = 0;
1450         s->mit_irq_level = false;
1451     }
1452     s->mit_ide = 0;
1453     s->mit_timer_on = true;
1454     timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1455 
1456     /* nc.link_down can't be migrated, so infer link_down according
1457      * to link status bit in mac_reg[STATUS].
1458      * Alternatively, restart link negotiation if it was in progress. */
1459     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1460 
1461     if (have_autoneg(s) &&
1462         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1463         nc->link_down = false;
1464         timer_mod(s->autoneg_timer,
1465                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1466     }
1467 
1468     s->tx.props = s->mig_props;
1469     if (!s->received_tx_tso) {
1470         /* We received only one set of offload data (tx.props)
1471          * and haven't got tx.tso_props.  The best we can do
1472          * is dupe the data.
1473          */
1474         s->tx.tso_props = s->mig_props;
1475     }
1476     return 0;
1477 }
1478 
1479 static int e1000_tx_tso_post_load(void *opaque, int version_id)
1480 {
1481     E1000State *s = opaque;
1482     s->received_tx_tso = true;
1483     return 0;
1484 }
1485 
1486 static bool e1000_mit_state_needed(void *opaque)
1487 {
1488     E1000State *s = opaque;
1489 
1490     return chkflag(MIT);
1491 }
1492 
1493 static bool e1000_full_mac_needed(void *opaque)
1494 {
1495     E1000State *s = opaque;
1496 
1497     return chkflag(MAC);
1498 }
1499 
1500 static bool e1000_tso_state_needed(void *opaque)
1501 {
1502     E1000State *s = opaque;
1503 
1504     return chkflag(TSO);
1505 }
1506 
1507 static const VMStateDescription vmstate_e1000_mit_state = {
1508     .name = "e1000/mit_state",
1509     .version_id = 1,
1510     .minimum_version_id = 1,
1511     .needed = e1000_mit_state_needed,
1512     .fields = (VMStateField[]) {
1513         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1514         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1515         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1516         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1517         VMSTATE_BOOL(mit_irq_level, E1000State),
1518         VMSTATE_END_OF_LIST()
1519     }
1520 };
1521 
1522 static const VMStateDescription vmstate_e1000_full_mac_state = {
1523     .name = "e1000/full_mac_state",
1524     .version_id = 1,
1525     .minimum_version_id = 1,
1526     .needed = e1000_full_mac_needed,
1527     .fields = (VMStateField[]) {
1528         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1529         VMSTATE_END_OF_LIST()
1530     }
1531 };
1532 
1533 static const VMStateDescription vmstate_e1000_tx_tso_state = {
1534     .name = "e1000/tx_tso_state",
1535     .version_id = 1,
1536     .minimum_version_id = 1,
1537     .needed = e1000_tso_state_needed,
1538     .post_load = e1000_tx_tso_post_load,
1539     .fields = (VMStateField[]) {
1540         VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1541         VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1542         VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1543         VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1544         VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1545         VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1546         VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1547         VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1548         VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1549         VMSTATE_INT8(tx.tso_props.ip, E1000State),
1550         VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1551         VMSTATE_END_OF_LIST()
1552     }
1553 };
1554 
1555 static const VMStateDescription vmstate_e1000 = {
1556     .name = "e1000",
1557     .version_id = 2,
1558     .minimum_version_id = 1,
1559     .pre_save = e1000_pre_save,
1560     .post_load = e1000_post_load,
1561     .fields = (VMStateField[]) {
1562         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1563         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1564         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1565         VMSTATE_UINT32(rxbuf_size, E1000State),
1566         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1567         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1568         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1569         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1570         VMSTATE_UINT16(eecd_state.reading, E1000State),
1571         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1572         VMSTATE_UINT8(mig_props.ipcss, E1000State),
1573         VMSTATE_UINT8(mig_props.ipcso, E1000State),
1574         VMSTATE_UINT16(mig_props.ipcse, E1000State),
1575         VMSTATE_UINT8(mig_props.tucss, E1000State),
1576         VMSTATE_UINT8(mig_props.tucso, E1000State),
1577         VMSTATE_UINT16(mig_props.tucse, E1000State),
1578         VMSTATE_UINT32(mig_props.paylen, E1000State),
1579         VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1580         VMSTATE_UINT16(mig_props.mss, E1000State),
1581         VMSTATE_UINT16(tx.size, E1000State),
1582         VMSTATE_UINT16(tx.tso_frames, E1000State),
1583         VMSTATE_UINT8(tx.sum_needed, E1000State),
1584         VMSTATE_INT8(mig_props.ip, E1000State),
1585         VMSTATE_INT8(mig_props.tcp, E1000State),
1586         VMSTATE_BUFFER(tx.header, E1000State),
1587         VMSTATE_BUFFER(tx.data, E1000State),
1588         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1589         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1590         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1591         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1592         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1593         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1594         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1595         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1596         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1597         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1598         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1599         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1600         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1601         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1602         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1603         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1604         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1605         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1606         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1607         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1608         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1609         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1610         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1611         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1612         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1613         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1614         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1615         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1616         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1617         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1618         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1619         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1620         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1621         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1622         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1623         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1624         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1625         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1626         VMSTATE_UINT32(mac_reg[VET], E1000State),
1627         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1628         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1629         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1630         VMSTATE_END_OF_LIST()
1631     },
1632     .subsections = (const VMStateDescription*[]) {
1633         &vmstate_e1000_mit_state,
1634         &vmstate_e1000_full_mac_state,
1635         &vmstate_e1000_tx_tso_state,
1636         NULL
1637     }
1638 };
1639 
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize();
 * the two DevId words (0x0b and 0x0d) are zero in this template.
 * The leading comment on each row is the word offset of its first entry.
 */
static const uint16_t e1000_eeprom_template[64] = {
    /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000,
    /* 0x08 */ 0x3000, 0x1000, 0x6403, 0x0000, 0x8086, 0x0000, 0x8086, 0x3040,
    /* 0x10 */ 0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700,
    /* 0x18 */ 0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706,
    /* 0x20 */ 0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff,
    /* 0x28 */ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x30 */ 0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x38 */ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000,
};
1654 
1655 /* PCI interface */
1656 
1657 static void
1658 e1000_mmio_setup(E1000State *d)
1659 {
1660     int i;
1661     const uint32_t excluded_regs[] = {
1662         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1663         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1664     };
1665 
1666     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1667                           "e1000-mmio", PNPMMIO_SIZE);
1668     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1669     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1670         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1671                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1672     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1673 }
1674 
1675 static void
1676 pci_e1000_uninit(PCIDevice *dev)
1677 {
1678     E1000State *d = E1000(dev);
1679 
1680     timer_free(d->autoneg_timer);
1681     timer_free(d->mit_timer);
1682     timer_free(d->flush_queue_timer);
1683     qemu_del_nic(d->nic);
1684 }
1685 
1686 static NetClientInfo net_e1000_info = {
1687     .type = NET_CLIENT_DRIVER_NIC,
1688     .size = sizeof(NICState),
1689     .can_receive = e1000_can_receive,
1690     .receive = e1000_receive,
1691     .receive_iov = e1000_receive_iov,
1692     .link_status_changed = e1000_set_link_status,
1693 };
1694 
1695 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1696                                 uint32_t val, int len)
1697 {
1698     E1000State *s = E1000(pci_dev);
1699 
1700     pci_default_write_config(pci_dev, address, val, len);
1701 
1702     if (range_covers_byte(address, len, PCI_COMMAND) &&
1703         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1704         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1705     }
1706 }
1707 
1708 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1709 {
1710     DeviceState *dev = DEVICE(pci_dev);
1711     E1000State *d = E1000(pci_dev);
1712     uint8_t *pci_conf;
1713     uint8_t *macaddr;
1714 
1715     pci_dev->config_write = e1000_write_config;
1716 
1717     pci_conf = pci_dev->config;
1718 
1719     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1720     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1721 
1722     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1723 
1724     e1000_mmio_setup(d);
1725 
1726     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1727 
1728     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1729 
1730     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1731     macaddr = d->conf.macaddr.a;
1732 
1733     e1000x_core_prepare_eeprom(d->eeprom_data,
1734                                e1000_eeprom_template,
1735                                sizeof(e1000_eeprom_template),
1736                                PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1737                                macaddr);
1738 
1739     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1740                           object_get_typename(OBJECT(d)), dev->id, d);
1741 
1742     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1743 
1744     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1745     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1746     d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1747                                         e1000_flush_queue_timer, d);
1748 }
1749 
1750 static void qdev_e1000_reset(DeviceState *dev)
1751 {
1752     E1000State *d = E1000(dev);
1753     e1000_reset(d);
1754 }
1755 
1756 static Property e1000_properties[] = {
1757     DEFINE_NIC_PROPERTIES(E1000State, conf),
1758     DEFINE_PROP_BIT("autonegotiation", E1000State,
1759                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1760     DEFINE_PROP_BIT("mitigation", E1000State,
1761                     compat_flags, E1000_FLAG_MIT_BIT, true),
1762     DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1763                     compat_flags, E1000_FLAG_MAC_BIT, true),
1764     DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1765                     compat_flags, E1000_FLAG_TSO_BIT, true),
1766     DEFINE_PROP_BIT("init-vet", E1000State,
1767                     compat_flags, E1000_FLAG_VET_BIT, true),
1768     DEFINE_PROP_END_OF_LIST(),
1769 };
1770 
/*
 * Per-variant parameters.  One entry per concrete e1000 type; it is passed
 * to e1000_class_init() as class_data.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the variant */
    uint16_t device_id;     /* copied into the PCI class device_id */
    uint8_t revision;       /* copied into the PCI class revision */
    uint16_t phy_id2;       /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1777 
1778 static void e1000_class_init(ObjectClass *klass, void *data)
1779 {
1780     DeviceClass *dc = DEVICE_CLASS(klass);
1781     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1782     E1000BaseClass *e = E1000_CLASS(klass);
1783     const E1000Info *info = data;
1784 
1785     k->realize = pci_e1000_realize;
1786     k->exit = pci_e1000_uninit;
1787     k->romfile = "efi-e1000.rom";
1788     k->vendor_id = PCI_VENDOR_ID_INTEL;
1789     k->device_id = info->device_id;
1790     k->revision = info->revision;
1791     e->phy_id2 = info->phy_id2;
1792     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1793     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1794     dc->desc = "Intel Gigabit Ethernet";
1795     dc->reset = qdev_e1000_reset;
1796     dc->vmsd = &vmstate_e1000;
1797     device_class_set_props(dc, e1000_properties);
1798 }
1799 
1800 static void e1000_instance_init(Object *obj)
1801 {
1802     E1000State *n = E1000(obj);
1803     device_add_bootindex_property(obj, &n->conf.bootindex,
1804                                   "bootindex", "/ethernet-phy@0",
1805                                   DEVICE(n));
1806 }
1807 
1808 static const TypeInfo e1000_base_info = {
1809     .name          = TYPE_E1000_BASE,
1810     .parent        = TYPE_PCI_DEVICE,
1811     .instance_size = sizeof(E1000State),
1812     .instance_init = e1000_instance_init,
1813     .class_size    = sizeof(E1000BaseClass),
1814     .abstract      = true,
1815     .interfaces = (InterfaceInfo[]) {
1816         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1817         { },
1818     },
1819 };
1820 
1821 static const E1000Info e1000_devices[] = {
1822     {
1823         .name      = "e1000",
1824         .device_id = E1000_DEV_ID_82540EM,
1825         .revision  = 0x03,
1826         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1827     },
1828     {
1829         .name      = "e1000-82544gc",
1830         .device_id = E1000_DEV_ID_82544GC_COPPER,
1831         .revision  = 0x03,
1832         .phy_id2   = E1000_PHY_ID2_82544x,
1833     },
1834     {
1835         .name      = "e1000-82545em",
1836         .device_id = E1000_DEV_ID_82545EM_COPPER,
1837         .revision  = 0x03,
1838         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1839     },
1840 };
1841 
1842 static void e1000_register_types(void)
1843 {
1844     int i;
1845 
1846     type_register_static(&e1000_base_info);
1847     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1848         const E1000Info *info = &e1000_devices[i];
1849         TypeInfo type_info = {};
1850 
1851         type_info.name = info->name;
1852         type_info.parent = TYPE_E1000_BASE;
1853         type_info.class_data = (void *)info;
1854         type_info.class_init = e1000_class_init;
1855 
1856         type_register(&type_info);
1857     }
1858 }
1859 
1860 type_init(e1000_register_types)
1861