xref: /qemu/hw/net/e1000.c (revision c50b152485d4e10dfa1e1d7ea668f29a5fb92e9c)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/net/mii.h"
30 #include "hw/pci/pci_device.h"
31 #include "hw/qdev-properties.h"
32 #include "migration/vmstate.h"
33 #include "net/eth.h"
34 #include "net/net.h"
35 #include "net/checksum.h"
36 #include "sysemu/sysemu.h"
37 #include "sysemu/dma.h"
38 #include "qemu/iov.h"
39 #include "qemu/module.h"
40 #include "qemu/range.h"
41 
42 #include "e1000x_common.h"
43 #include "trace.h"
44 #include "qom/object.h"
45 
/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
/* Map a DEBUG_xxx enumerator to its bit in debugflags. */
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Categories enabled by default when E1000_DEBUG is defined. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr only when debug category 'what' is enabled. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

/* Sizes of the I/O-port BAR and the memory-mapped register BAR. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000

/* Ethernet header plus a 4-byte 802.1Q VLAN tag. */
#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
70 
71 /*
72  * HW models:
73  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76  *  Others never tested
77  */
78 
/* Device state for the emulated e1000 NIC. */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;          /* memory-mapped register BAR */
    MemoryRegion io;            /* I/O-port register BAR */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by word offset */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers, accessed via MDIC */
    uint16_t eeprom_data[64];   /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;        /* per-descriptor RX buffer size (from RCTL) */
    uint32_t rxbuf_min_shift;   /* RDMTS-derived free-descriptor threshold */
    /* Transmit state accumulated across successive TX descriptors. */
    struct e1000_tx {
        unsigned char header[256];      /* saved TSO protocol headers */
        unsigned char vlan_header[4];   /* VLAN tag to splice into the frame */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                  /* bytes accumulated in data[] */
        unsigned char vlan_needed;      /* insert vlan_header on transmit */
        unsigned char sum_needed;       /* E1000_TXD_POPTS_* checksum bits */
        bool cptse;                     /* current packet uses TSO */
        e1000x_txd_props props;         /* latched non-TSO context descriptor */
        e1000x_txd_props tso_props;     /* latched TSO context descriptor */
        uint16_t tso_frames;            /* TSO segments emitted so far */
        bool busy;                      /* guards against reentrant start_xmit() */
    } tx;

    /* Microwire EEPROM bit-banging state (see get_eecd()/set_eecd()). */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    /* Delays packet delivery after RCTL writes (see set_rx_control()). */
    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration; /* which txd_props set to migrate */
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;
145 typedef struct E1000State_st E1000State;
146 
/* Test a migration-compatibility flag; expects an E1000State *s in scope. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data: each derived device model supplies its own PHY ID2 value. */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;   /* written into MII_PHYID2 at reset */
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
159 
160 
161 static void
162 e1000_link_up(E1000State *s)
163 {
164     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
165 
166     /* E1000_STATUS_LU is tested by e1000_can_receive() */
167     qemu_flush_queued_packets(qemu_get_queue(s->nic));
168 }
169 
170 static void
171 e1000_autoneg_done(E1000State *s)
172 {
173     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
174 
175     /* E1000_STATUS_LU is tested by e1000_can_receive() */
176     qemu_flush_queued_packets(qemu_get_queue(s->nic));
177 }
178 
179 static bool
180 have_autoneg(E1000State *s)
181 {
182     return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
183 }
184 
185 static void
186 set_phy_ctrl(E1000State *s, int index, uint16_t val)
187 {
188     /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
189     s->phy_reg[MII_BMCR] = val & ~(0x3f |
190                                    MII_BMCR_RESET |
191                                    MII_BMCR_ANRESTART);
192 
193     /*
194      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
195      * migrate during auto negotiation, after migration the link will be
196      * down.
197      */
198     if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
199         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
200     }
201 }
202 
/* Per-register PHY write handlers; registers without an entry here are
 * stored directly into phy_reg[] by set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
208 
/* Guest access permissions for each PHY register (enforced in set_mdic());
 * registers not listed are inaccessible and raise E1000_MDIC_ERROR. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
    [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
    [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [MII_ANER]   = PHY_R,
};
219 
/* MII_PHYID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
/* PHY register power-on defaults, loaded at reset time. */
static const uint16_t phy_reg_init[] = {
    [MII_BMCR] = MII_BMCR_SPEED1000 |
                 MII_BMCR_FD |
                 MII_BMCR_AUTOEN,

    [MII_BMSR] = MII_BMSR_EXTCAP |
                 MII_BMSR_LINK_ST |   /* link initially up */
                 MII_BMSR_AUTONEG |
                 /* MII_BMSR_AN_COMP: initially NOT completed */
                 MII_BMSR_MFPS |
                 MII_BMSR_EXTSTAT |
                 MII_BMSR_10T_HD |
                 MII_BMSR_10T_FD |
                 MII_BMSR_100TX_HD |
                 MII_BMSR_100TX_FD,

    [MII_PHYID1] = 0x141,
    /* [MII_PHYID2] configured per DevId, from e1000_reset_hold() */
    [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
                 MII_ANAR_10FD | MII_ANAR_TX |
                 MII_ANAR_TXFD | MII_ANAR_PAUSE |
                 MII_ANAR_PAUSE_ASYM,
    [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
                   MII_ANLPAR_TX | MII_ANLPAR_TXFD,
    [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
                     MII_CTRL1000_MASTER,
    [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
                     MII_STAT1000_ROK | MII_STAT1000_LOK,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
253 
/* Non-zero MAC register power-on defaults; everything else resets to 0. */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
267 
/* Helper function, *curr == 0 means the value is not set */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero proposals are ignored */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;          /* keep the smallest non-zero candidate */
    }
}
276 
/*
 * Store 'val' into ICR/ICS and update the INTx pin level, applying the
 * partial interrupt-mitigation emulation (ITR/RADV/TADV) when enabled:
 * a rising edge inside the mitigation window is deferred until
 * e1000_mit_timer() fires.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are both pending and unmasked drive the pin. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            /* mit_delay is in 256 ns units (see comment above). */
            s->mit_timer_on = 1;
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
344 
345 static void
346 e1000_mit_timer(void *opaque)
347 {
348     E1000State *s = opaque;
349 
350     s->mit_timer_on = 0;
351     /* Call set_interrupt_cause to update the irq level (if necessary). */
352     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
353 }
354 
355 static void
356 set_ics(E1000State *s, int index, uint32_t val)
357 {
358     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
359         s->mac_reg[IMS]);
360     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
361 }
362 
363 static void
364 e1000_autoneg_timer(void *opaque)
365 {
366     E1000State *s = opaque;
367     if (!qemu_get_queue(s->nic)->link_down) {
368         e1000_autoneg_done(s);
369         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
370     }
371 }
372 
373 static bool e1000_vet_init_need(void *opaque)
374 {
375     E1000State *s = opaque;
376 
377     return chkflag(VET);
378 }
379 
/* Reset "hold" phase handler: return the device to its power-on state. */
static void e1000_reset_hold(Object *obj)
{
    E1000State *d = E1000(obj);
    E1000BaseClass *edc = E1000_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;

    /* Stop all pending timed work and clear mitigation state. */
    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_del(d->flush_queue_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    /* Reload PHY/MAC register defaults; PHY ID2 is model-specific. */
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    d->phy_reg[MII_PHYID2] = edc->phy_id2;
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    /* Drop any partially accumulated transmit state. */
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
    }

    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);

    /* Pre-set the VLAN Ethertype register on new-enough machine types. */
    if (e1000_vet_init_need(d)) {
        d->mac_reg[VET] = ETH_P_VLAN;
    }
}
410 
411 static void
412 set_ctrl(E1000State *s, int index, uint32_t val)
413 {
414     /* RST is self clearing */
415     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
416 }
417 
418 static void
419 e1000_flush_queue_timer(void *opaque)
420 {
421     E1000State *s = opaque;
422 
423     qemu_flush_queued_packets(qemu_get_queue(s->nic));
424 }
425 
426 static void
427 set_rx_control(E1000State *s, int index, uint32_t val)
428 {
429     s->mac_reg[RCTL] = val;
430     s->rxbuf_size = e1000x_rxbufsize(val);
431     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
432     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
433            s->mac_reg[RCTL]);
434     timer_mod(s->flush_queue_timer,
435               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
436 }
437 
/*
 * Handle a write to the MDI Control register: perform the requested PHY
 * register read or write and latch the result with the READY bit set.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* clear the data field, then merge in the PHY register value */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* dispatch to a register-specific handler when one exists */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    /* Optionally raise the "MDI/O access complete" interrupt. */
    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
472 
/*
 * Read the EEPROM/flash control register: reflect the last written
 * control bits and drive the DO line with the current EEPROM output bit.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* DO idles high; during a read it carries the addressed data bit.
     * Words are shifted out MSB first, hence the (bit & 0xf) ^ 0xf. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
486 
/*
 * Write the EEPROM/flash control register: emulate the Microwire serial
 * protocol bit-banged by the guest over the SK/CS/DI lines.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Remember only the guest-controlled bits for edge detection. */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift one bit in from DI. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    /* After 9 bits (opcode + 6-bit address) decide whether this is a read
     * and where the output bit stream should start. */
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
522 
/*
 * Read the EEPROM Read register (EERD): return the EEPROM word addressed
 * by a previously started read, with the DONE bit set.
 */
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    /* No read was started: return the raw register contents. */
    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    /* Address beyond the checksum word: report DONE without data. */
    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}
537 
/*
 * Compute an Internet checksum over data[css .. n-1] (optionally capped
 * by a non-zero checksum-end offset cse) and store it big-endian at sloc.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    if (cse && cse < n) {
        n = cse + 1;            /* cse is the last byte to include */
    }
    /* Store only if the 16-bit result fits inside the buffer. */
    if (sloc < n - 1) {
        uint32_t sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
550 
551 static inline void
552 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
553 {
554     if (is_broadcast_ether_addr(arr)) {
555         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
556     } else if (is_multicast_ether_addr(arr)) {
557         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
558     }
559 }
560 
561 static void
562 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
563 {
564     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
565                                     PTC1023, PTC1522 };
566 
567     NetClientState *nc = qemu_get_queue(s->nic);
568     if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
569         qemu_receive_packet(nc, buf, size);
570     } else {
571         qemu_send_packet(nc, buf, size);
572     }
573     inc_tx_bcast_or_mcast_count(s, buf);
574     e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
575 }
576 
/*
 * Transmit one segment from the accumulated TX buffer: patch the
 * IP/TCP/UDP headers for TSO, insert the requested checksums and the
 * VLAN tag, hand the frame to the net layer and update TX statistics.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* total length, and a per-segment IP identification */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* advance the TCP sequence number by what was sent so far */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment: clear PSH and FIN */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                /* last segment of a multi-segment send */
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Insert the requested L4 and/or IP header checksums. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /* Open a 4-byte gap after the MAC addresses (the vlan[] field
         * immediately precedes data[]) and splice in the VLAN header. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Total/good packet and octet counters (+4 for the CRC). */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
643 
/*
 * Process one TX descriptor: latch context descriptors into the props
 * structures, accumulate data descriptors into s->tx.data, and emit
 * segments via xmit_seg() whenever a TSO segment fills up or the EOP
 * descriptor arrives.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* checksum options come from the first descriptor of a packet */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Save the VLAN header so xmit_seg() can insert it into the frame. */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* TSO: accumulate at most header+mss bytes per segment, emitting
         * a segment and re-priming the saved header each time one fills. */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size >= msh) {
                goto eop;
            }
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* clamp to the staging buffer so tp->data cannot overflow */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                /* the protocol headers just became complete: save them */
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                /* start the next segment with a fresh copy of the headers */
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

eop:
    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* With TSO, skip transmission if even the headers are incomplete. */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
733 
/*
 * Write the descriptor status back to guest memory when the descriptor
 * requests it (RS/RPS set); returns the ICR cause bit to accumulate.
 */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    /* Set Descriptor Done and clear the error status bits. */
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    /* Only the status dword is written back, not the whole descriptor. */
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
749 
750 static uint64_t tx_desc_base(E1000State *s)
751 {
752     uint64_t bah = s->mac_reg[TDBAH];
753     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
754 
755     return (bah << 32) + bal;
756 }
757 
/*
 * Drain the TX ring: walk descriptors from TDH towards TDT, processing
 * each one, then raise the accumulated interrupt causes.  s->tx.busy
 * suppresses re-entrant invocations (the loopback path can feed packets
 * back into the device while a transmit is in progress).
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    if (s->tx.busy) {
        return;
    }
    s->tx.busy = true;

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    s->tx.busy = false;
    set_ics(s, 0, cause);
}
805 
/*
 * Decide whether an incoming frame should be accepted, updating the
 * relevant RX statistics counters.  Returns non-zero to accept.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    uint32_t rctl = s->mac_reg[RCTL];
    int isbcast = is_broadcast_ether_addr(buf);
    int ismcast = is_multicast_ether_addr(buf);

    /* VLAN filtering: drop tagged frames whose VID is not in the VFTA. */
    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
        uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
        uint32_t vfta =
            ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
                     ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
        if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
            return 0;
        }
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

    /* Fall back to the unicast/multicast address table matching. */
    return e1000x_rx_group_filter(s->mac_reg, buf);
}
840 
841 static void
842 e1000_set_link_status(NetClientState *nc)
843 {
844     E1000State *s = qemu_get_nic_opaque(nc);
845     uint32_t old_status = s->mac_reg[STATUS];
846 
847     if (nc->link_down) {
848         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
849     } else {
850         if (have_autoneg(s) &&
851             !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
852             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
853         } else {
854             e1000_link_up(s);
855         }
856     }
857 
858     if (s->mac_reg[STATUS] != old_status)
859         set_ics(s, 0, E1000_ICR_LSC);
860 }
861 
/*
 * Check whether the RX ring has enough free descriptors to hold
 * total_size bytes.  RDH == RDT means the ring is empty.
 */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* wrapped: the free span runs past the end of the ring */
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
879 
880 static bool
881 e1000_can_receive(NetClientState *nc)
882 {
883     E1000State *s = qemu_get_nic_opaque(nc);
884 
885     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
886         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
887 }
888 
889 static uint64_t rx_desc_base(E1000State *s)
890 {
891     uint64_t bah = s->mac_reg[RDBAH];
892     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
893 
894     return (bah << 32) + bal;
895 }
896 
/*
 * Account for a packet dropped because the RX ring had no room:
 * bump the "no buffers" and "missed packets" counters, then raise
 * the Receiver Overrun interrupt.
 */
static void
e1000_receiver_overrun(E1000State *s, size_t size)
{
    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    set_ics(s, 0, E1000_ICS_RXO);
}
905 
/*
 * Receive one packet, presented as an iovec, into the guest's RX
 * descriptor ring via DMA.  Returns the number of bytes consumed,
 * 0 to make the net core queue the packet and retry later, or -1 to
 * drop it.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[ETH_ZLEN];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* Reception is temporarily quiesced; 0 asks the net core to requeue. */
    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Consume (but ignore) packets rejected by the RX filter. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Strip the 4-byte VLAN tag: keep the TCI for the descriptor's
         * "special" field and close the gap in the Ethernet header. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header lives in min_buf; patch the original iovec instead. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Scatter the frame across as many RX descriptors as needed. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy iovec fragments into the guest buffer via DMA. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the descriptor back first, then set DD with a second,
         * separate write so the guest never observes DD alongside stale
         * descriptor fields. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Raise RXT0; also RXDMT0 when free descriptors fall below the
     * threshold derived from rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1052 
1053 static ssize_t
1054 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1055 {
1056     const struct iovec iov = {
1057         .iov_base = (uint8_t *)buf,
1058         .iov_len = size
1059     };
1060 
1061     return e1000_receive_iov(nc, &iov, 1);
1062 }
1063 
/* Default register read handler: return the stored value unchanged. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1069 
/*
 * ICR read handler: reading the Interrupt Cause Register clears it
 * (and re-evaluates the interrupt line via set_interrupt_cause).
 */
static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}
1079 
1080 static uint32_t
1081 mac_read_clr4(E1000State *s, int index)
1082 {
1083     uint32_t ret = s->mac_reg[index];
1084 
1085     s->mac_reg[index] = 0;
1086     return ret;
1087 }
1088 
1089 static uint32_t
1090 mac_read_clr8(E1000State *s, int index)
1091 {
1092     uint32_t ret = s->mac_reg[index];
1093 
1094     s->mac_reg[index] = 0;
1095     s->mac_reg[index-1] = 0;
1096     return ret;
1097 }
1098 
/*
 * Default register write handler.  A write to RA+1 (high dword of the
 * first Receive Address entry) completes a MAC address update, so the
 * NIC info string is refreshed with the new address.
 */
static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA + 1) {
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}
1112 
1113 static void
1114 set_rdt(E1000State *s, int index, uint32_t val)
1115 {
1116     s->mac_reg[index] = val & 0xffff;
1117     if (e1000_has_rxbufs(s, 1)) {
1118         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1119     }
1120 }
1121 
/*
 * Generate a write handler that stores only the low `num` bits of the
 * value, discarding writes to the reserved high bits.
 */
#define LOW_BITS_SET_FUNC(num)                             \
    static void                                            \
    set_##num##bit(E1000State *s, int index, uint32_t val) \
    {                                                      \
        s->mac_reg[index] = val & (BIT(num) - 1);          \
    }

/* set_4bit, set_11bit, set_13bit and set_16bit. */
LOW_BITS_SET_FUNC(4)
LOW_BITS_SET_FUNC(11)
LOW_BITS_SET_FUNC(13)
LOW_BITS_SET_FUNC(16)
1133 
/*
 * TDLEN/RDLEN write handler: the ring length must be a multiple of
 * 128 bytes (low 7 bits forced to zero) and is capped at 20 bits.
 */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1139 
/*
 * Write handler shared by TCTL and TDT (see macreg_writeops): store the
 * value, mask TDT to its 16-bit width, and kick the transmit path.
 */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1147 
/* ICR write handler: write-1-to-clear — clear the ICR bits set in val. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1154 
1155 static void
1156 set_imc(E1000State *s, int index, uint32_t val)
1157 {
1158     s->mac_reg[IMS] &= ~val;
1159     set_ics(s, 0, 0);
1160 }
1161 
1162 static void
1163 set_ims(E1000State *s, int index, uint32_t val)
1164 {
1165     s->mac_reg[IMS] |= val;
1166     set_ics(s, 0, 0);
1167 }
1168 
/*
 * Per-register read dispatch table, indexed by register offset / 4.
 * Entries left NULL are treated as unknown registers by
 * e1000_mmio_read.
 */
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
    getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
    getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),

    /* Statistics counters clear on read; 64-bit ones clear as a pair. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,

    /* Register ranges (arrays/tables) read back their stored values. */
    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_readreg,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_readreg,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1219 
/*
 * Per-register write dispatch table, indexed by register offset / 4.
 * Registers with side effects get dedicated setters; the rest store
 * the value (possibly masked to the implemented bit width).
 */
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(IPAV),     putreg(WUC),
    putreg(WUS),

    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
    [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
    [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
    [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
    [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,

    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &set_11bit,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
    [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1251 
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented
 * A register whose "n" bit is set is only accessible when the
 * corresponding compat flag is enabled on the device. */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1304 
/*
 * MMIO write dispatcher: mask the address to the 128 KiB register
 * window, divide by 4 to index the write table, and honour the
 * per-register compat-flag gating from mac_reg_access.
 */
static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            macreg_writeops[index](s, index, val);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else if (index < NREADOPS && macreg_readops[index]) {
        /* Register is readable but has no write handler: read-only. */
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
               index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}
1332 
/*
 * MMIO read dispatcher: same indexing and compat-flag gating as
 * e1000_mmio_write.  Unknown or disabled registers read as zero.
 */
static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            return macreg_readops[index](s, index);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    }
    return 0;
}
1356 
/* Register window: all accesses are implemented at 32-bit granularity. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1366 
1367 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1368                               unsigned size)
1369 {
1370     E1000State *s = opaque;
1371 
1372     (void)s;
1373     return 0;
1374 }
1375 
1376 static void e1000_io_write(void *opaque, hwaddr addr,
1377                            uint64_t val, unsigned size)
1378 {
1379     E1000State *s = opaque;
1380 
1381     (void)s;
1382 }
1383 
/* Ops for the (unimplemented) I/O-port BAR. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1389 
/* vmstate field-exists predicate: true only for version-1 streams. */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;
    return 1 == version_id;
}
1394 
/*
 * Migration pre-save hook: finalize in-flight autoneg so the link state
 * can be inferred on load, and select which TX offload property set is
 * migrated in the main structure (mig_props).
 */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_BMSR_AN_COMP to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1425 
/*
 * Migration post-load hook: reset mitigation state and re-arm its
 * timer, reconstruct the link state (nc->link_down is not migrated),
 * and restore the TX offload property sets from mig_props.
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the MIT compat flag, mitigation registers must stay zero. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = true;
    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1461 
/*
 * Subsection post-load: record that tx.tso_props arrived so
 * e1000_post_load doesn't overwrite it with a copy of mig_props.
 */
static int e1000_tx_tso_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    s->received_tx_tso = true;
    return 0;
}
1468 
/* Migrate the mitigation subsection only when the MIT flag is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1475 
/* Migrate the full MAC-register subsection only when the MAC flag is on. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1482 
/* Migrate the TSO-context subsection only when the TSO flag is on. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1489 
/* Optional subsection: interrupt-mitigation registers and IRQ level. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1504 
/* Optional subsection: the entire 32 K-entry MAC register file. */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1515 
/* Optional subsection: the second (TSO) TX context property set. */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1537 
/*
 * Main migration description.  Field order and types are migration ABI:
 * do not reorder or retype entries; new state goes into subsections.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
                                 E1000_VLAN_FILTER_TBL_SIZE),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1623 
1624 /*
1625  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1626  * Note: A valid DevId will be inserted during pci_e1000_realize().
1627  */
1628 static const uint16_t e1000_eeprom_template[64] = {
1629     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1630     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1631     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1632     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1633     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1634     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1635     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1636     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1637 };
1638 
1639 /* PCI interface */
1640 
/*
 * Create the MMIO and I/O-port regions.  MMIO writes are coalesced
 * except for the registers in excluded_regs (interrupt registers,
 * MDIC, and the TCTL/TDT transmit-kick registers) — presumably because
 * their write handlers must run immediately.  PNPMMIO_SIZE acts as a
 * sentinel terminating the excluded list.
 */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    /* Coalesce each gap between consecutive excluded registers. */
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1658 
/* PCI exit hook: free the device's timers and detach the NIC backend. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_free(d->autoneg_timer);
    timer_free(d->mit_timer);
    timer_free(d->flush_queue_timer);
    qemu_del_nic(d->nic);
}
1669 
/* Net-core callbacks wiring the backend to this device's RX/link code. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1678 
1679 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1680                                 uint32_t val, int len)
1681 {
1682     E1000State *s = E1000(pci_dev);
1683 
1684     pci_default_write_config(pci_dev, address, val, len);
1685 
1686     if (range_covers_byte(address, len, PCI_COMMAND) &&
1687         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1688         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1689     }
1690 }
1691 
/*
 * Realize the e1000 PCI device: set up config space, MMIO/IO BARs,
 * the EEPROM contents, the NIC backend, and the emulation timers.
 */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint8_t *macaddr;

    /* Intercept config writes to detect bus-master enable. */
    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    /* Regions must exist before they are handed to pci_register_bar(). */
    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    /* Pick a default MAC if the user did not supply one. */
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;

    /* Populate the EEPROM template with this variant's device ID
     * and the chosen MAC address. */
    e1000x_core_prepare_eeprom(d->eeprom_data,
                               e1000_eeprom_template,
                               sizeof(e1000_eeprom_template),
                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    /* Timers for autonegotiation, interrupt mitigation and deferred
     * RX-queue flushing; freed in pci_e1000_uninit(). */
    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                        e1000_flush_queue_timer, d);
}
1733 
/*
 * Device properties.  The bit properties below are compatibility knobs
 * (all default to on); presumably older machine types switch individual
 * features off to preserve migration compatibility — verify against the
 * machine compat tables before relying on that.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_BIT("init-vet", E1000State,
                    compat_flags, E1000_FLAG_VET_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1748 
/* Per-variant parameters used to synthesize the concrete QOM types. */
typedef struct E1000Info {
    const char *name;       /* QOM type name, e.g. "e1000" */
    uint16_t   device_id;   /* PCI device ID */
    uint8_t    revision;    /* PCI revision ID */
    uint16_t   phy_id2;     /* value for the PHY ID2 register */
} E1000Info;
1755 
1756 static void e1000_class_init(ObjectClass *klass, void *data)
1757 {
1758     DeviceClass *dc = DEVICE_CLASS(klass);
1759     ResettableClass *rc = RESETTABLE_CLASS(klass);
1760     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1761     E1000BaseClass *e = E1000_CLASS(klass);
1762     const E1000Info *info = data;
1763 
1764     k->realize = pci_e1000_realize;
1765     k->exit = pci_e1000_uninit;
1766     k->romfile = "efi-e1000.rom";
1767     k->vendor_id = PCI_VENDOR_ID_INTEL;
1768     k->device_id = info->device_id;
1769     k->revision = info->revision;
1770     e->phy_id2 = info->phy_id2;
1771     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1772     rc->phases.hold = e1000_reset_hold;
1773     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1774     dc->desc = "Intel Gigabit Ethernet";
1775     dc->vmsd = &vmstate_e1000;
1776     device_class_set_props(dc, e1000_properties);
1777 }
1778 
1779 static void e1000_instance_init(Object *obj)
1780 {
1781     E1000State *n = E1000(obj);
1782     device_add_bootindex_property(obj, &n->conf.bootindex,
1783                                   "bootindex", "/ethernet-phy@0",
1784                                   DEVICE(n));
1785 }
1786 
/*
 * Abstract base type shared by all e1000 variants; the concrete models
 * are generated from e1000_devices[] in e1000_register_types().
 */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,  /* only the named variants are instantiable */
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1799 
/* The emulated e1000 variants; one QOM type is registered per entry. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1820 
1821 static void e1000_register_types(void)
1822 {
1823     int i;
1824 
1825     type_register_static(&e1000_base_info);
1826     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1827         const E1000Info *info = &e1000_devices[i];
1828         TypeInfo type_info = {};
1829 
1830         type_info.name = info->name;
1831         type_info.parent = TYPE_E1000_BASE;
1832         type_info.class_data = (void *)info;
1833         type_info.class_init = e1000_class_init;
1834 
1835         type_register(&type_info);
1836     }
1837 }
1838 
1839 type_init(e1000_register_types)
1840