xref: /qemu/hw/net/e1000.c (revision 72ea771c9711cba63686d5d3284bc6645d13f7d2)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000_regs.h"
39 
40 #define E1000_DEBUG
41 
42 #ifdef E1000_DEBUG
43 enum {
44     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
45     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
46     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
47     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
48 };
49 #define DBGBIT(x)    (1<<DEBUG_##x)
50 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
51 
52 #define DBGOUT(what, fmt, ...) do { \
53     if (debugflags & DBGBIT(what)) \
54         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
55     } while (0)
56 #else
57 #define DBGOUT(what, fmt, ...) do {} while (0)
58 #endif
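/* Usage sketch: DBGOUT(RX, "frame of %d bytes\n", n) prints to stderr with an
 * "e1000: " prefix only when the DEBUG_RX bit is set in debugflags; with
 * E1000_DEBUG undefined the macro expands to an empty statement. */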
59 
60 #define IOPORT_SIZE       0x40
61 #define PNPMMIO_SIZE      0x20000
62 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
63 
64 /* this is the size past which hardware will drop packets when setting LPE=0 */
65 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
66 /* this is the size past which hardware will drop packets when setting LPE=1 */
67 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
68 
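/* 14-byte Ethernet header plus an optional 4-byte 802.1Q VLAN tag */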
69 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
70 
71 /*
72  * HW models:
73  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76  *  Others never tested
77  */
78 
79 typedef struct E1000State_st {
80     /*< private >*/
81     PCIDevice parent_obj;
82     /*< public >*/
83 
84     NICState *nic;
85     NICConf conf;
86     MemoryRegion mmio;
87     MemoryRegion io;
88 
89     uint32_t mac_reg[0x8000];
90     uint16_t phy_reg[0x20];
91     uint16_t eeprom_data[64];
92 
93     uint32_t rxbuf_size;
94     uint32_t rxbuf_min_shift;
95     struct e1000_tx {
96         unsigned char header[256];
97         unsigned char vlan_header[4];
98         /* Fields vlan and data must not be reordered or separated. */
99         unsigned char vlan[4];
100         unsigned char data[0x10000];
101         uint16_t size;
102         unsigned char sum_needed;
103         unsigned char vlan_needed;
104         uint8_t ipcss;
105         uint8_t ipcso;
106         uint16_t ipcse;
107         uint8_t tucss;
108         uint8_t tucso;
109         uint16_t tucse;
110         uint8_t hdr_len;
111         uint16_t mss;
112         uint32_t paylen;
113         uint16_t tso_frames;
114         char tse;
115         int8_t ip;
116         int8_t tcp;
117         char cptse;     // current packet tse bit
118     } tx;
119 
120     struct {
121         uint32_t val_in;    /* shifted in from guest driver */
122         uint16_t bitnum_in;
123         uint16_t bitnum_out;
124         uint16_t reading;
125         uint32_t old_eecd;
126     } eecd_state;
127 
128     QEMUTimer *autoneg_timer;
129 
130     QEMUTimer *mit_timer;      /* Mitigation timer. */
131     bool mit_timer_on;         /* Mitigation timer is running. */
132     bool mit_irq_level;        /* Tracks interrupt pin level. */
133     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
134 
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_MIT_BIT 1
138 #define E1000_FLAG_MAC_BIT 2
139 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
140 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
141 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
142     uint32_t compat_flags;
143 } E1000State;
144 
145 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
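/* e.g. chkflag(MIT) expands to (s->compat_flags & E1000_FLAG_MIT); the macro
 * assumes a local E1000State pointer named "s" is in scope. */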
146 
147 typedef struct E1000BaseClass {
148     PCIDeviceClass parent_class;
149     uint16_t phy_id2;
150 } E1000BaseClass;
151 
152 #define TYPE_E1000_BASE "e1000-base"
153 
154 #define E1000(obj) \
155     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
156 
157 #define E1000_DEVICE_CLASS(klass) \
158      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
159 #define E1000_DEVICE_GET_CLASS(obj) \
160     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
161 
162 #define defreg(x)    x = (E1000_##x>>2)
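/* e.g. defreg(CTRL) expands to CTRL = (E1000_CTRL >> 2): register offsets are
 * byte addresses, while mac_reg[] is indexed in 32-bit words. */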
163 enum {
164     defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
165     defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
166     defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
167     defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
168     defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
169     defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
170     defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
171     defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
172     defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
173     defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
174     defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
175     defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
176     defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
177     defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
178     defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
179     defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
180     defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
181     defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
182     defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
183     defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
184     defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
185     defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC)
186 };
187 
188 static void
189 e1000_link_down(E1000State *s)
190 {
191     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
192     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
193     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
194     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
195 }
196 
197 static void
198 e1000_link_up(E1000State *s)
199 {
200     s->mac_reg[STATUS] |= E1000_STATUS_LU;
201     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
202 
203     /* E1000_STATUS_LU is tested by e1000_can_receive() */
204     qemu_flush_queued_packets(qemu_get_queue(s->nic));
205 }
206 
207 static bool
208 have_autoneg(E1000State *s)
209 {
210     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
211 }
212 
213 static void
214 set_phy_ctrl(E1000State *s, int index, uint16_t val)
215 {
216     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self-clearing */
217     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
218                                    MII_CR_RESET |
219                                    MII_CR_RESTART_AUTO_NEG);
220 
221     /*
222      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
223      * migrate during auto-negotiation, the link will be down after
224      * migration.
225      */
226     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
227         e1000_link_down(s);
228         DBGOUT(PHY, "Start link auto negotiation\n");
229         timer_mod(s->autoneg_timer,
230                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
231     }
232 }
233 
234 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
235     [PHY_CTRL] = set_phy_ctrl,
236 };
237 
238 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
239 
240 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
241 static const char phy_regcap[0x20] = {
242     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
243     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
244     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
245     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
246     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
247     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
248     [PHY_AUTONEG_EXP] = PHY_R,
249 };
250 
251 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
252 static const uint16_t phy_reg_init[] = {
253     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
254                    MII_CR_FULL_DUPLEX |
255                    MII_CR_AUTO_NEG_EN,
256 
257     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
258                    MII_SR_LINK_STATUS |   /* link initially up */
259                    MII_SR_AUTONEG_CAPS |
260                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
261                    MII_SR_PREAMBLE_SUPPRESS |
262                    MII_SR_EXTENDED_STATUS |
263                    MII_SR_10T_HD_CAPS |
264                    MII_SR_10T_FD_CAPS |
265                    MII_SR_100X_HD_CAPS |
266                    MII_SR_100X_FD_CAPS,
267 
268     [PHY_ID1] = 0x141,
269     /* [PHY_ID2] configured per DevId, from e1000_reset() */
270     [PHY_AUTONEG_ADV] = 0xde1,
271     [PHY_LP_ABILITY] = 0x1e0,
272     [PHY_1000T_CTRL] = 0x0e00,
273     [PHY_1000T_STATUS] = 0x3c00,
274     [M88E1000_PHY_SPEC_CTRL] = 0x360,
275     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
276     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
277 };
278 
279 static const uint32_t mac_reg_init[] = {
280     [PBA]     = 0x00100030,
281     [LEDCTL]  = 0x602,
282     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
283                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
284     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
285                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
286                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
287                 E1000_STATUS_LU,
288     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
289                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
290                 E1000_MANC_RMCP_EN,
291 };
292 
293 /* Helper function, *curr == 0 means the value is not set */
294 static inline void
295 mit_update_delay(uint32_t *curr, uint32_t value)
296 {
297     if (value && (*curr == 0 || value < *curr)) {
298         *curr = value;
299     }
300 }
301 
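/* Set ICR/ICS to val and drive the PCI interrupt pin from IMS & ICR; when
 * interrupt mitigation is enabled, raising the line may be deferred until the
 * mitigation timer expires. */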
302 static void
303 set_interrupt_cause(E1000State *s, int index, uint32_t val)
304 {
305     PCIDevice *d = PCI_DEVICE(s);
306     uint32_t pending_ints;
307     uint32_t mit_delay;
308 
309     s->mac_reg[ICR] = val;
310 
311     /*
312      * Make sure ICR and ICS registers have the same value.
313      * The spec says that the ICS register is write-only.  However in practice,
314      * on real hardware ICS is readable, and for reads it has the same value as
315      * ICR (except that ICS does not have the clear on read behaviour of ICR).
316      *
317      * The VxWorks PRO/1000 driver uses this behaviour.
318      */
319     s->mac_reg[ICS] = val;
320 
321     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
322     if (!s->mit_irq_level && pending_ints) {
323         /*
324          * Here we detect a potential rising edge. We postpone raising the
325          * interrupt line if we are inside the mitigation delay window
326          * (s->mit_timer_on == 1).
327          * We provide a partial implementation of interrupt mitigation,
328          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
329          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
330          * RADV; relative timers based on TIDV and RDTR are not implemented.
331          */
332         if (s->mit_timer_on) {
333             return;
334         }
335         if (chkflag(MIT)) {
336             /* Compute the next mitigation delay according to pending
337              * interrupts and the current values of RADV (provided
338              * RDTR!=0), TADV and ITR.
339              * Then rearm the timer.
340              */
341             mit_delay = 0;
342             if (s->mit_ide &&
343                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
344                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
345             }
346             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
347                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
348             }
349             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
350 
351             if (mit_delay) {
352                 s->mit_timer_on = 1;
353                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
354                           mit_delay * 256);
355             }
356             s->mit_ide = 0;
357         }
358     }
359 
360     s->mit_irq_level = (pending_ints != 0);
361     pci_set_irq(d, s->mit_irq_level);
362 }
363 
364 static void
365 e1000_mit_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368 
369     s->mit_timer_on = 0;
370     /* Call set_interrupt_cause to update the irq level (if necessary). */
371     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
372 }
373 
374 static void
375 set_ics(E1000State *s, int index, uint32_t val)
376 {
377     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
378         s->mac_reg[IMS]);
379     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
380 }
381 
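/* Fires ~500 ms after auto-negotiation was (re)started: if the backend link
 * is not down, complete negotiation, bring the link up and notify the guest
 * with a Link Status Change interrupt. */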
382 static void
383 e1000_autoneg_timer(void *opaque)
384 {
385     E1000State *s = opaque;
386     if (!qemu_get_queue(s->nic)->link_down) {
387         e1000_link_up(s);
388         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
389         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
390         DBGOUT(PHY, "Auto negotiation is completed\n");
391         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
392     }
393 }
394 
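/* Translate the RCTL buffer size bits (BSEX plus SZ_*) into a byte count;
 * unrecognised combinations fall back to the default of 2048 bytes. */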
395 static int
396 rxbufsize(uint32_t v)
397 {
398     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
399          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
400          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
401     switch (v) {
402     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
403         return 16384;
404     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
405         return 8192;
406     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
407         return 4096;
408     case E1000_RCTL_SZ_1024:
409         return 1024;
410     case E1000_RCTL_SZ_512:
411         return 512;
412     case E1000_RCTL_SZ_256:
413         return 256;
414     }
415     return 2048;
416 }
417 
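/* Device reset: stop the timers, reload the PHY and MAC register defaults
 * (PHY_ID2 comes from the device class), and seed RAL/RAH with the configured
 * MAC address so guests that never program RA still receive frames. */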
418 static void e1000_reset(void *opaque)
419 {
420     E1000State *d = opaque;
421     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
422     uint8_t *macaddr = d->conf.macaddr.a;
423     int i;
424 
425     timer_del(d->autoneg_timer);
426     timer_del(d->mit_timer);
427     d->mit_timer_on = 0;
428     d->mit_irq_level = 0;
429     d->mit_ide = 0;
430     memset(d->phy_reg, 0, sizeof d->phy_reg);
431     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
432     d->phy_reg[PHY_ID2] = edc->phy_id2;
433     memset(d->mac_reg, 0, sizeof d->mac_reg);
434     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
435     d->rxbuf_min_shift = 1;
436     memset(&d->tx, 0, sizeof d->tx);
437 
438     if (qemu_get_queue(d->nic)->link_down) {
439         e1000_link_down(d);
440     }
441 
442     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
443     d->mac_reg[RA] = 0;
444     d->mac_reg[RA + 1] = E1000_RAH_AV;
445     for (i = 0; i < 4; i++) {
446         d->mac_reg[RA] |= macaddr[i] << (8 * i);
447         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
448     }
449     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
450 }
451 
452 static void
453 set_ctrl(E1000State *s, int index, uint32_t val)
454 {
455     /* RST is self-clearing */
456     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
457 }
458 
459 static void
460 set_rx_control(E1000State *s, int index, uint32_t val)
461 {
462     s->mac_reg[RCTL] = val;
463     s->rxbuf_size = rxbufsize(val);
464     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
465     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
466            s->mac_reg[RCTL]);
467     qemu_flush_queued_packets(qemu_get_queue(s->nic));
468 }
469 
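/* Emulate an MDIO access through MDIC: only PHY address 1 responds; reads and
 * writes are checked against phy_regcap, writes may be routed through
 * phyreg_writeops, READY is set on completion and MDAC is raised when the
 * guest asked for an interrupt on completion. */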
470 static void
471 set_mdic(E1000State *s, int index, uint32_t val)
472 {
473     uint32_t data = val & E1000_MDIC_DATA_MASK;
474     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
475 
476     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
477         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
478     else if (val & E1000_MDIC_OP_READ) {
479         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
480         if (!(phy_regcap[addr] & PHY_R)) {
481             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
482             val |= E1000_MDIC_ERROR;
483         } else
484             val = (val ^ data) | s->phy_reg[addr];
485     } else if (val & E1000_MDIC_OP_WRITE) {
486         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
487         if (!(phy_regcap[addr] & PHY_W)) {
488             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
489             val |= E1000_MDIC_ERROR;
490         } else {
491             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
492                 phyreg_writeops[addr](s, index, data);
493             } else {
494                 s->phy_reg[addr] = data;
495             }
496         }
497     }
498     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
499 
500     if (val & E1000_MDIC_INT_EN) {
501         set_ics(s, 0, E1000_ICR_MDAC);
502     }
503 }
504 
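/* The EEPROM is bit-banged through EECD, Microwire style: set_eecd() samples
 * DI on rising SK edges to shift in the opcode and address, and get_eecd()
 * shifts the selected 16-bit word back out on DO, MSB first. */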
505 static uint32_t
506 get_eecd(E1000State *s, int index)
507 {
508     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
509 
510     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
511            s->eecd_state.bitnum_out, s->eecd_state.reading);
512     if (!s->eecd_state.reading ||
513         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
514           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
515         ret |= E1000_EECD_DO;
516     return ret;
517 }
518 
519 static void
520 set_eecd(E1000State *s, int index, uint32_t val)
521 {
522     uint32_t oldval = s->eecd_state.old_eecd;
523 
524     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
525             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
526     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
527         return;
528     }
529     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
530         s->eecd_state.val_in = 0;
531         s->eecd_state.bitnum_in = 0;
532         s->eecd_state.bitnum_out = 0;
533         s->eecd_state.reading = 0;
534     }
535     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
536         return;
537     }
538     if (!(E1000_EECD_SK & val)) {               /* falling edge */
539         s->eecd_state.bitnum_out++;
540         return;
541     }
542     s->eecd_state.val_in <<= 1;
543     if (val & E1000_EECD_DI)
544         s->eecd_state.val_in |= 1;
545     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
546         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
547         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
548             EEPROM_READ_OPCODE_MICROWIRE);
549     }
550     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
551            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
552            s->eecd_state.reading);
553 }
554 
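/* EERD offers a register-based EEPROM read: once the guest sets the START
 * bit, the addressed word (for addresses up to the checksum register) is
 * returned along with the DONE flag. */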
555 static uint32_t
556 flash_eerd_read(E1000State *s, int x)
557 {
558     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
559 
560     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
561         return (s->mac_reg[EERD]);
562 
563     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
564         return (E1000_EEPROM_RW_REG_DONE | r);
565 
566     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
567            E1000_EEPROM_RW_REG_DONE | r);
568 }
569 
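/* Compute an Internet checksum over data[css..cse] (to the end of the buffer
 * when cse is 0) and store the 16-bit result at offset sloc. */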
570 static void
571 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
572 {
573     uint32_t sum;
574 
575     if (cse && cse < n)
576         n = cse + 1;
577     if (sloc < n-1) {
578         sum = net_checksum_add(n-css, data+css);
579         stw_be_p(data + sloc, net_checksum_finish(sum));
580     }
581 }
582 
583 static inline int
584 vlan_enabled(E1000State *s)
585 {
586     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
587 }
588 
589 static inline int
590 vlan_rx_filter_enabled(E1000State *s)
591 {
592     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
593 }
594 
595 static inline int
596 is_vlan_packet(E1000State *s, const uint8_t *buf)
597 {
598     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
599                 le16_to_cpu(s->mac_reg[VET]));
600 }
601 
602 static inline int
603 is_vlan_txd(uint32_t txd_lower)
604 {
605     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
606 }
607 
608 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
609  * fill it in, so we just pad the descriptor length by 4 bytes unless the
610  * guest told us to strip it off the packet. */
611 static inline int
612 fcs_len(E1000State *s)
613 {
614     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
615 }
616 
617 static void
618 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
619 {
620     NetClientState *nc = qemu_get_queue(s->nic);
621     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
622         nc->info->receive(nc, buf, size);
623     } else {
624         qemu_send_packet(nc, buf, size);
625     }
626 }
627 
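/* Transmit the frame currently assembled in s->tx, one TSO segment at a time
 * when segmentation is on: fix up the IP and TCP/UDP headers for this
 * segment, insert any offloaded checksums, prepend the VLAN tag if requested,
 * hand the buffer to the backend and update the transmit statistics. */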
628 static void
629 xmit_seg(E1000State *s)
630 {
631     uint16_t len, *sp;
632     unsigned int frames = s->tx.tso_frames, css, sofar, n;
633     struct e1000_tx *tp = &s->tx;
634 
635     if (tp->tse && tp->cptse) {
636         css = tp->ipcss;
637         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
638                frames, tp->size, css);
639         if (tp->ip) {    /* IPv4 */
640             stw_be_p(tp->data+css+2, tp->size - css);
641             stw_be_p(tp->data+css+4,
642                      be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
643         } else {         /* IPv6 */
644             stw_be_p(tp->data+css+4, tp->size - css);
645         }
646         css = tp->tucss;
647         len = tp->size - css;
648         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
649         if (tp->tcp) {
650             sofar = frames * tp->mss;
651             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
652             if (tp->paylen - sofar > tp->mss)
653                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
654         } else    /* UDP */
655             stw_be_p(tp->data+css+4, len);
656         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
657             unsigned int phsum;
658             // add pseudo-header length before checksum calculation
659             sp = (uint16_t *)(tp->data + tp->tucso);
660             phsum = be16_to_cpup(sp) + len;
661             phsum = (phsum >> 16) + (phsum & 0xffff);
662             stw_be_p(sp, phsum);
663         }
664         tp->tso_frames++;
665     }
666 
667     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
668         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
669     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
670         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
671     if (tp->vlan_needed) {
672         memmove(tp->vlan, tp->data, 4);
673         memmove(tp->data, tp->data + 4, 8);
674         memcpy(tp->data + 8, tp->vlan_header, 4);
675         e1000_send_packet(s, tp->vlan, tp->size + 4);
676     } else {
677         e1000_send_packet(s, tp->data, tp->size);
678     }
679 
680     s->mac_reg[TPT]++;
681     s->mac_reg[GPTC]++;
682     n = s->mac_reg[TOTL];
683     if ((s->mac_reg[TOTL] += s->tx.size) < n)
684         s->mac_reg[TOTH]++;
685 }
686 
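/* Consume one descriptor from the TX ring: a context descriptor only latches
 * offload parameters into s->tx; data and legacy descriptors DMA their buffer
 * into tp->data (emitting a segment whenever hdr_len + mss bytes accumulate
 * under TSE), and the assembled packet is sent when EOP is seen. */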
687 static void
688 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
689 {
690     PCIDevice *d = PCI_DEVICE(s);
691     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
692     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
693     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
694     unsigned int msh = 0xfffff;
695     uint64_t addr;
696     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
697     struct e1000_tx *tp = &s->tx;
698 
699     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
700     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
701         op = le32_to_cpu(xp->cmd_and_length);
702         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
703         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
704         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
705         tp->tucss = xp->upper_setup.tcp_fields.tucss;
706         tp->tucso = xp->upper_setup.tcp_fields.tucso;
707         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
708         tp->paylen = op & 0xfffff;
709         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
710         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
711         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
712         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
713         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
714         tp->tso_frames = 0;
715         if (tp->tucso == 0) {    /* this is probably wrong */
716             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
717             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
718         }
719         return;
720     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
721         // data descriptor
722         if (tp->size == 0) {
723             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
724         }
725         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
726     } else {
727         // legacy descriptor
728         tp->cptse = 0;
729     }
730 
731     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
732         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
733         tp->vlan_needed = 1;
734         stw_be_p(tp->vlan_header,
735                       le16_to_cpu(s->mac_reg[VET]));
736         stw_be_p(tp->vlan_header + 2,
737                       le16_to_cpu(dp->upper.fields.special));
738     }
739 
740     addr = le64_to_cpu(dp->buffer_addr);
741     if (tp->tse && tp->cptse) {
742         msh = tp->hdr_len + tp->mss;
743         do {
744             bytes = split_size;
745             if (tp->size + bytes > msh)
746                 bytes = msh - tp->size;
747 
748             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
749             pci_dma_read(d, addr, tp->data + tp->size, bytes);
750             sz = tp->size + bytes;
751             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
752                 memmove(tp->header, tp->data, tp->hdr_len);
753             }
754             tp->size = sz;
755             addr += bytes;
756             if (sz == msh) {
757                 xmit_seg(s);
758                 memmove(tp->data, tp->header, tp->hdr_len);
759                 tp->size = tp->hdr_len;
760             }
761             split_size -= bytes;
762         } while (bytes && split_size);
763     } else if (!tp->tse && tp->cptse) {
764         // context descriptor TSE is not set, while data descriptor TSE is set
765         DBGOUT(TXERR, "TCP segmentation error\n");
766     } else {
767         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
768         pci_dma_read(d, addr, tp->data + tp->size, split_size);
769         tp->size += split_size;
770     }
771 
772     if (!(txd_lower & E1000_TXD_CMD_EOP))
773         return;
774     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
775         xmit_seg(s);
776     }
777     tp->tso_frames = 0;
778     tp->sum_needed = 0;
779     tp->vlan_needed = 0;
780     tp->size = 0;
781     tp->cptse = 0;
782 }
783 
784 static uint32_t
785 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
786 {
787     PCIDevice *d = PCI_DEVICE(s);
788     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
789 
790     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
791         return 0;
792     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
793                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
794     dp->upper.data = cpu_to_le32(txd_upper);
795     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
796                   &dp->upper, sizeof(dp->upper));
797     return E1000_ICR_TXDW;
798 }
799 
800 static uint64_t tx_desc_base(E1000State *s)
801 {
802     uint64_t bah = s->mac_reg[TDBAH];
803     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
804 
805     return (bah << 32) + bal;
806 }
807 
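/* Walk the TX ring from TDH towards TDT, processing and writing back each
 * descriptor, then raise the accumulated interrupt causes (TXQE, and TXDW for
 * descriptors that requested a report).  Stops early if TDH wraps all the way
 * around, which can only happen with bogus TDT/TDLEN values. */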
808 static void
809 start_xmit(E1000State *s)
810 {
811     PCIDevice *d = PCI_DEVICE(s);
812     dma_addr_t base;
813     struct e1000_tx_desc desc;
814     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
815 
816     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
817         DBGOUT(TX, "tx disabled\n");
818         return;
819     }
820 
821     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
822         base = tx_desc_base(s) +
823                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
824         pci_dma_read(d, base, &desc, sizeof(desc));
825 
826         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
827                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
828                desc.upper.data);
829 
830         process_tx_desc(s, &desc);
831         cause |= txdesc_writeback(s, base, &desc);
832 
833         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
834             s->mac_reg[TDH] = 0;
835         /*
836          * The following could happen only if guest software assigns
837          * bogus values to TDT/TDLEN.
838          * There's nothing too intelligent we could do about this.
839          */
840         if (s->mac_reg[TDH] == tdh_start) {
841             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
842                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
843             break;
844         }
845     }
846     set_ics(s, 0, cause);
847 }
848 
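/* Decide whether an incoming frame is accepted: VLAN filtering via VFTA,
 * unicast/multicast promiscuous modes, broadcast, exact matches against the
 * receive address (RA) array, and finally the multicast hash table (MTA).
 * Returns 1 to accept the frame, 0 to drop it. */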
849 static int
850 receive_filter(E1000State *s, const uint8_t *buf, int size)
851 {
852     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
853     static const int mta_shift[] = {4, 3, 2, 0};
854     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
855 
856     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
857         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
858         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
859                                      ((vid >> 5) & 0x7f));
860         if ((vfta & (1 << (vid & 0x1f))) == 0)
861             return 0;
862     }
863 
864     if (rctl & E1000_RCTL_UPE)			// promiscuous
865         return 1;
866 
867     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
868         return 1;
869 
870     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
871         return 1;
872 
873     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
874         if (!(rp[1] & E1000_RAH_AV))
875             continue;
876         ra[0] = cpu_to_le32(rp[0]);
877         ra[1] = cpu_to_le32(rp[1]);
878         if (!memcmp(buf, (uint8_t *)ra, 6)) {
879             DBGOUT(RXFILTER,
880                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
881                    (int)(rp - s->mac_reg - RA)/2,
882                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
883             return 1;
884         }
885     }
886     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
887            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
888 
889     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
890     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
891     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
892         return 1;
893     DBGOUT(RXFILTER,
894            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
895            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
896            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
897            s->mac_reg[MTA + (f >> 5)]);
898 
899     return 0;
900 }
901 
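/* Called by the net layer when the backend link state changes: take the link
 * down, restart emulated auto-negotiation, or bring the link straight up, and
 * signal LSC to the guest if STATUS changed. */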
902 static void
903 e1000_set_link_status(NetClientState *nc)
904 {
905     E1000State *s = qemu_get_nic_opaque(nc);
906     uint32_t old_status = s->mac_reg[STATUS];
907 
908     if (nc->link_down) {
909         e1000_link_down(s);
910     } else {
911         if (have_autoneg(s) &&
912             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
913             /* emulate auto-negotiation if supported */
914             timer_mod(s->autoneg_timer,
915                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
916         } else {
917             e1000_link_up(s);
918         }
919     }
920 
921     if (s->mac_reg[STATUS] != old_status)
922         set_ics(s, 0, E1000_ICR_LSC);
923 }
924 
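/* Check whether the RX ring has enough free descriptors for total_size bytes,
 * assuming each descriptor supplies rxbuf_size bytes of buffer space. */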
925 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
926 {
927     int bufs;
928     /* Fast-path short packets */
929     if (total_size <= s->rxbuf_size) {
930         return s->mac_reg[RDH] != s->mac_reg[RDT];
931     }
932     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
933         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
934     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
935         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
936             s->mac_reg[RDT] - s->mac_reg[RDH];
937     } else {
938         return false;
939     }
940     return total_size <= bufs * s->rxbuf_size;
941 }
942 
943 static int
944 e1000_can_receive(NetClientState *nc)
945 {
946     E1000State *s = qemu_get_nic_opaque(nc);
947 
948     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
949         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
950         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
951         e1000_has_rxbufs(s, 1);
952 }
953 
954 static uint64_t rx_desc_base(E1000State *s)
955 {
956     uint64_t bah = s->mac_reg[RDBAH];
957     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
958 
959     return (bah << 32) + bal;
960 }
961 
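/* Receive one frame from the backend: pad runts to the 60-byte minimum, drop
 * oversized frames unless LPE or SBP allows them, apply the receive filter,
 * strip the VLAN tag when VLAN handling is enabled, scatter the payload
 * across RX descriptors, update the RX statistics and raise RXT0 (plus
 * RXDMT0 once the ring falls below its threshold). */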
962 static ssize_t
963 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
964 {
965     E1000State *s = qemu_get_nic_opaque(nc);
966     PCIDevice *d = PCI_DEVICE(s);
967     struct e1000_rx_desc desc;
968     dma_addr_t base;
969     unsigned int n, rdt;
970     uint32_t rdh_start;
971     uint16_t vlan_special = 0;
972     uint8_t vlan_status = 0;
973     uint8_t min_buf[MIN_BUF_SIZE];
974     struct iovec min_iov;
975     uint8_t *filter_buf = iov->iov_base;
976     size_t size = iov_size(iov, iovcnt);
977     size_t iov_ofs = 0;
978     size_t desc_offset;
979     size_t desc_size;
980     size_t total_size;
981 
982     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
983         return -1;
984     }
985 
986     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
987         return -1;
988     }
989 
990     /* Pad to minimum Ethernet frame length */
991     if (size < sizeof(min_buf)) {
992         iov_to_buf(iov, iovcnt, 0, min_buf, size);
993         memset(&min_buf[size], 0, sizeof(min_buf) - size);
994         min_iov.iov_base = filter_buf = min_buf;
995         min_iov.iov_len = size = sizeof(min_buf);
996         iovcnt = 1;
997         iov = &min_iov;
998     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
999         /* This is very unlikely, but may happen. */
1000         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1001         filter_buf = min_buf;
1002     }
1003 
1004     /* Discard oversized packets if !LPE and !SBP. */
1005     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1006         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1007         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1008         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1009         return size;
1010     }
1011 
1012     if (!receive_filter(s, filter_buf, size)) {
1013         return size;
1014     }
1015 
1016     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1017         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1018                                                                 + 14)));
1019         iov_ofs = 4;
1020         if (filter_buf == iov->iov_base) {
1021             memmove(filter_buf + 4, filter_buf, 12);
1022         } else {
1023             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1024             while (iov->iov_len <= iov_ofs) {
1025                 iov_ofs -= iov->iov_len;
1026                 iov++;
1027             }
1028         }
1029         vlan_status = E1000_RXD_STAT_VP;
1030         size -= 4;
1031     }
1032 
1033     rdh_start = s->mac_reg[RDH];
1034     desc_offset = 0;
1035     total_size = size + fcs_len(s);
1036     if (!e1000_has_rxbufs(s, total_size)) {
1037             set_ics(s, 0, E1000_ICS_RXO);
1038             return -1;
1039     }
1040     do {
1041         desc_size = total_size - desc_offset;
1042         if (desc_size > s->rxbuf_size) {
1043             desc_size = s->rxbuf_size;
1044         }
1045         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1046         pci_dma_read(d, base, &desc, sizeof(desc));
1047         desc.special = vlan_special;
1048         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1049         if (desc.buffer_addr) {
1050             if (desc_offset < size) {
1051                 size_t iov_copy;
1052                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1053                 size_t copy_size = size - desc_offset;
1054                 if (copy_size > s->rxbuf_size) {
1055                     copy_size = s->rxbuf_size;
1056                 }
1057                 do {
1058                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1059                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1060                     copy_size -= iov_copy;
1061                     ba += iov_copy;
1062                     iov_ofs += iov_copy;
1063                     if (iov_ofs == iov->iov_len) {
1064                         iov++;
1065                         iov_ofs = 0;
1066                     }
1067                 } while (copy_size);
1068             }
1069             desc_offset += desc_size;
1070             desc.length = cpu_to_le16(desc_size);
1071             if (desc_offset >= total_size) {
1072                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1073             } else {
1074                 /* Guest zeroing out status is not a hardware requirement.
1075                    Clear EOP in case guest didn't do it. */
1076                 desc.status &= ~E1000_RXD_STAT_EOP;
1077             }
1078         } else { // as per intel docs; skip descriptors with null buf addr
1079             DBGOUT(RX, "Null RX descriptor!!\n");
1080         }
1081         pci_dma_write(d, base, &desc, sizeof(desc));
1082 
1083         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1084             s->mac_reg[RDH] = 0;
1085         /* see comment in start_xmit; same here */
1086         if (s->mac_reg[RDH] == rdh_start) {
1087             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1088                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1089             set_ics(s, 0, E1000_ICS_RXO);
1090             return -1;
1091         }
1092     } while (desc_offset < total_size);
1093 
1094     s->mac_reg[GPRC]++;
1095     s->mac_reg[TPR]++;
1096     /* TOR - Total Octets Received:
1097      * This register includes bytes received in a packet from the <Destination
1098      * Address> field through the <CRC> field, inclusively.
1099      */
1100     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1101     if (n < s->mac_reg[TORL])
1102         s->mac_reg[TORH]++;
1103     s->mac_reg[TORL] = n;
1104 
1105     n = E1000_ICS_RXT0;
1106     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1107         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1108     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1109         s->rxbuf_min_shift)
1110         n |= E1000_ICS_RXDMT0;
1111 
1112     set_ics(s, 0, n);
1113 
1114     return size;
1115 }
1116 
1117 static ssize_t
1118 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1119 {
1120     const struct iovec iov = {
1121         .iov_base = (uint8_t *)buf,
1122         .iov_len = size
1123     };
1124 
1125     return e1000_receive_iov(nc, &iov, 1);
1126 }
1127 
1128 static uint32_t
1129 mac_readreg(E1000State *s, int index)
1130 {
1131     return s->mac_reg[index];
1132 }
1133 
1134 static uint32_t
1135 mac_low4_read(E1000State *s, int index)
1136 {
1137     return s->mac_reg[index] & 0xf;
1138 }
1139 
1140 static uint32_t
1141 mac_low11_read(E1000State *s, int index)
1142 {
1143     return s->mac_reg[index] & 0x7ff;
1144 }
1145 
1146 static uint32_t
1147 mac_low13_read(E1000State *s, int index)
1148 {
1149     return s->mac_reg[index] & 0x1fff;
1150 }
1151 
1152 static uint32_t
1153 mac_low16_read(E1000State *s, int index)
1154 {
1155     return s->mac_reg[index] & 0xffff;
1156 }
1157 
1158 static uint32_t
1159 mac_icr_read(E1000State *s, int index)
1160 {
1161     uint32_t ret = s->mac_reg[ICR];
1162 
1163     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1164     set_interrupt_cause(s, 0, 0);
1165     return ret;
1166 }
1167 
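/* Statistics counters that clear on read; the 64-bit total-octets counters
 * (TORH/TORL and TOTH/TOTL) clear both 32-bit halves when the high word is
 * read. */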
1168 static uint32_t
1169 mac_read_clr4(E1000State *s, int index)
1170 {
1171     uint32_t ret = s->mac_reg[index];
1172 
1173     s->mac_reg[index] = 0;
1174     return ret;
1175 }
1176 
1177 static uint32_t
1178 mac_read_clr8(E1000State *s, int index)
1179 {
1180     uint32_t ret = s->mac_reg[index];
1181 
1182     s->mac_reg[index] = 0;
1183     s->mac_reg[index-1] = 0;
1184     return ret;
1185 }
1186 
1187 static void
1188 mac_writereg(E1000State *s, int index, uint32_t val)
1189 {
1190     uint32_t macaddr[2];
1191 
1192     s->mac_reg[index] = val;
1193 
1194     if (index == RA + 1) {
1195         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1196         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1197         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1198     }
1199 }
1200 
1201 static void
1202 set_rdt(E1000State *s, int index, uint32_t val)
1203 {
1204     s->mac_reg[index] = val & 0xffff;
1205     if (e1000_has_rxbufs(s, 1)) {
1206         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1207     }
1208 }
1209 
1210 static void
1211 set_16bit(E1000State *s, int index, uint32_t val)
1212 {
1213     s->mac_reg[index] = val & 0xffff;
1214 }
1215 
1216 static void
1217 set_dlen(E1000State *s, int index, uint32_t val)
1218 {
1219     s->mac_reg[index] = val & 0xfff80;
1220 }
1221 
1222 static void
1223 set_tctl(E1000State *s, int index, uint32_t val)
1224 {
1225     s->mac_reg[index] = val;
1226     s->mac_reg[TDT] &= 0xffff;
1227     start_xmit(s);
1228 }
1229 
1230 static void
1231 set_icr(E1000State *s, int index, uint32_t val)
1232 {
1233     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1234     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1235 }
1236 
1237 static void
1238 set_imc(E1000State *s, int index, uint32_t val)
1239 {
1240     s->mac_reg[IMS] &= ~val;
1241     set_ics(s, 0, 0);
1242 }
1243 
1244 static void
1245 set_ims(E1000State *s, int index, uint32_t val)
1246 {
1247     s->mac_reg[IMS] |= val;
1248     set_ics(s, 0, 0);
1249 }
1250 
1251 #define getreg(x)    [x] = mac_readreg
1252 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1253     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1254     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1255     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1256     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1257     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1258     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1259     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1260     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1261     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1262     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1263     getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1264     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1265     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1266     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),
1267 
1268     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1269     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1270     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1271     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1272     [EERD]    = flash_eerd_read,
1273     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1274     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1275     [RDFPC]   = mac_low13_read,
1276     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1277     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1278     [TDFPC]   = mac_low13_read,
1279     [AIT]     = mac_low16_read,
1280 
1281     [CRCERRS ... MPC]   = &mac_readreg,
1282     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1283     [FFLT ... FFLT+6]   = &mac_low11_read,
1284     [RA ... RA+31]      = &mac_readreg,
1285     [WUPM ... WUPM+31]  = &mac_readreg,
1286     [MTA ... MTA+127]   = &mac_readreg,
1287     [VFTA ... VFTA+127] = &mac_readreg,
1288     [FFMT ... FFMT+254] = &mac_low4_read,
1289     [FFVT ... FFVT+254] = &mac_readreg,
1290     [PBM ... PBM+16383] = &mac_readreg,
1291 };
1292 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1293 
1294 #define putreg(x)    [x] = mac_writereg
1295 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1296     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1297     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1298     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1299     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1300     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1301     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1302     putreg(WUS),      putreg(AIT),
1303 
1304     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1305     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1306     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1307     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1308     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1309     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1310     [ITR]    = set_16bit,
1311 
1312     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1313     [FFLT ... FFLT+6]   = &mac_writereg,
1314     [RA ... RA+31]      = &mac_writereg,
1315     [WUPM ... WUPM+31]  = &mac_writereg,
1316     [MTA ... MTA+127]   = &mac_writereg,
1317     [VFTA ... VFTA+127] = &mac_writereg,
1318     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1319     [PBM ... PBM+16383] = &mac_writereg,
1320 };
1321 
1322 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1323 
1324 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1325 
1326 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1327 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1328  * f - flag bits (up to 6 possible flags)
1329  * n - flag needed
1330  * p - partially implemented */
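/* e.g. markflag(MIT) yields (E1000_FLAG_MIT << 2) | MAC_ACCESS_FLAG_NEEDED,
 * i.e. the register is only reachable when the "mitigation" compat flag is
 * enabled. */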
1331 static const uint8_t mac_reg_access[0x8000] = {
1332     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1333     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1334 
1335     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1336     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1337     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1338     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1339     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1340     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1341     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1342     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1343     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1344     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1345     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1346     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1347     [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1348     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1349     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1350     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1351 
1352     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1353     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1354     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1355     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1356     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1357     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1358     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1359     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1360     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1361     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1362     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1363 };
1364 
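/* MMIO accesses are dispatched by 32-bit word index through the
 * macreg_writeops/macreg_readops tables; registers gated by a compat flag in
 * mac_reg_access are ignored (with a debug message) when that flag is off. */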
1365 static void
1366 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1367                  unsigned size)
1368 {
1369     E1000State *s = opaque;
1370     unsigned int index = (addr & 0x1ffff) >> 2;
1371 
1372     if (index < NWRITEOPS && macreg_writeops[index]) {
1373         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1374             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1375             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1376                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1377                        "It is not fully implemented.\n", index<<2);
1378             }
1379             macreg_writeops[index](s, index, val);
1380         } else {    /* "flag needed" bit is set, but the flag is not active */
1381             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1382                    index<<2);
1383         }
1384     } else if (index < NREADOPS && macreg_readops[index]) {
1385         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1386                index<<2, val);
1387     } else {
1388         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1389                index<<2, val);
1390     }
1391 }
1392 
1393 static uint64_t
1394 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1395 {
1396     E1000State *s = opaque;
1397     unsigned int index = (addr & 0x1ffff) >> 2;
1398 
1399     if (index < NREADOPS && macreg_readops[index]) {
1400         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1401             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1402             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1403                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1404                        "It is not fully implemented.\n", index<<2);
1405             }
1406             return macreg_readops[index](s, index);
1407         } else {    /* "flag needed" bit is set, but the flag is not active */
1408             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1409                    index<<2);
1410         }
1411     } else {
1412         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1413     }
1414     return 0;
1415 }
1416 
1417 static const MemoryRegionOps e1000_mmio_ops = {
1418     .read = e1000_mmio_read,
1419     .write = e1000_mmio_write,
1420     .endianness = DEVICE_LITTLE_ENDIAN,
1421     .impl = {
1422         .min_access_size = 4,
1423         .max_access_size = 4,
1424     },
1425 };
1426 
1427 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1428                               unsigned size)
1429 {
1430     E1000State *s = opaque;
1431 
1432     (void)s;
1433     return 0;
1434 }
1435 
1436 static void e1000_io_write(void *opaque, hwaddr addr,
1437                            uint64_t val, unsigned size)
1438 {
1439     E1000State *s = opaque;
1440 
1441     (void)s;
1442 }
1443 
1444 static const MemoryRegionOps e1000_io_ops = {
1445     .read = e1000_io_read,
1446     .write = e1000_io_write,
1447     .endianness = DEVICE_LITTLE_ENDIAN,
1448 };
1449 
1450 static bool is_version_1(void *opaque, int version_id)
1451 {
1452     return version_id == 1;
1453 }
1454 
1455 static void e1000_pre_save(void *opaque)
1456 {
1457     E1000State *s = opaque;
1458     NetClientState *nc = qemu_get_queue(s->nic);
1459 
1460     /* If the mitigation timer is active, emulate a timeout now. */
1461     if (s->mit_timer_on) {
1462         e1000_mit_timer(s);
1463     }
1464 
1465     /*
1466      * If link is down and auto-negotiation is supported and ongoing,
1467      * complete auto-negotiation immediately. This allows us to look
1468      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1469      */
1470     if (nc->link_down && have_autoneg(s)) {
1471         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1472     }
1473 }
1474 
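/* After migration: mitigation state is reset (and the mitigation registers
 * cleared when the MIT compat flag is off), and link_down is inferred from
 * STATUS.LU, except that auto-negotiation is restarted if it was still in
 * progress when the source was saved. */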
1475 static int e1000_post_load(void *opaque, int version_id)
1476 {
1477     E1000State *s = opaque;
1478     NetClientState *nc = qemu_get_queue(s->nic);
1479 
1480     if (!chkflag(MIT)) {
1481         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1482             s->mac_reg[TADV] = 0;
1483         s->mit_irq_level = false;
1484     }
1485     s->mit_ide = 0;
1486     s->mit_timer_on = false;
1487 
1488     /* nc.link_down can't be migrated, so infer link_down according
1489      * to link status bit in mac_reg[STATUS].
1490      * Alternatively, restart link negotiation if it was in progress. */
1491     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1492 
1493     if (have_autoneg(s) &&
1494         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1495         nc->link_down = false;
1496         timer_mod(s->autoneg_timer,
1497                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1498     }
1499 
1500     return 0;
1501 }
1502 
1503 static bool e1000_mit_state_needed(void *opaque)
1504 {
1505     E1000State *s = opaque;
1506 
1507     return chkflag(MIT);
1508 }
1509 
1510 static bool e1000_full_mac_needed(void *opaque)
1511 {
1512     E1000State *s = opaque;
1513 
1514     return chkflag(MAC);
1515 }
1516 
1517 static const VMStateDescription vmstate_e1000_mit_state = {
1518     .name = "e1000/mit_state",
1519     .version_id = 1,
1520     .minimum_version_id = 1,
1521     .needed = e1000_mit_state_needed,
1522     .fields = (VMStateField[]) {
1523         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1524         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1525         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1526         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1527         VMSTATE_BOOL(mit_irq_level, E1000State),
1528         VMSTATE_END_OF_LIST()
1529     }
1530 };
1531 
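/*
 * Subsection: the complete mac_reg array.  It is only sent when the
 * E1000_FLAG_MAC compatibility flag is set on the device.
 */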
1532 static const VMStateDescription vmstate_e1000_full_mac_state = {
1533     .name = "e1000/full_mac_state",
1534     .version_id = 1,
1535     .minimum_version_id = 1,
1536     .needed = e1000_full_mac_needed,
1537     .fields = (VMStateField[]) {
1538         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1539         VMSTATE_END_OF_LIST()
1540     }
1541 };
1542 
1543 static const VMStateDescription vmstate_e1000 = {
1544     .name = "e1000",
1545     .version_id = 2,
1546     .minimum_version_id = 1,
1547     .pre_save = e1000_pre_save,
1548     .post_load = e1000_post_load,
1549     .fields = (VMStateField[]) {
1550         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1551         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1552         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1553         VMSTATE_UINT32(rxbuf_size, E1000State),
1554         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1555         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1556         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1557         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1558         VMSTATE_UINT16(eecd_state.reading, E1000State),
1559         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1560         VMSTATE_UINT8(tx.ipcss, E1000State),
1561         VMSTATE_UINT8(tx.ipcso, E1000State),
1562         VMSTATE_UINT16(tx.ipcse, E1000State),
1563         VMSTATE_UINT8(tx.tucss, E1000State),
1564         VMSTATE_UINT8(tx.tucso, E1000State),
1565         VMSTATE_UINT16(tx.tucse, E1000State),
1566         VMSTATE_UINT32(tx.paylen, E1000State),
1567         VMSTATE_UINT8(tx.hdr_len, E1000State),
1568         VMSTATE_UINT16(tx.mss, E1000State),
1569         VMSTATE_UINT16(tx.size, E1000State),
1570         VMSTATE_UINT16(tx.tso_frames, E1000State),
1571         VMSTATE_UINT8(tx.sum_needed, E1000State),
1572         VMSTATE_INT8(tx.ip, E1000State),
1573         VMSTATE_INT8(tx.tcp, E1000State),
1574         VMSTATE_BUFFER(tx.header, E1000State),
1575         VMSTATE_BUFFER(tx.data, E1000State),
1576         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1577         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1578         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1579         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1580         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1581         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1582         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1583         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1584         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1585         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1586         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1587         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1588         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1589         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1590         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1591         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1592         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1593         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1594         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1595         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1596         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1597         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1598         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1599         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1600         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1601         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1602         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1603         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1604         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1605         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1606         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1607         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1608         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1609         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1610         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1611         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1612         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1613         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1614         VMSTATE_UINT32(mac_reg[VET], E1000State),
1615         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1616         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1617         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1618         VMSTATE_END_OF_LIST()
1619     },
1620     .subsections = (const VMStateDescription*[]) {
1621         &vmstate_e1000_mit_state,
1622         &vmstate_e1000_full_mac_state,
1623         NULL
1624     }
1625 };
1626 
1627 /*
1628  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1629  * Note: A valid DevId will be inserted during pci_e1000_realize().
1630  */
1631 static const uint16_t e1000_eeprom_template[64] = {
1632     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1633     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1634     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1635     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1636     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1637     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1638     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1639     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1640 };
1641 
1642 /* PCI interface */
1643 
1644 static void
1645 e1000_mmio_setup(E1000State *d)
1646 {
1647     int i;
1648     const uint32_t excluded_regs[] = {
1649         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1650         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1651     };
1652 
1653     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1654                           "e1000-mmio", PNPMMIO_SIZE);
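    /*
     * Enable MMIO write coalescing for every register range except the
     * registers listed in excluded_regs (the interrupt registers, MDIC,
     * TCTL and TDT), whose writes must take effect immediately.
     */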
1655     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1656     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1657         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1658                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1659     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1660 }
1661 
1662 static void
1663 pci_e1000_uninit(PCIDevice *dev)
1664 {
1665     E1000State *d = E1000(dev);
1666 
1667     timer_del(d->autoneg_timer);
1668     timer_free(d->autoneg_timer);
1669     timer_del(d->mit_timer);
1670     timer_free(d->mit_timer);
1671     qemu_del_nic(d->nic);
1672 }
1673 
1674 static NetClientInfo net_e1000_info = {
1675     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1676     .size = sizeof(NICState),
1677     .can_receive = e1000_can_receive,
1678     .receive = e1000_receive,
1679     .receive_iov = e1000_receive_iov,
1680     .link_status_changed = e1000_set_link_status,
1681 };
1682 
1683 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
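/*
 * PCI config-space write hook: once the guest enables bus mastering,
 * flush any packets that were queued while DMA was not allowed.
 */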
1684                                 uint32_t val, int len)
1685 {
1686     E1000State *s = E1000(pci_dev);
1687 
1688     pci_default_write_config(pci_dev, address, val, len);
1689 
1690     if (range_covers_byte(address, len, PCI_COMMAND) &&
1691         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1692         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1693     }
1694 }
1695 
1696 
1697 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1698 {
1699     DeviceState *dev = DEVICE(pci_dev);
1700     E1000State *d = E1000(pci_dev);
1701     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1702     uint8_t *pci_conf;
1703     uint16_t checksum = 0;
1704     int i;
1705     uint8_t *macaddr;
1706 
1707     pci_dev->config_write = e1000_write_config;
1708 
1709     pci_conf = pci_dev->config;
1710 
1711     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1712     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1713 
1714     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1715 
1716     e1000_mmio_setup(d);
1717 
1718     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1719 
1720     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1721 
1722     memmove(d->eeprom_data, e1000_eeprom_template,
1723         sizeof e1000_eeprom_template);
1724     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1725     macaddr = d->conf.macaddr.a;
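    /* EEPROM words 0-2 hold the MAC address, low byte first. */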
1726     for (i = 0; i < 3; i++)
1727         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1728     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
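    /*
     * Fix up the checksum word so that words 0 through EEPROM_CHECKSUM_REG
     * sum to EEPROM_SUM (modulo 2^16), as drivers verify.
     */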
1729     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1730         checksum += d->eeprom_data[i];
1731     checksum = (uint16_t) EEPROM_SUM - checksum;
1732     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1733 
1734     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1735                           object_get_typename(OBJECT(d)), dev->id, d);
1736 
1737     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1738 
1739     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1740     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1741 }
1742 
1743 static void qdev_e1000_reset(DeviceState *dev)
1744 {
1745     E1000State *d = E1000(dev);
1746     e1000_reset(d);
1747 }
1748 
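/*
 * Both bit properties default to on; they can be cleared per device for
 * compatibility with older behaviour, e.g. with something like
 * "-device e1000,mitigation=off" (illustrative command line).
 */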
1749 static Property e1000_properties[] = {
1750     DEFINE_NIC_PROPERTIES(E1000State, conf),
1751     DEFINE_PROP_BIT("autonegotiation", E1000State,
1752                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1753     DEFINE_PROP_BIT("mitigation", E1000State,
1754                     compat_flags, E1000_FLAG_MIT_BIT, true),
1755     DEFINE_PROP_END_OF_LIST(),
1756 };
1757 
1758 typedef struct E1000Info {
1759     const char *name;
1760     uint16_t   device_id;
1761     uint8_t    revision;
1762     uint16_t   phy_id2;
1763 } E1000Info;
1764 
1765 static void e1000_class_init(ObjectClass *klass, void *data)
1766 {
1767     DeviceClass *dc = DEVICE_CLASS(klass);
1768     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1769     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1770     const E1000Info *info = data;
1771 
1772     k->realize = pci_e1000_realize;
1773     k->exit = pci_e1000_uninit;
1774     k->romfile = "efi-e1000.rom";
1775     k->vendor_id = PCI_VENDOR_ID_INTEL;
1776     k->device_id = info->device_id;
1777     k->revision = info->revision;
1778     e->phy_id2 = info->phy_id2;
1779     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1780     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1781     dc->desc = "Intel Gigabit Ethernet";
1782     dc->reset = qdev_e1000_reset;
1783     dc->vmsd = &vmstate_e1000;
1784     dc->props = e1000_properties;
1785 }
1786 
1787 static void e1000_instance_init(Object *obj)
1788 {
1789     E1000State *n = E1000(obj);
1790     device_add_bootindex_property(obj, &n->conf.bootindex,
1791                                   "bootindex", "/ethernet-phy@0",
1792                                   DEVICE(n), NULL);
1793 }
1794 
1795 static const TypeInfo e1000_base_info = {
1796     .name          = TYPE_E1000_BASE,
1797     .parent        = TYPE_PCI_DEVICE,
1798     .instance_size = sizeof(E1000State),
1799     .instance_init = e1000_instance_init,
1800     .class_size    = sizeof(E1000BaseClass),
1801     .abstract      = true,
1802 };
1803 
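/*
 * Model variants registered below.  Each entry becomes a separate device
 * type; for example, a guest NIC could be created with something like
 * "-device e1000-82545em,netdev=net0" (illustrative command line).
 */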
1804 static const E1000Info e1000_devices[] = {
1805     {
1806         .name      = "e1000",
1807         .device_id = E1000_DEV_ID_82540EM,
1808         .revision  = 0x03,
1809         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1810     },
1811     {
1812         .name      = "e1000-82544gc",
1813         .device_id = E1000_DEV_ID_82544GC_COPPER,
1814         .revision  = 0x03,
1815         .phy_id2   = E1000_PHY_ID2_82544x,
1816     },
1817     {
1818         .name      = "e1000-82545em",
1819         .device_id = E1000_DEV_ID_82545EM_COPPER,
1820         .revision  = 0x03,
1821         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1822     },
1823 };
1824 
1825 static void e1000_register_types(void)
1826 {
1827     int i;
1828 
1829     type_register_static(&e1000_base_info);
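    /* Register one concrete, user-creatable type per entry in
     * e1000_devices, all derived from the abstract TYPE_E1000_BASE. */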
1830     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1831         const E1000Info *info = &e1000_devices[i];
1832         TypeInfo type_info = {};
1833 
1834         type_info.name = info->name;
1835         type_info.parent = TYPE_E1000_BASE;
1836         type_info.class_data = (void *)info;
1837         type_info.class_init = e1000_class_init;
1838         type_info.instance_init = e1000_instance_init;
1839 
1840         type_register(&type_info);
1841     }
1842 }
1843 
1844 type_init(e1000_register_types)
1845