xref: /qemu/hw/net/e1000.c (revision 45e93764711484440e56f580f233009bb3da18bc)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37 
38 #include "e1000_regs.h"
39 
40 #define E1000_DEBUG
41 
42 #ifdef E1000_DEBUG
43 enum {
44     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
45     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
46     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
47     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
48 };
49 #define DBGBIT(x)    (1<<DEBUG_##x)
50 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
51 
52 #define DBGOUT(what, fmt, ...) do { \
53     if (debugflags & DBGBIT(what)) \
54         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
55     } while (0)
56 #else
57 #define DBGOUT(what, fmt, ...) do {} while (0)
58 #endif
59 
60 #define IOPORT_SIZE       0x40
61 #define PNPMMIO_SIZE      0x20000
62 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
63 
64 /* this is the size past which hardware will drop packets when setting LPE=0 */
65 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
66 /* this is the size past which hardware will drop packets when setting LPE=1 */
67 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
68 
69 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
70 
71 /*
72  * HW models:
73  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76  *  Others never tested
77  */
78 
79 typedef struct E1000State_st {
80     /*< private >*/
81     PCIDevice parent_obj;
82     /*< public >*/
83 
84     NICState *nic;
85     NICConf conf;
86     MemoryRegion mmio;
87     MemoryRegion io;
88 
89     uint32_t mac_reg[0x8000];
90     uint16_t phy_reg[0x20];
91     uint16_t eeprom_data[64];
92 
93     uint32_t rxbuf_size;
94     uint32_t rxbuf_min_shift;
95     struct e1000_tx {
96         unsigned char header[256];
97         unsigned char vlan_header[4];
98         /* Fields vlan and data must not be reordered or separated. */
99         unsigned char vlan[4];
100         unsigned char data[0x10000];
101         uint16_t size;
102         unsigned char sum_needed;
103         unsigned char vlan_needed;
104         uint8_t ipcss;
105         uint8_t ipcso;
106         uint16_t ipcse;
107         uint8_t tucss;
108         uint8_t tucso;
109         uint16_t tucse;
110         uint8_t hdr_len;
111         uint16_t mss;
112         uint32_t paylen;
113         uint16_t tso_frames;
114         char tse;
115         int8_t ip;
116         int8_t tcp;
117         char cptse;     // current packet tse bit
118     } tx;
119 
120     struct {
121         uint32_t val_in;    /* shifted in from guest driver */
122         uint16_t bitnum_in;
123         uint16_t bitnum_out;
124         uint16_t reading;
125         uint32_t old_eecd;
126     } eecd_state;
127 
128     QEMUTimer *autoneg_timer;
129 
130     QEMUTimer *mit_timer;      /* Mitigation timer. */
131     bool mit_timer_on;         /* Mitigation timer is running. */
132     bool mit_irq_level;        /* Tracks interrupt pin level. */
133     uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
134 
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_MIT_BIT 1
138 #define E1000_FLAG_MAC_BIT 2
139 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
140 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
141 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
142     uint32_t compat_flags;
143 } E1000State;
144 
145 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
146 
147 typedef struct E1000BaseClass {
148     PCIDeviceClass parent_class;
149     uint16_t phy_id2;
150 } E1000BaseClass;
151 
152 #define TYPE_E1000_BASE "e1000-base"
153 
154 #define E1000(obj) \
155     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
156 
157 #define E1000_DEVICE_CLASS(klass) \
158      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
159 #define E1000_DEVICE_GET_CLASS(obj) \
160     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
161 
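/* Register array indices below are the MMIO byte offsets from e1000_regs.h
 * divided by 4, i.e. one mac_reg[] slot per 32-bit register. */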
162 #define defreg(x)    x = (E1000_##x>>2)
163 enum {
164     defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
165     defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
166     defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
167     defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
168     defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
169     defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
170     defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
171     defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
172     defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
173     defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
174     defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
175     defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
176     defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
177     defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
178     defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
179     defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
180     defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
181     defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
182     defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
183     defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
184     defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
185     defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC)
186 };
187 
188 static void
189 e1000_link_down(E1000State *s)
190 {
191     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
192     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
193     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
194     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
195 }
196 
197 static void
198 e1000_link_up(E1000State *s)
199 {
200     s->mac_reg[STATUS] |= E1000_STATUS_LU;
201     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
202 
203     /* E1000_STATUS_LU is tested by e1000_can_receive() */
204     qemu_flush_queued_packets(qemu_get_queue(s->nic));
205 }
206 
207 static bool
208 have_autoneg(E1000State *s)
209 {
210     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
211 }
212 
213 static void
214 set_phy_ctrl(E1000State *s, int index, uint16_t val)
215 {
216     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
217     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
218                                    MII_CR_RESET |
219                                    MII_CR_RESTART_AUTO_NEG);
220 
221     /*
222      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
223      * migrate during auto negotiation, after migration the link will be
224      * down.
225      */
226     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
227         e1000_link_down(s);
228         DBGOUT(PHY, "Start link auto negotiation\n");
229         timer_mod(s->autoneg_timer,
230                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
231     }
232 }
233 
234 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
235     [PHY_CTRL] = set_phy_ctrl,
236 };
237 
238 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
239 
240 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
241 static const char phy_regcap[0x20] = {
242     [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
243     [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
244     [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
245     [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
246     [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
247     [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
248     [PHY_AUTONEG_EXP] = PHY_R,
249 };
250 
251 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
252 static const uint16_t phy_reg_init[] = {
253     [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
254                    MII_CR_FULL_DUPLEX |
255                    MII_CR_AUTO_NEG_EN,
256 
257     [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
258                    MII_SR_LINK_STATUS |   /* link initially up */
259                    MII_SR_AUTONEG_CAPS |
260                    /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
261                    MII_SR_PREAMBLE_SUPPRESS |
262                    MII_SR_EXTENDED_STATUS |
263                    MII_SR_10T_HD_CAPS |
264                    MII_SR_10T_FD_CAPS |
265                    MII_SR_100X_HD_CAPS |
266                    MII_SR_100X_FD_CAPS,
267 
268     [PHY_ID1] = 0x141,
269     /* [PHY_ID2] configured per DevId, from e1000_reset() */
270     [PHY_AUTONEG_ADV] = 0xde1,
271     [PHY_LP_ABILITY] = 0x1e0,
272     [PHY_1000T_CTRL] = 0x0e00,
273     [PHY_1000T_STATUS] = 0x3c00,
274     [M88E1000_PHY_SPEC_CTRL] = 0x360,
275     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
276     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
277 };
278 
279 static const uint32_t mac_reg_init[] = {
280     [PBA]     = 0x00100030,
281     [LEDCTL]  = 0x602,
282     [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
283                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
284     [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
285                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
286                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
287                 E1000_STATUS_LU,
288     [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
289                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
290                 E1000_MANC_RMCP_EN,
291 };
292 
293 /* Helper function, *curr == 0 means the value is not set */
294 static inline void
295 mit_update_delay(uint32_t *curr, uint32_t value)
296 {
297     if (value && (*curr == 0 || value < *curr)) {
298         *curr = value;
299     }
300 }
301 
302 static void
303 set_interrupt_cause(E1000State *s, int index, uint32_t val)
304 {
305     PCIDevice *d = PCI_DEVICE(s);
306     uint32_t pending_ints;
307     uint32_t mit_delay;
308 
309     s->mac_reg[ICR] = val;
310 
311     /*
312      * Make sure ICR and ICS registers have the same value.
313      * The spec says that the ICS register is write-only.  However in practice,
314      * on real hardware ICS is readable, and for reads it has the same value as
315      * ICR (except that ICS does not have the clear on read behaviour of ICR).
316      *
317      * The VxWorks PRO/1000 driver uses this behaviour.
318      */
319     s->mac_reg[ICS] = val;
320 
321     pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
322     if (!s->mit_irq_level && pending_ints) {
323         /*
324          * Here we detect a potential raising edge. We postpone raising the
325          * interrupt line if we are inside the mitigation delay window
326          * (s->mit_timer_on == 1).
327          * We provide a partial implementation of interrupt mitigation,
328          * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
329          * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
330          * RADV; relative timers based on TIDV and RDTR are not implemented.
331          */
332         if (s->mit_timer_on) {
333             return;
334         }
335         if (chkflag(MIT)) {
336             /* Compute the next mitigation delay according to pending
337              * interrupts and the current values of RADV (provided
338              * RDTR!=0), TADV and ITR.
339              * Then rearm the timer.
340              */
341             mit_delay = 0;
342             if (s->mit_ide &&
343                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
344                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
345             }
346             if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
347                 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
348             }
349             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
350 
351             if (mit_delay) {
352                 s->mit_timer_on = 1;
353                 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
354                           mit_delay * 256);
355             }
356             s->mit_ide = 0;
357         }
358     }
359 
360     s->mit_irq_level = (pending_ints != 0);
361     pci_set_irq(d, s->mit_irq_level);
362 }
363 
364 static void
365 e1000_mit_timer(void *opaque)
366 {
367     E1000State *s = opaque;
368 
369     s->mit_timer_on = 0;
370     /* Call set_interrupt_cause to update the irq level (if necessary). */
371     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
372 }
373 
374 static void
375 set_ics(E1000State *s, int index, uint32_t val)
376 {
377     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
378         s->mac_reg[IMS]);
379     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
380 }
381 
382 static void
383 e1000_autoneg_timer(void *opaque)
384 {
385     E1000State *s = opaque;
386     if (!qemu_get_queue(s->nic)->link_down) {
387         e1000_link_up(s);
388         s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
389         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
390         DBGOUT(PHY, "Auto negotiation is completed\n");
391         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
392     }
393 }
394 
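/* Decode the RCTL buffer size field (the SZ_* bits plus the BSEX extension)
 * into a receive buffer size in bytes; unrecognised combinations fall back
 * to the 2048 byte default. */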
395 static int
396 rxbufsize(uint32_t v)
397 {
398     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
399          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
400          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
401     switch (v) {
402     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
403         return 16384;
404     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
405         return 8192;
406     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
407         return 4096;
408     case E1000_RCTL_SZ_1024:
409         return 1024;
410     case E1000_RCTL_SZ_512:
411         return 512;
412     case E1000_RCTL_SZ_256:
413         return 256;
414     }
415     return 2048;
416 }
417 
418 static void e1000_reset(void *opaque)
419 {
420     E1000State *d = opaque;
421     E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
422     uint8_t *macaddr = d->conf.macaddr.a;
423     int i;
424 
425     timer_del(d->autoneg_timer);
426     timer_del(d->mit_timer);
427     d->mit_timer_on = 0;
428     d->mit_irq_level = 0;
429     d->mit_ide = 0;
430     memset(d->phy_reg, 0, sizeof d->phy_reg);
431     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
432     d->phy_reg[PHY_ID2] = edc->phy_id2;
433     memset(d->mac_reg, 0, sizeof d->mac_reg);
434     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
435     d->rxbuf_min_shift = 1;
436     memset(&d->tx, 0, sizeof d->tx);
437 
438     if (qemu_get_queue(d->nic)->link_down) {
439         e1000_link_down(d);
440     }
441 
442     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
443     d->mac_reg[RA] = 0;
444     d->mac_reg[RA + 1] = E1000_RAH_AV;
445     for (i = 0; i < 4; i++) {
446         d->mac_reg[RA] |= macaddr[i] << (8 * i);
447         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
448     }
449     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
450 }
451 
452 static void
453 set_ctrl(E1000State *s, int index, uint32_t val)
454 {
455     /* RST is self clearing */
456     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
457 }
458 
459 static void
460 set_rx_control(E1000State *s, int index, uint32_t val)
461 {
462     s->mac_reg[RCTL] = val;
463     s->rxbuf_size = rxbufsize(val);
464     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
465     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
466            s->mac_reg[RCTL]);
467     qemu_flush_queued_packets(qemu_get_queue(s->nic));
468 }
469 
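/* MDIC register writes: perform the requested PHY register read or write
 * (only PHY address 1 is present), set the READY bit, and raise the MDAC
 * interrupt if the guest asked for one. */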
470 static void
471 set_mdic(E1000State *s, int index, uint32_t val)
472 {
473     uint32_t data = val & E1000_MDIC_DATA_MASK;
474     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
475 
476     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
477         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
478     else if (val & E1000_MDIC_OP_READ) {
479         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
480         if (!(phy_regcap[addr] & PHY_R)) {
481             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
482             val |= E1000_MDIC_ERROR;
483         } else
484             val = (val ^ data) | s->phy_reg[addr];
485     } else if (val & E1000_MDIC_OP_WRITE) {
486         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
487         if (!(phy_regcap[addr] & PHY_W)) {
488             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
489             val |= E1000_MDIC_ERROR;
490         } else {
491             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
492                 phyreg_writeops[addr](s, index, data);
493             } else {
494                 s->phy_reg[addr] = data;
495             }
496         }
497     }
498     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
499 
500     if (val & E1000_MDIC_INT_EN) {
501         set_ics(s, 0, E1000_ICR_MDAC);
502     }
503 }
504 
505 static uint32_t
506 get_eecd(E1000State *s, int index)
507 {
508     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
509 
510     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
511            s->eecd_state.bitnum_out, s->eecd_state.reading);
512     if (!s->eecd_state.reading ||
513         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
514           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
515         ret |= E1000_EECD_DO;
516     return ret;
517 }
518 
519 static void
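/* Bit-banged Microwire EEPROM access through EECD: the guest clocks a 9-bit
 * read opcode + address in on rising SK edges; once a read is recognised,
 * get_eecd() shifts the addressed eeprom_data word out on the DO line. */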
520 set_eecd(E1000State *s, int index, uint32_t val)
521 {
522     uint32_t oldval = s->eecd_state.old_eecd;
523 
524     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
525             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
526     if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
527         return;
528     }
529     if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
530         s->eecd_state.val_in = 0;
531         s->eecd_state.bitnum_in = 0;
532         s->eecd_state.bitnum_out = 0;
533         s->eecd_state.reading = 0;
534     }
535     if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
536         return;
537     }
538     if (!(E1000_EECD_SK & val)) {               /* falling edge */
539         s->eecd_state.bitnum_out++;
540         return;
541     }
542     s->eecd_state.val_in <<= 1;
543     if (val & E1000_EECD_DI)
544         s->eecd_state.val_in |= 1;
545     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
546         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
547         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
548             EEPROM_READ_OPCODE_MICROWIRE);
549     }
550     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
551            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
552            s->eecd_state.reading);
553 }
554 
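/* EEPROM reads through the EERD register: once the START bit has been
 * written, return the addressed eeprom_data word together with the DONE
 * flag (reads past EEPROM_CHECKSUM_REG return DONE with no data). */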
555 static uint32_t
556 flash_eerd_read(E1000State *s, int x)
557 {
558     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
559 
560     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
561         return (s->mac_reg[EERD]);
562 
563     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
564         return (E1000_EEPROM_RW_REG_DONE | r);
565 
566     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
567            E1000_EEPROM_RW_REG_DONE | r);
568 }
569 
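/* Insert a 16-bit Internet checksum at offset sloc, computed over
 * data[css..n) with n clipped to cse + 1 when cse is nonzero. */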
570 static void
571 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
572 {
573     uint32_t sum;
574 
575     if (cse && cse < n)
576         n = cse + 1;
577     if (sloc < n-1) {
578         sum = net_checksum_add(n-css, data+css);
579         stw_be_p(data + sloc, net_checksum_finish(sum));
580     }
581 }
582 
583 static inline void
584 inc_reg_if_not_full(E1000State *s, int index)
585 {
586     if (s->mac_reg[index] != 0xffffffff) {
587         s->mac_reg[index]++;
588     }
589 }
590 
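/* Add 'size' to the 64-bit statistics counter spread over mac_reg[index]
 * (low 32 bits) and mac_reg[index + 1] (high 32 bits), saturating instead
 * of wrapping. */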
591 static void
592 grow_8reg_if_not_full(E1000State *s, int index, int size)
593 {
594     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
595 
596     if (sum + size < sum) {
597         sum = ~0ULL;
598     } else {
599         sum += size;
600     }
601     s->mac_reg[index] = sum;
602     s->mac_reg[index+1] = sum >> 32;
603 }
604 
605 static inline int
606 vlan_enabled(E1000State *s)
607 {
608     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
609 }
610 
611 static inline int
612 vlan_rx_filter_enabled(E1000State *s)
613 {
614     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
615 }
616 
617 static inline int
618 is_vlan_packet(E1000State *s, const uint8_t *buf)
619 {
620     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
621                 le16_to_cpu(s->mac_reg[VET]));
622 }
623 
624 static inline int
625 is_vlan_txd(uint32_t txd_lower)
626 {
627     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
628 }
629 
630 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
631  * fill it in ourselves, so we just pad the descriptor length by 4 bytes
632  * unless the guest told us to strip it off the packet. */
633 static inline int
634 fcs_len(E1000State *s)
635 {
636     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
637 }
638 
639 static void
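/* Hand a frame to the network backend, or loop it straight back into our
 * own receive path when the PHY is in loopback mode. */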
640 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
641 {
642     NetClientState *nc = qemu_get_queue(s->nic);
643     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
644         nc->info->receive(nc, buf, size);
645     } else {
646         qemu_send_packet(nc, buf, size);
647     }
648 }
649 
650 static void
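/* Transmit the frame currently assembled in s->tx: for TSO segments patch
 * the IP length/ID and TCP sequence/flags, apply any requested checksum
 * offloads, re-insert the VLAN tag if needed, and send the result. */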
651 xmit_seg(E1000State *s)
652 {
653     uint16_t len, *sp;
654     unsigned int frames = s->tx.tso_frames, css, sofar;
655     struct e1000_tx *tp = &s->tx;
656 
657     if (tp->tse && tp->cptse) {
658         css = tp->ipcss;
659         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
660                frames, tp->size, css);
661         if (tp->ip) {    /* IPv4 */
662             stw_be_p(tp->data+css+2, tp->size - css);
663             stw_be_p(tp->data+css+4,
664                      be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
665         } else {         /* IPv6 */
666             stw_be_p(tp->data+css+4, tp->size - css);
667         }
668         css = tp->tucss;
669         len = tp->size - css;
670         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
671         if (tp->tcp) {
672             sofar = frames * tp->mss;
673             stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
674             if (tp->paylen - sofar > tp->mss)
675                 tp->data[css + 13] &= ~9;    /* PSH, FIN */
676         } else    /* UDP */
677             stw_be_p(tp->data+css+4, len);
678         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
679             unsigned int phsum;
680             // add pseudo-header length before checksum calculation
681             sp = (uint16_t *)(tp->data + tp->tucso);
682             phsum = be16_to_cpup(sp) + len;
683             phsum = (phsum >> 16) + (phsum & 0xffff);
684             stw_be_p(sp, phsum);
685         }
686         tp->tso_frames++;
687     }
688 
689     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
690         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
691     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
692         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
693     if (tp->vlan_needed) {
694         memmove(tp->vlan, tp->data, 4);
695         memmove(tp->data, tp->data + 4, 8);
696         memcpy(tp->data + 8, tp->vlan_header, 4);
697         e1000_send_packet(s, tp->vlan, tp->size + 4);
698     } else {
699         e1000_send_packet(s, tp->data, tp->size);
700     }
701 
702     inc_reg_if_not_full(s, TPT);
703     grow_8reg_if_not_full(s, TOTL, s->tx.size);
704     s->mac_reg[GPTC] = s->mac_reg[TPT];
705 }
706 
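/* Process one transmit descriptor: context descriptors update the offload
 * state in s->tx, data/legacy descriptors DMA their payload into tp->data
 * (segmenting into header + MSS sized chunks when TSE is set), and the
 * accumulated frame is transmitted once a descriptor with EOP is seen. */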
707 static void
708 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
709 {
710     PCIDevice *d = PCI_DEVICE(s);
711     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
712     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
713     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
714     unsigned int msh = 0xfffff;
715     uint64_t addr;
716     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
717     struct e1000_tx *tp = &s->tx;
718 
719     s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
720     if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
721         op = le32_to_cpu(xp->cmd_and_length);
722         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
723         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
724         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
725         tp->tucss = xp->upper_setup.tcp_fields.tucss;
726         tp->tucso = xp->upper_setup.tcp_fields.tucso;
727         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
728         tp->paylen = op & 0xfffff;
729         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
730         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
731         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
732         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
733         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
734         tp->tso_frames = 0;
735         if (tp->tucso == 0) {    /* this is probably wrong */
736             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
737             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
738         }
739         return;
740     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
741         // data descriptor
742         if (tp->size == 0) {
743             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
744         }
745         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
746     } else {
747         // legacy descriptor
748         tp->cptse = 0;
749     }
750 
751     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
752         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
753         tp->vlan_needed = 1;
754         stw_be_p(tp->vlan_header,
755                       le16_to_cpu(s->mac_reg[VET]));
756         stw_be_p(tp->vlan_header + 2,
757                       le16_to_cpu(dp->upper.fields.special));
758     }
759 
760     addr = le64_to_cpu(dp->buffer_addr);
761     if (tp->tse && tp->cptse) {
762         msh = tp->hdr_len + tp->mss;
763         do {
764             bytes = split_size;
765             if (tp->size + bytes > msh)
766                 bytes = msh - tp->size;
767 
768             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
769             pci_dma_read(d, addr, tp->data + tp->size, bytes);
770             sz = tp->size + bytes;
771             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
772                 memmove(tp->header, tp->data, tp->hdr_len);
773             }
774             tp->size = sz;
775             addr += bytes;
776             if (sz == msh) {
777                 xmit_seg(s);
778                 memmove(tp->data, tp->header, tp->hdr_len);
779                 tp->size = tp->hdr_len;
780             }
781             split_size -= bytes;
782         } while (bytes && split_size);
783     } else if (!tp->tse && tp->cptse) {
784         // context descriptor TSE is not set, while data descriptor TSE is set
785         DBGOUT(TXERR, "TCP segmentation error\n");
786     } else {
787         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
788         pci_dma_read(d, addr, tp->data + tp->size, split_size);
789         tp->size += split_size;
790     }
791 
792     if (!(txd_lower & E1000_TXD_CMD_EOP))
793         return;
794     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
795         xmit_seg(s);
796     }
797     tp->tso_frames = 0;
798     tp->sum_needed = 0;
799     tp->vlan_needed = 0;
800     tp->size = 0;
801     tp->cptse = 0;
802 }
803 
804 static uint32_t
805 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
806 {
807     PCIDevice *d = PCI_DEVICE(s);
808     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
809 
810     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
811         return 0;
812     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
813                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
814     dp->upper.data = cpu_to_le32(txd_upper);
815     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
816                   &dp->upper, sizeof(dp->upper));
817     return E1000_ICR_TXDW;
818 }
819 
820 static uint64_t tx_desc_base(E1000State *s)
821 {
822     uint64_t bah = s->mac_reg[TDBAH];
823     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
824 
825     return (bah << 32) + bal;
826 }
827 
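/* Walk the transmit ring from TDH towards TDT, processing each descriptor
 * and writing back its status, then raise the accumulated interrupt causes. */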
828 static void
829 start_xmit(E1000State *s)
830 {
831     PCIDevice *d = PCI_DEVICE(s);
832     dma_addr_t base;
833     struct e1000_tx_desc desc;
834     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
835 
836     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
837         DBGOUT(TX, "tx disabled\n");
838         return;
839     }
840 
841     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
842         base = tx_desc_base(s) +
843                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
844         pci_dma_read(d, base, &desc, sizeof(desc));
845 
846         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
847                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
848                desc.upper.data);
849 
850         process_tx_desc(s, &desc);
851         cause |= txdesc_writeback(s, base, &desc);
852 
853         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
854             s->mac_reg[TDH] = 0;
855         /*
856          * The following could happen only if the guest assigns
857          * bogus values to TDT/TDLEN; there is nothing sensible
858          * we can do about it.
859          */
860         if (s->mac_reg[TDH] == tdh_start) {
861             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
862                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
863             break;
864         }
865     }
866     set_ics(s, 0, cause);
867 }
868 
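/* Receive address filtering: apply the VLAN filter table, the promiscuous
 * and broadcast settings, the exact unicast receive addresses (RA) and the
 * multicast hash table (MTA).  Returns 1 to accept the frame, 0 to drop it. */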
869 static int
870 receive_filter(E1000State *s, const uint8_t *buf, int size)
871 {
872     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
873     static const int mta_shift[] = {4, 3, 2, 0};
874     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
875 
876     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
877         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
878         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
879                                      ((vid >> 5) & 0x7f));
880         if ((vfta & (1 << (vid & 0x1f))) == 0)
881             return 0;
882     }
883 
884     if (rctl & E1000_RCTL_UPE)			// promiscuous
885         return 1;
886 
887     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
888         return 1;
889 
890     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
891         return 1;
892 
893     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
894         if (!(rp[1] & E1000_RAH_AV))
895             continue;
896         ra[0] = cpu_to_le32(rp[0]);
897         ra[1] = cpu_to_le32(rp[1]);
898         if (!memcmp(buf, (uint8_t *)ra, 6)) {
899             DBGOUT(RXFILTER,
900                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
901                    (int)(rp - s->mac_reg - RA)/2,
902                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
903             return 1;
904         }
905     }
906     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
907            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
908 
909     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
910     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
911     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
912         return 1;
913     DBGOUT(RXFILTER,
914            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
915            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
916            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
917            s->mac_reg[MTA + (f >> 5)]);
918 
919     return 0;
920 }
921 
922 static void
923 e1000_set_link_status(NetClientState *nc)
924 {
925     E1000State *s = qemu_get_nic_opaque(nc);
926     uint32_t old_status = s->mac_reg[STATUS];
927 
928     if (nc->link_down) {
929         e1000_link_down(s);
930     } else {
931         if (have_autoneg(s) &&
932             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
933             /* emulate auto-negotiation if supported */
934             timer_mod(s->autoneg_timer,
935                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
936         } else {
937             e1000_link_up(s);
938         }
939     }
940 
941     if (s->mac_reg[STATUS] != old_status)
942         set_ics(s, 0, E1000_ICR_LSC);
943 }
944 
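/* Check whether enough receive descriptors are available between RDH and
 * RDT to hold total_size bytes, given the current rxbuf_size. */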
945 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
946 {
947     int bufs;
948     /* Fast-path short packets */
949     if (total_size <= s->rxbuf_size) {
950         return s->mac_reg[RDH] != s->mac_reg[RDT];
951     }
952     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
953         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
954     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
955         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
956             s->mac_reg[RDT] - s->mac_reg[RDH];
957     } else {
958         return false;
959     }
960     return total_size <= bufs * s->rxbuf_size;
961 }
962 
963 static int
964 e1000_can_receive(NetClientState *nc)
965 {
966     E1000State *s = qemu_get_nic_opaque(nc);
967 
968     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
969         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
970         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
971         e1000_has_rxbufs(s, 1);
972 }
973 
974 static uint64_t rx_desc_base(E1000State *s)
975 {
976     uint64_t bah = s->mac_reg[RDBAH];
977     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
978 
979     return (bah << 32) + bal;
980 }
981 
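/* Receive path: pad runt frames, drop oversized or filtered-out packets,
 * strip the VLAN tag when VLAN handling is enabled, then DMA the payload
 * into the guest's receive descriptors, advancing RDH, updating the
 * statistics registers and raising RXT0/RXDMT0 (or RXO on overrun). */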
982 static ssize_t
983 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
984 {
985     E1000State *s = qemu_get_nic_opaque(nc);
986     PCIDevice *d = PCI_DEVICE(s);
987     struct e1000_rx_desc desc;
988     dma_addr_t base;
989     unsigned int n, rdt;
990     uint32_t rdh_start;
991     uint16_t vlan_special = 0;
992     uint8_t vlan_status = 0;
993     uint8_t min_buf[MIN_BUF_SIZE];
994     struct iovec min_iov;
995     uint8_t *filter_buf = iov->iov_base;
996     size_t size = iov_size(iov, iovcnt);
997     size_t iov_ofs = 0;
998     size_t desc_offset;
999     size_t desc_size;
1000     size_t total_size;
1001 
1002     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1003         return -1;
1004     }
1005 
1006     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1007         return -1;
1008     }
1009 
1010     /* Pad to minimum Ethernet frame length */
1011     if (size < sizeof(min_buf)) {
1012         iov_to_buf(iov, iovcnt, 0, min_buf, size);
1013         memset(&min_buf[size], 0, sizeof(min_buf) - size);
1014         min_iov.iov_base = filter_buf = min_buf;
1015         min_iov.iov_len = size = sizeof(min_buf);
1016         iovcnt = 1;
1017         iov = &min_iov;
1018     } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1019         /* This is very unlikely, but may happen. */
1020         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1021         filter_buf = min_buf;
1022     }
1023 
1024     /* Discard oversized packets if !LPE and !SBP. */
1025     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1026         (size > MAXIMUM_ETHERNET_VLAN_SIZE
1027         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1028         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1029         return size;
1030     }
1031 
1032     if (!receive_filter(s, filter_buf, size)) {
1033         return size;
1034     }
1035 
1036     if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1037         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1038                                                                 + 14)));
1039         iov_ofs = 4;
1040         if (filter_buf == iov->iov_base) {
1041             memmove(filter_buf + 4, filter_buf, 12);
1042         } else {
1043             iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1044             while (iov->iov_len <= iov_ofs) {
1045                 iov_ofs -= iov->iov_len;
1046                 iov++;
1047             }
1048         }
1049         vlan_status = E1000_RXD_STAT_VP;
1050         size -= 4;
1051     }
1052 
1053     rdh_start = s->mac_reg[RDH];
1054     desc_offset = 0;
1055     total_size = size + fcs_len(s);
1056     if (!e1000_has_rxbufs(s, total_size)) {
1057             set_ics(s, 0, E1000_ICS_RXO);
1058             return -1;
1059     }
1060     do {
1061         desc_size = total_size - desc_offset;
1062         if (desc_size > s->rxbuf_size) {
1063             desc_size = s->rxbuf_size;
1064         }
1065         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1066         pci_dma_read(d, base, &desc, sizeof(desc));
1067         desc.special = vlan_special;
1068         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1069         if (desc.buffer_addr) {
1070             if (desc_offset < size) {
1071                 size_t iov_copy;
1072                 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1073                 size_t copy_size = size - desc_offset;
1074                 if (copy_size > s->rxbuf_size) {
1075                     copy_size = s->rxbuf_size;
1076                 }
1077                 do {
1078                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1079                     pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1080                     copy_size -= iov_copy;
1081                     ba += iov_copy;
1082                     iov_ofs += iov_copy;
1083                     if (iov_ofs == iov->iov_len) {
1084                         iov++;
1085                         iov_ofs = 0;
1086                     }
1087                 } while (copy_size);
1088             }
1089             desc_offset += desc_size;
1090             desc.length = cpu_to_le16(desc_size);
1091             if (desc_offset >= total_size) {
1092                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1093             } else {
1094                 /* Guest zeroing out status is not a hardware requirement.
1095                    Clear EOP in case guest didn't do it. */
1096                 desc.status &= ~E1000_RXD_STAT_EOP;
1097             }
1098         } else { // as per intel docs; skip descriptors with null buf addr
1099             DBGOUT(RX, "Null RX descriptor!!\n");
1100         }
1101         pci_dma_write(d, base, &desc, sizeof(desc));
1102 
1103         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1104             s->mac_reg[RDH] = 0;
1105         /* see comment in start_xmit; same here */
1106         if (s->mac_reg[RDH] == rdh_start) {
1107             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1108                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1109             set_ics(s, 0, E1000_ICS_RXO);
1110             return -1;
1111         }
1112     } while (desc_offset < total_size);
1113 
1114     inc_reg_if_not_full(s, TPR);
1115     s->mac_reg[GPRC] = s->mac_reg[TPR];
1116     /* TOR - Total Octets Received:
1117      * This register includes bytes received in a packet from the <Destination
1118      * Address> field through the <CRC> field, inclusively.
1119      * Always include FCS length (4) in size.
1120      */
1121     grow_8reg_if_not_full(s, TORL, size+4);
1122 
1123     n = E1000_ICS_RXT0;
1124     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1125         rdt += s->mac_reg[RDLEN] / sizeof(desc);
1126     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1127         s->rxbuf_min_shift)
1128         n |= E1000_ICS_RXDMT0;
1129 
1130     set_ics(s, 0, n);
1131 
1132     return size;
1133 }
1134 
1135 static ssize_t
1136 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1137 {
1138     const struct iovec iov = {
1139         .iov_base = (uint8_t *)buf,
1140         .iov_len = size
1141     };
1142 
1143     return e1000_receive_iov(nc, &iov, 1);
1144 }
1145 
1146 static uint32_t
1147 mac_readreg(E1000State *s, int index)
1148 {
1149     return s->mac_reg[index];
1150 }
1151 
1152 static uint32_t
1153 mac_low4_read(E1000State *s, int index)
1154 {
1155     return s->mac_reg[index] & 0xf;
1156 }
1157 
1158 static uint32_t
1159 mac_low11_read(E1000State *s, int index)
1160 {
1161     return s->mac_reg[index] & 0x7ff;
1162 }
1163 
1164 static uint32_t
1165 mac_low13_read(E1000State *s, int index)
1166 {
1167     return s->mac_reg[index] & 0x1fff;
1168 }
1169 
1170 static uint32_t
1171 mac_low16_read(E1000State *s, int index)
1172 {
1173     return s->mac_reg[index] & 0xffff;
1174 }
1175 
1176 static uint32_t
1177 mac_icr_read(E1000State *s, int index)
1178 {
1179     uint32_t ret = s->mac_reg[ICR];
1180 
1181     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1182     set_interrupt_cause(s, 0, 0);
1183     return ret;
1184 }
1185 
1186 static uint32_t
1187 mac_read_clr4(E1000State *s, int index)
1188 {
1189     uint32_t ret = s->mac_reg[index];
1190 
1191     s->mac_reg[index] = 0;
1192     return ret;
1193 }
1194 
1195 static uint32_t
1196 mac_read_clr8(E1000State *s, int index)
1197 {
1198     uint32_t ret = s->mac_reg[index];
1199 
1200     s->mac_reg[index] = 0;
1201     s->mac_reg[index-1] = 0;
1202     return ret;
1203 }
1204 
1205 static void
1206 mac_writereg(E1000State *s, int index, uint32_t val)
1207 {
1208     uint32_t macaddr[2];
1209 
1210     s->mac_reg[index] = val;
1211 
1212     if (index == RA + 1) {
1213         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1214         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1215         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1216     }
1217 }
1218 
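/* A new RDT value may make receive buffers available again, so retry any
 * packets the backend has queued for us. */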
1219 static void
1220 set_rdt(E1000State *s, int index, uint32_t val)
1221 {
1222     s->mac_reg[index] = val & 0xffff;
1223     if (e1000_has_rxbufs(s, 1)) {
1224         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1225     }
1226 }
1227 
1228 static void
1229 set_16bit(E1000State *s, int index, uint32_t val)
1230 {
1231     s->mac_reg[index] = val & 0xffff;
1232 }
1233 
1234 static void
1235 set_dlen(E1000State *s, int index, uint32_t val)
1236 {
1237     s->mac_reg[index] = val & 0xfff80;
1238 }
1239 
1240 static void
1241 set_tctl(E1000State *s, int index, uint32_t val)
1242 {
1243     s->mac_reg[index] = val;
1244     s->mac_reg[TDT] &= 0xffff;
1245     start_xmit(s);
1246 }
1247 
1248 static void
1249 set_icr(E1000State *s, int index, uint32_t val)
1250 {
1251     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1252     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1253 }
1254 
1255 static void
1256 set_imc(E1000State *s, int index, uint32_t val)
1257 {
1258     s->mac_reg[IMS] &= ~val;
1259     set_ics(s, 0, 0);
1260 }
1261 
1262 static void
1263 set_ims(E1000State *s, int index, uint32_t val)
1264 {
1265     s->mac_reg[IMS] |= val;
1266     set_ics(s, 0, 0);
1267 }
1268 
1269 #define getreg(x)    [x] = mac_readreg
1270 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1271     getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1272     getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1273     getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1274     getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1275     getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1276     getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1277     getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1278     getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1279     getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1280     getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1281     getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1282     getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1283     getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1284     getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),
1285 
1286     [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1287     [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1288     [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1289     [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1290     [EERD]    = flash_eerd_read,
1291     [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1292     [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1293     [RDFPC]   = mac_low13_read,
1294     [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1295     [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1296     [TDFPC]   = mac_low13_read,
1297     [AIT]     = mac_low16_read,
1298 
1299     [CRCERRS ... MPC]   = &mac_readreg,
1300     [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1301     [FFLT ... FFLT+6]   = &mac_low11_read,
1302     [RA ... RA+31]      = &mac_readreg,
1303     [WUPM ... WUPM+31]  = &mac_readreg,
1304     [MTA ... MTA+127]   = &mac_readreg,
1305     [VFTA ... VFTA+127] = &mac_readreg,
1306     [FFMT ... FFMT+254] = &mac_low4_read,
1307     [FFVT ... FFVT+254] = &mac_readreg,
1308     [PBM ... PBM+16383] = &mac_readreg,
1309 };
1310 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1311 
1312 #define putreg(x)    [x] = mac_writereg
1313 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1314     putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1315     putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1316     putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1317     putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1318     putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1319     putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1320     putreg(WUS),      putreg(AIT),
1321 
1322     [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1323     [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1324     [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1325     [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1326     [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1327     [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1328     [ITR]    = set_16bit,
1329 
1330     [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1331     [FFLT ... FFLT+6]   = &mac_writereg,
1332     [RA ... RA+31]      = &mac_writereg,
1333     [WUPM ... WUPM+31]  = &mac_writereg,
1334     [MTA ... MTA+127]   = &mac_writereg,
1335     [VFTA ... VFTA+127] = &mac_writereg,
1336     [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1337     [PBM ... PBM+16383] = &mac_writereg,
1338 };
1339 
1340 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1341 
1342 enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1343 
1344 #define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1345 /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1346  * f - flag bits (up to 6 possible flags)
1347  * n - flag needed
1348  * p - partially implemented */
1349 static const uint8_t mac_reg_access[0x8000] = {
1350     [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1351     [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1352 
1353     [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1354     [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1355     [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1356     [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1357     [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1358     [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1359     [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1360     [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1361     [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1362     [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1363     [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1364     [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1365     [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1366     [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1367     [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1368     [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1369 
1370     [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1371     [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1372     [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1373     [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1374     [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1375     [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1376     [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1377     [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1378     [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1379     [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1380     [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1381 };
1382 
1383 static void
1384 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1385                  unsigned size)
1386 {
1387     E1000State *s = opaque;
1388     unsigned int index = (addr & 0x1ffff) >> 2;
1389 
1390     if (index < NWRITEOPS && macreg_writeops[index]) {
1391         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1392             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1393             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1394                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1395                        "It is not fully implemented.\n", index<<2);
1396             }
1397             macreg_writeops[index](s, index, val);
1398         } else {    /* "flag needed" bit is set, but the flag is not active */
1399             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1400                    index<<2);
1401         }
1402     } else if (index < NREADOPS && macreg_readops[index]) {
1403         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1404                index<<2, val);
1405     } else {
1406         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1407                index<<2, val);
1408     }
1409 }
1410 
1411 static uint64_t
1412 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1413 {
1414     E1000State *s = opaque;
1415     unsigned int index = (addr & 0x1ffff) >> 2;
1416 
1417     if (index < NREADOPS && macreg_readops[index]) {
1418         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1419             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1420             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1421                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1422                        "It is not fully implemented.\n", index<<2);
1423             }
1424             return macreg_readops[index](s, index);
1425         } else {    /* "flag needed" bit is set, but the flag is not active */
1426             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1427                    index<<2);
1428         }
1429     } else {
1430         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1431     }
1432     return 0;
1433 }
1434 
1435 static const MemoryRegionOps e1000_mmio_ops = {
1436     .read = e1000_mmio_read,
1437     .write = e1000_mmio_write,
1438     .endianness = DEVICE_LITTLE_ENDIAN,
1439     .impl = {
1440         .min_access_size = 4,
1441         .max_access_size = 4,
1442     },
1443 };
1444 
1445 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1446                               unsigned size)
1447 {
1448     E1000State *s = opaque;
1449 
1450     (void)s;
1451     return 0;
1452 }
1453 
1454 static void e1000_io_write(void *opaque, hwaddr addr,
1455                            uint64_t val, unsigned size)
1456 {
1457     E1000State *s = opaque;
1458 
1459     (void)s;
1460 }
1461 
1462 static const MemoryRegionOps e1000_io_ops = {
1463     .read = e1000_io_read,
1464     .write = e1000_io_write,
1465     .endianness = DEVICE_LITTLE_ENDIAN,
1466 };
1467 
1468 static bool is_version_1(void *opaque, int version_id)
1469 {
1470     return version_id == 1;
1471 }
1472 
1473 static void e1000_pre_save(void *opaque)
1474 {
1475     E1000State *s = opaque;
1476     NetClientState *nc = qemu_get_queue(s->nic);
1477 
1478     /* If the mitigation timer is active, emulate a timeout now. */
1479     if (s->mit_timer_on) {
1480         e1000_mit_timer(s);
1481     }
1482 
1483     /*
1484      * If link is down and auto-negotiation is supported and ongoing,
1485      * complete auto-negotiation immediately. This allows us to look
1486      * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1487      */
1488     if (nc->link_down && have_autoneg(s)) {
1489         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1490     }
1491 }
1492 
1493 static int e1000_post_load(void *opaque, int version_id)
1494 {
1495     E1000State *s = opaque;
1496     NetClientState *nc = qemu_get_queue(s->nic);
1497 
1498     if (!chkflag(MIT)) {
1499         s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1500             s->mac_reg[TADV] = 0;
1501         s->mit_irq_level = false;
1502     }
1503     s->mit_ide = 0;
1504     s->mit_timer_on = false;
1505 
1506     /* nc.link_down can't be migrated, so infer link_down according
1507      * to link status bit in mac_reg[STATUS].
1508      * Alternatively, restart link negotiation if it was in progress. */
1509     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1510 
1511     if (have_autoneg(s) &&
1512         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1513         nc->link_down = false;
1514         timer_mod(s->autoneg_timer,
1515                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1516     }
1517 
1518     return 0;
1519 }
1520 
1521 static bool e1000_mit_state_needed(void *opaque)
1522 {
1523     E1000State *s = opaque;
1524 
1525     return chkflag(MIT);
1526 }
1527 
1528 static bool e1000_full_mac_needed(void *opaque)
1529 {
1530     E1000State *s = opaque;
1531 
1532     return chkflag(MAC);
1533 }
1534 
1535 static const VMStateDescription vmstate_e1000_mit_state = {
1536     .name = "e1000/mit_state",
1537     .version_id = 1,
1538     .minimum_version_id = 1,
1539     .needed = e1000_mit_state_needed,
1540     .fields = (VMStateField[]) {
1541         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1542         VMSTATE_UINT32(mac_reg[RADV], E1000State),
1543         VMSTATE_UINT32(mac_reg[TADV], E1000State),
1544         VMSTATE_UINT32(mac_reg[ITR], E1000State),
1545         VMSTATE_BOOL(mit_irq_level, E1000State),
1546         VMSTATE_END_OF_LIST()
1547     }
1548 };
1549 
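/*
 * Optional subsection carrying the complete mac_reg array.  It is only put
 * on the wire when the MAC compat flag is set; otherwise the receiver
 * relies on the individual register fields migrated in vmstate_e1000.
 */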
1550 static const VMStateDescription vmstate_e1000_full_mac_state = {
1551     .name = "e1000/full_mac_state",
1552     .version_id = 1,
1553     .minimum_version_id = 1,
1554     .needed = e1000_full_mac_needed,
1555     .fields = (VMStateField[]) {
1556         VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1557         VMSTATE_END_OF_LIST()
1558     }
1559 };
1560 
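/*
 * Main migration description.  The field order and types below are part of
 * the wire format and must not change; new optional state belongs in a
 * subsection (see the list at the end).
 */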
1561 static const VMStateDescription vmstate_e1000 = {
1562     .name = "e1000",
1563     .version_id = 2,
1564     .minimum_version_id = 1,
1565     .pre_save = e1000_pre_save,
1566     .post_load = e1000_post_load,
1567     .fields = (VMStateField[]) {
1568         VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1569         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1570         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1571         VMSTATE_UINT32(rxbuf_size, E1000State),
1572         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1573         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1574         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1575         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1576         VMSTATE_UINT16(eecd_state.reading, E1000State),
1577         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1578         VMSTATE_UINT8(tx.ipcss, E1000State),
1579         VMSTATE_UINT8(tx.ipcso, E1000State),
1580         VMSTATE_UINT16(tx.ipcse, E1000State),
1581         VMSTATE_UINT8(tx.tucss, E1000State),
1582         VMSTATE_UINT8(tx.tucso, E1000State),
1583         VMSTATE_UINT16(tx.tucse, E1000State),
1584         VMSTATE_UINT32(tx.paylen, E1000State),
1585         VMSTATE_UINT8(tx.hdr_len, E1000State),
1586         VMSTATE_UINT16(tx.mss, E1000State),
1587         VMSTATE_UINT16(tx.size, E1000State),
1588         VMSTATE_UINT16(tx.tso_frames, E1000State),
1589         VMSTATE_UINT8(tx.sum_needed, E1000State),
1590         VMSTATE_INT8(tx.ip, E1000State),
1591         VMSTATE_INT8(tx.tcp, E1000State),
1592         VMSTATE_BUFFER(tx.header, E1000State),
1593         VMSTATE_BUFFER(tx.data, E1000State),
1594         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1595         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1596         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1597         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1598         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1599         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1600         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1601         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1602         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1603         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1604         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1605         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1606         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1607         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1608         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1609         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1610         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1611         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1612         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1613         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1614         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1615         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1616         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1617         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1618         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1619         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1620         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1621         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1622         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1623         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1624         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1625         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1626         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1627         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1628         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1629         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1630         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1631         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1632         VMSTATE_UINT32(mac_reg[VET], E1000State),
1633         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1634         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1635         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1636         VMSTATE_END_OF_LIST()
1637     },
1638     .subsections = (const VMStateDescription*[]) {
1639         &vmstate_e1000_mit_state,
1640         &vmstate_e1000_full_mac_state,
1641         NULL
1642     }
1643 };
1644 
1645 /*
1646  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1647  * Note: A valid DevId will be inserted during pci_e1000_realize().
1648  */
1649 static const uint16_t e1000_eeprom_template[64] = {
1650     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1651     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1652     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1653     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1654     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1655     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1656     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1657     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1658 };
1659 
1660 /* PCI interface */
1661 
1662 static void
1663 e1000_mmio_setup(E1000State *d)
1664 {
1665     int i;
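    /*
     * Writes to most registers may be coalesced (batched by the hypervisor
     * and flushed to QEMU later), but registers with immediate side
     * effects -- the interrupt registers, MDIC, TCTL and TDT -- are listed
     * in excluded_regs and left out of the coalesced ranges built below.
     */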
1666     const uint32_t excluded_regs[] = {
1667         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1668         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1669     };
1670 
1671     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1672                           "e1000-mmio", PNPMMIO_SIZE);
1673     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++) {
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i + 1] - excluded_regs[i] - 4);
    }
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d,
                          "e1000-io", IOPORT_SIZE);
1678 }
1679 
1680 static void
1681 pci_e1000_uninit(PCIDevice *dev)
1682 {
1683     E1000State *d = E1000(dev);
1684 
1685     timer_del(d->autoneg_timer);
1686     timer_free(d->autoneg_timer);
1687     timer_del(d->mit_timer);
1688     timer_free(d->mit_timer);
1689     qemu_del_nic(d->nic);
1690 }
1691 
1692 static NetClientInfo net_e1000_info = {
1693     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1694     .size = sizeof(NICState),
1695     .can_receive = e1000_can_receive,
1696     .receive = e1000_receive,
1697     .receive_iov = e1000_receive_iov,
1698     .link_status_changed = e1000_set_link_status,
1699 };
1700 
1701 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1702                                 uint32_t val, int len)
1703 {
1704     E1000State *s = E1000(pci_dev);
1705 
1706     pci_default_write_config(pci_dev, address, val, len);
1707 
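    /*
     * If this write touches PCI_COMMAND and bus mastering is now enabled,
     * the device may DMA again; flush any packets that were queued while
     * it could not receive.
     */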
1708     if (range_covers_byte(address, len, PCI_COMMAND) &&
1709         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1710         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1711     }
1712 }
1713 
1714 
1715 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1716 {
1717     DeviceState *dev = DEVICE(pci_dev);
1718     E1000State *d = E1000(pci_dev);
1719     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1720     uint8_t *pci_conf;
1721     uint16_t checksum = 0;
1722     int i;
1723     uint8_t *macaddr;
1724 
1725     pci_dev->config_write = e1000_write_config;
1726 
1727     pci_conf = pci_dev->config;
1728 
1729     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1730     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1731 
1732     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1733 
1734     e1000_mmio_setup(d);
1735 
1736     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1737 
1738     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1739 
1740     memmove(d->eeprom_data, e1000_eeprom_template,
1741             sizeof e1000_eeprom_template);
1742     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1743     macaddr = d->conf.macaddr.a;
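    /*
     * EEPROM words 0-2 hold the MAC address, one byte pair per word with
     * the lower-numbered byte in the low half; e.g. 52:54:00:12:34:56 is
     * stored as 0x5452, 0x1200, 0x5634.
     */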
    for (i = 0; i < 3; i++) {
        d->eeprom_data[i] = (macaddr[2 * i + 1] << 8) | macaddr[2 * i];
    }
1746     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
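    /*
     * The EEPROM is considered valid when the 16-bit sum of words
     * 0x00-0x3F equals EEPROM_SUM (0xBABA per the 8254x SDM), so store the
     * value that makes the sum come out right; e.g. if words 0x00-0x3E sum
     * to 0x49AC, the checksum word is 0xBABA - 0x49AC = 0x710E (mod 2^16).
     */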
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++) {
        checksum += d->eeprom_data[i];
    }
1749     checksum = (uint16_t) EEPROM_SUM - checksum;
1750     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1751 
1752     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1753                           object_get_typename(OBJECT(d)), dev->id, d);
1754 
1755     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1756 
1757     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1758     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1759 }
1760 
1761 static void qdev_e1000_reset(DeviceState *dev)
1762 {
1763     E1000State *d = E1000(dev);
1764     e1000_reset(d);
1765 }
1766 
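/*
 * Both compat flags default to on; machine-type compat properties may
 * clear them so that migration with older QEMU versions, which lacked
 * auto-negotiation and interrupt-mitigation emulation, keeps working.
 */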
1767 static Property e1000_properties[] = {
1768     DEFINE_NIC_PROPERTIES(E1000State, conf),
1769     DEFINE_PROP_BIT("autonegotiation", E1000State,
1770                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1771     DEFINE_PROP_BIT("mitigation", E1000State,
1772                     compat_flags, E1000_FLAG_MIT_BIT, true),
1773     DEFINE_PROP_END_OF_LIST(),
1774 };
1775 
1776 typedef struct E1000Info {
1777     const char *name;
1778     uint16_t   device_id;
1779     uint8_t    revision;
1780     uint16_t   phy_id2;
1781 } E1000Info;
1782 
1783 static void e1000_class_init(ObjectClass *klass, void *data)
1784 {
1785     DeviceClass *dc = DEVICE_CLASS(klass);
1786     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1787     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1788     const E1000Info *info = data;
1789 
1790     k->realize = pci_e1000_realize;
1791     k->exit = pci_e1000_uninit;
1792     k->romfile = "efi-e1000.rom";
1793     k->vendor_id = PCI_VENDOR_ID_INTEL;
1794     k->device_id = info->device_id;
1795     k->revision = info->revision;
1796     e->phy_id2 = info->phy_id2;
1797     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1798     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1799     dc->desc = "Intel Gigabit Ethernet";
1800     dc->reset = qdev_e1000_reset;
1801     dc->vmsd = &vmstate_e1000;
1802     dc->props = e1000_properties;
1803 }
1804 
1805 static void e1000_instance_init(Object *obj)
1806 {
1807     E1000State *n = E1000(obj);
1808     device_add_bootindex_property(obj, &n->conf.bootindex,
1809                                   "bootindex", "/ethernet-phy@0",
1810                                   DEVICE(n), NULL);
1811 }
1812 
1813 static const TypeInfo e1000_base_info = {
1814     .name          = TYPE_E1000_BASE,
1815     .parent        = TYPE_PCI_DEVICE,
1816     .instance_size = sizeof(E1000State),
1817     .instance_init = e1000_instance_init,
1818     .class_size    = sizeof(E1000BaseClass),
1819     .abstract      = true,
1820 };
1821 
1822 static const E1000Info e1000_devices[] = {
1823     {
1824         .name      = "e1000",
1825         .device_id = E1000_DEV_ID_82540EM,
1826         .revision  = 0x03,
1827         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1828     },
1829     {
1830         .name      = "e1000-82544gc",
1831         .device_id = E1000_DEV_ID_82544GC_COPPER,
1832         .revision  = 0x03,
1833         .phy_id2   = E1000_PHY_ID2_82544x,
1834     },
1835     {
1836         .name      = "e1000-82545em",
1837         .device_id = E1000_DEV_ID_82545EM_COPPER,
1838         .revision  = 0x03,
1839         .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1840     },
1841 };
1842 
1843 static void e1000_register_types(void)
1844 {
1845     int i;
1846 
1847     type_register_static(&e1000_base_info);
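    /* Register one concrete QOM type per entry in e1000_devices, each
     * derived from the abstract TYPE_E1000_BASE and parameterised via
     * class_data. */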
1848     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1849         const E1000Info *info = &e1000_devices[i];
1850         TypeInfo type_info = {};
1851 
1852         type_info.name = info->name;
1853         type_info.parent = TYPE_E1000_BASE;
1854         type_info.class_data = (void *)info;
1855         type_info.class_init = e1000_class_init;
1856         type_info.instance_init = e1000_instance_init;
1857 
1858         type_register(&type_info);
1859     }
1860 }
1861 
1862 type_init(e1000_register_types)
1863