xref: /qemu/hw/net/e1000.c (revision 74004e8ce4306f51f593a99d175a1e1f0453deba)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/hw.h"
30 #include "hw/pci/pci.h"
31 #include "net/net.h"
32 #include "net/checksum.h"
33 #include "hw/loader.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/range.h"
38 
39 #include "e1000_regs.h"
40 
/* Ethernet broadcast destination address, used for TX broadcast stats. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
42 
43 #define E1000_DEBUG
44 
45 #ifdef E1000_DEBUG
46 enum {
47     DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
48     DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
49     DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
50     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
51 };
52 #define DBGBIT(x)    (1<<DEBUG_##x)
53 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
54 
55 #define DBGOUT(what, fmt, ...) do { \
56     if (debugflags & DBGBIT(what)) \
57         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
58     } while (0)
59 #else
60 #define DBGOUT(what, fmt, ...) do {} while (0)
61 #endif
62 
63 #define IOPORT_SIZE       0x40
64 #define PNPMMIO_SIZE      0x20000
65 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
66 
67 /* this is the size past which hardware will drop packets when setting LPE=0 */
68 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
69 /* this is the size past which hardware will drop packets when setting LPE=1 */
70 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
71 
72 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
73 
74 /*
75  * HW models:
76  *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
77  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
78  *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
79  *  Others never tested
80  */
81 
/* Device instance state shared by all emulated e1000 variants. */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by byte offset >> 2 */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers */
    uint16_t eeprom_data[64];   /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    /* Transmit state accumulated across descriptors until EOP is seen. */
    struct e1000_tx {
        unsigned char header[256];    /* saved frame header for TSO resend */
        unsigned char vlan_header[4]; /* 802.1Q tag to insert, big-endian */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                /* bytes currently in data[] */
        unsigned char sum_needed;     /* TXSM/IXSM bits from the descriptor */
        unsigned char vlan_needed;
        uint8_t ipcss;                /* IP checksum start */
        uint8_t ipcso;                /* IP checksum offset */
        uint16_t ipcse;               /* IP checksum end (inclusive) */
        uint8_t tucss;                /* TCP/UDP checksum start */
        uint8_t tucso;                /* TCP/UDP checksum offset */
        uint16_t tucse;               /* TCP/UDP checksum end (inclusive) */
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;          /* segments emitted so far for this TSO */
        char tse;                     /* TSE from the context descriptor */
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    /* Microwire EEPROM bit-bang state, driven through the EECD register. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
    uint32_t compat_flags;
} E1000State;
147 
148 #define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
149 
150 typedef struct E1000BaseClass {
151     PCIDeviceClass parent_class;
152     uint16_t phy_id2;
153 } E1000BaseClass;
154 
155 #define TYPE_E1000_BASE "e1000-base"
156 
157 #define E1000(obj) \
158     OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
159 
160 #define E1000_DEVICE_CLASS(klass) \
161      OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
162 #define E1000_DEVICE_GET_CLASS(obj) \
163     OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
164 
/* Symbolic mac_reg[] indices: each register's byte offset divided by 4. */
#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
};
196 
197 static void
198 e1000_link_down(E1000State *s)
199 {
200     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
201     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
202     s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
203     s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
204 }
205 
206 static void
207 e1000_link_up(E1000State *s)
208 {
209     s->mac_reg[STATUS] |= E1000_STATUS_LU;
210     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
211 
212     /* E1000_STATUS_LU is tested by e1000_can_receive() */
213     qemu_flush_queued_packets(qemu_get_queue(s->nic));
214 }
215 
216 static bool
217 have_autoneg(E1000State *s)
218 {
219     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
220 }
221 
222 static void
223 set_phy_ctrl(E1000State *s, int index, uint16_t val)
224 {
225     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
226     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
227                                    MII_CR_RESET |
228                                    MII_CR_RESTART_AUTO_NEG);
229 
230     /*
231      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
232      * migrate during auto negotiation, after migration the link will be
233      * down.
234      */
235     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
236         e1000_link_down(s);
237         DBGOUT(PHY, "Start link auto negotiation\n");
238         timer_mod(s->autoneg_timer,
239                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
240     }
241 }
242 
/* Per-register PHY write hooks indexed by PHY register number; entries
 * left NULL fall back to a plain phy_reg[] store in set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Table length, used as the bounds check before dispatching a hook. */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
248 
/* Access rights for each of the 32 PHY registers, consulted by set_mdic();
 * registers absent from the table are neither readable nor writable. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
259 
/* PHY register power-on defaults, copied into phy_reg[] by e1000_reset(). */
/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
287 
/* MAC register power-on defaults, copied into mac_reg[] by e1000_reset(). */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
301 
/* Helper function, *curr == 0 means the value is not set */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero means "no delay requested" */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;          /* first value, or a tighter deadline */
    }
}
310 
/*
 * Central interrupt logic: latch the new cause bits into ICR (and its
 * ICS mirror) and drive the INTx pin from IMS & ICR, postponing a
 * rising edge while the mitigation window is open and re-arming the
 * mitigation timer from ITR/RADV/TADV when mitigation is enabled.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            if (mit_delay) {
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
380 
381 static void
382 e1000_mit_timer(void *opaque)
383 {
384     E1000State *s = opaque;
385 
386     s->mit_timer_on = 0;
387     /* Call set_interrupt_cause to update the irq level (if necessary). */
388     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
389 }
390 
/* Guest "interrupt cause set": OR the new cause bits into the pending
 * ones and re-evaluate the interrupt line. */
static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}
398 
/*
 * Auto-negotiation timer callback (armed by set_phy_ctrl): unless the
 * backend link is down, complete negotiation, bring the link up and
 * notify the guest with a Link Status Change interrupt.
 */
static void
e1000_autoneg_timer(void *opaque)
{
    E1000State *s = opaque;
    if (!qemu_get_queue(s->nic)->link_down) {
        e1000_link_up(s);
        s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Auto negotiation is completed\n");
        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
    }
}
411 
412 static int
413 rxbufsize(uint32_t v)
414 {
415     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
416          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
417          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
418     switch (v) {
419     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
420         return 16384;
421     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
422         return 8192;
423     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
424         return 4096;
425     case E1000_RCTL_SZ_1024:
426         return 1024;
427     case E1000_RCTL_SZ_512:
428         return 512;
429     case E1000_RCTL_SZ_256:
430         return 256;
431     }
432     return 2048;
433 }
434 
/*
 * Device reset: stop the timers, restore PHY and MAC registers to their
 * power-on defaults (PHY_ID2 comes from the device-model class), clear
 * the TX accumulator, and seed RAL/RAH from the configured MAC address.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    d->phy_reg[PHY_ID2] = edc->phy_id2;
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    /* Mirror the backend link state after wiping STATUS/PHY_STATUS. */
    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Throttle interrupts to prevent guest (e.g Win 2012) from
     * reinjecting interrupts endlessly. TODO: fix non ITR case.
     */
    d->mac_reg[ITR] = 250;

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
473 
474 static void
475 set_ctrl(E1000State *s, int index, uint32_t val)
476 {
477     /* RST is self clearing */
478     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
479 }
480 
481 static void
482 set_rx_control(E1000State *s, int index, uint32_t val)
483 {
484     s->mac_reg[RCTL] = val;
485     s->rxbuf_size = rxbufsize(val);
486     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
487     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
488            s->mac_reg[RCTL]);
489     qemu_flush_queued_packets(qemu_get_queue(s->nic));
490 }
491 
/*
 * Guest write to the MDI control register: perform the encoded PHY
 * register read or write, latch the result with the READY bit set
 * (plus ERROR for a bad PHY address or inaccessible register), and
 * raise the MDAC interrupt if the guest asked for one.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* replace the data field with the PHY register contents */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* dispatch to a per-register hook when one is installed */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
526 
/*
 * Read of the EEPROM/flash control register: reflect the latched
 * control bits plus PRES/GNT, and drive the DO line from the EEPROM
 * bit currently addressed by the microwire shift state.  DO idles
 * high while no read is in progress.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* bitnum_out >> 4 selects the word, low nibble the bit (MSB first) */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
540 
/*
 * Guest write to EECD: emulate the microwire EEPROM serial protocol.
 * A chip-select rising edge resets the shift state; on each SK rising
 * edge one DI bit is shifted in, and after 9 bits (opcode in bits 8..6,
 * word address in bits 5..0) a read opcode switches the state machine
 * to shifting data out through get_eecd().
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* SK rising edge: shift one DI bit into the command accumulator */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
576 
/*
 * Read of the EERD register: once the guest has set the START bit,
 * return the addressed EEPROM word in the data field together with
 * DONE; addresses beyond EEPROM_CHECKSUM_REG complete with DONE but
 * no data.
 */
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}
591 
/*
 * Insert a 16-bit Internet checksum into the frame: sum data[css..n)
 * (n is clamped to cse + 1 when a non-zero checksum end is given) and
 * store the folded result big-endian at offset sloc.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        stw_be_p(data + sloc, net_checksum_finish(sum));
    }
}
604 
605 static inline void
606 inc_reg_if_not_full(E1000State *s, int index)
607 {
608     if (s->mac_reg[index] != 0xffffffff) {
609         s->mac_reg[index]++;
610     }
611 }
612 
613 static inline void
614 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
615 {
616     if (!memcmp(arr, bcast, sizeof bcast)) {
617         inc_reg_if_not_full(s, BPTC);
618     } else if (arr[0] & 1) {
619         inc_reg_if_not_full(s, MPTC);
620     }
621 }
622 
623 static void
624 grow_8reg_if_not_full(E1000State *s, int index, int size)
625 {
626     uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
627 
628     if (sum + size < sum) {
629         sum = ~0ULL;
630     } else {
631         sum += size;
632     }
633     s->mac_reg[index] = sum;
634     s->mac_reg[index+1] = sum >> 32;
635 }
636 
637 static void
638 increase_size_stats(E1000State *s, const int *size_regs, int size)
639 {
640     if (size > 1023) {
641         inc_reg_if_not_full(s, size_regs[5]);
642     } else if (size > 511) {
643         inc_reg_if_not_full(s, size_regs[4]);
644     } else if (size > 255) {
645         inc_reg_if_not_full(s, size_regs[3]);
646     } else if (size > 127) {
647         inc_reg_if_not_full(s, size_regs[2]);
648     } else if (size > 64) {
649         inc_reg_if_not_full(s, size_regs[1]);
650     } else if (size == 64) {
651         inc_reg_if_not_full(s, size_regs[0]);
652     }
653 }
654 
655 static inline int
656 vlan_enabled(E1000State *s)
657 {
658     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
659 }
660 
661 static inline int
662 vlan_rx_filter_enabled(E1000State *s)
663 {
664     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
665 }
666 
667 static inline int
668 is_vlan_packet(E1000State *s, const uint8_t *buf)
669 {
670     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
671                 le16_to_cpu(s->mac_reg[VET]));
672 }
673 
674 static inline int
675 is_vlan_txd(uint32_t txd_lower)
676 {
677     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
678 }
679 
680 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
681  * fill it in, just pad descriptor length by 4 bytes unless guest
682  * told us to strip it off the packet. */
683 static inline int
684 fcs_len(E1000State *s)
685 {
686     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
687 }
688 
/*
 * Hand a finished frame to the net backend, or straight back to our
 * own receive handler when the PHY is configured for loopback, then
 * update the TX broadcast/multicast and per-size statistics counters.
 */
static void
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
{
    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
                                    PTC1023, PTC1522 };

    NetClientState *nc = qemu_get_queue(s->nic);
    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
        nc->info->receive(nc, buf, size);
    } else {
        qemu_send_packet(nc, buf, size);
    }
    inc_tx_bcast_or_mcast_count(s, buf);
    increase_size_stats(s, PTCregs, size);
}
704 
/*
 * Transmit one segment from s->tx.data.  For a TSO segment, patch the
 * IPv4 total length and identification (or IPv6 payload length), the
 * TCP sequence number and PSH/FIN flags (or UDP length), and fold the
 * segment's payload length into the pseudo-header checksum before the
 * real checksums are inserted.  Prepends the saved VLAN tag when
 * requested, sends the frame, and updates the TX statistics.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {    /* IPv4 */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else {         /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss) {
                /* not the last segment: clear PSH and FIN */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                inc_reg_if_not_full(s, TSCTC);
            }
        } else    /* UDP */
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Rebuild the frame with the 802.1Q tag between the MAC
         * addresses, using the adjacent vlan[]/data[] fields as one
         * contiguous buffer. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    inc_reg_if_not_full(s, TPT);
    grow_8reg_if_not_full(s, TOTL, s->tx.size);
    /* keep the "good packets/octets" mirrors in sync with the totals */
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
766 
/*
 * Process one TX descriptor.  A context descriptor only latches the
 * offload parameters into s->tx; data and legacy descriptors DMA the
 * guest buffer into s->tx.data, segmenting at hdr_len + mss boundaries
 * when TSO is active, and transmit once the EOP bit is reached.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    /* this is probably wrong */
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Latch the VLAN tag to insert (Ethertype from VET + special field) */
    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: fill data[] up to header + mss, emit a segment, then
         * re-seed data[] with the saved header and continue. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* header just became complete: save a copy for resends */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* EOP: send the (final) segment unless TSO left only a bare header */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
863 
864 static uint32_t
865 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
866 {
867     PCIDevice *d = PCI_DEVICE(s);
868     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
869 
870     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
871         return 0;
872     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
873                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
874     dp->upper.data = cpu_to_le32(txd_upper);
875     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
876                   &dp->upper, sizeof(dp->upper));
877     return E1000_ICR_TXDW;
878 }
879 
880 static uint64_t tx_desc_base(E1000State *s)
881 {
882     uint64_t bah = s->mac_reg[TDBAH];
883     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
884 
885     return (bah << 32) + bal;
886 }
887 
static void
start_xmit(E1000State *s)
{
    /*
     * Drain the TX descriptor ring: walk from TDH towards TDT, DMA each
     * descriptor in, process it, and write status back when the
     * descriptor asks for it.  Finally raise the accumulated interrupt
     * causes (at minimum TXQE, transmit queue empty).
     */
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    /* Nothing to do unless the guest enabled transmission in TCTL. */
    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        /* Accumulate TXDW when the descriptor requested a writeback. */
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance the head, wrapping at the end of the ring. */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
929 
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    /*
     * Decide whether an incoming frame should be accepted.
     * Returns 1 to accept, 0 to drop.  Checks, in order: VLAN filter
     * table, promiscuous modes, broadcast acceptance, exact unicast
     * (receive address registers), and finally the multicast hash table.
     */
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    /* VLAN filtering: drop tagged frames whose VID is not in VFTA. */
    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        inc_reg_if_not_full(s, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        inc_reg_if_not_full(s, BPRC);
        return 1;
    }

    /* Exact unicast match against the 16 RAL/RAH register pairs. */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))  /* skip entries not marked valid */
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* Multicast hash: RCTL.MO selects which 12 bits of the address are
     * used to index the 4096-bit multicast table array. */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
        inc_reg_if_not_full(s, MPRC);
        return 1;
    }
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
989 
990 static void
991 e1000_set_link_status(NetClientState *nc)
992 {
993     E1000State *s = qemu_get_nic_opaque(nc);
994     uint32_t old_status = s->mac_reg[STATUS];
995 
996     if (nc->link_down) {
997         e1000_link_down(s);
998     } else {
999         if (have_autoneg(s) &&
1000             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1001             /* emulate auto-negotiation if supported */
1002             timer_mod(s->autoneg_timer,
1003                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1004         } else {
1005             e1000_link_up(s);
1006         }
1007     }
1008 
1009     if (s->mac_reg[STATUS] != old_status)
1010         set_ics(s, 0, E1000_ICR_LSC);
1011 }
1012 
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    /*
     * Return true when the free RX descriptors between RDH and RDT can
     * hold total_size bytes, at s->rxbuf_size bytes per descriptor.
     */
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* Free region wraps past the end of the ring. */
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        /* RDH == RDT: treated as no free descriptors. */
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
1030 
1031 static int
1032 e1000_can_receive(NetClientState *nc)
1033 {
1034     E1000State *s = qemu_get_nic_opaque(nc);
1035 
1036     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1037         (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1038         (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1039         e1000_has_rxbufs(s, 1);
1040 }
1041 
1042 static uint64_t rx_desc_base(E1000State *s)
1043 {
1044     uint64_t bah = s->mac_reg[RDBAH];
1045     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1046 
1047     return (bah << 32) + bal;
1048 }
1049 
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    /*
     * Receive one frame (as a scatter/gather list) into the guest's RX
     * descriptor ring.  Applies padding, size limits, the receive
     * filter, and VLAN stripping, then copies the payload across as
     * many descriptors as needed.  Returns the number of bytes consumed
     * (the frame size, even when filtered out) or -1 when the packet
     * must be retried later (link down, RX disabled, ring exhausted).
     */
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;
    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
                                    PRC1023, PRC1522 };

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        inc_reg_if_not_full(s, RUC);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        inc_reg_if_not_full(s, ROC);
        return size;
    }

    /* Dropped by the filter: report the packet as consumed anyway. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /* Strip the 802.1Q tag: keep the TCI for the descriptor's "special"
     * field and shift the MAC header over the 4 tag bytes. */
    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                                + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
    }
    /* Spread the frame over consecutive descriptors, s->rxbuf_size
     * bytes per descriptor; only the last one gets EOP. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Walk the iovec, DMA-ing chunks into the guest buffer. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    /* Update receive statistics registers. */
    increase_size_stats(s, PRCregs, total_size);
    inc_reg_if_not_full(s, TPR);
    s->mac_reg[GPRC] = s->mac_reg[TPR];
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     * Always include FCS length (4) in size.
     */
    grow_8reg_if_not_full(s, TORL, size+4);
    s->mac_reg[GORCL] = s->mac_reg[TORL];
    s->mac_reg[GORCH] = s->mac_reg[TORH];

    /* Raise RXT0, plus RXDMT0 when the ring has dropped below the
     * minimum-threshold configured via rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1210 
1211 static ssize_t
1212 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1213 {
1214     const struct iovec iov = {
1215         .iov_base = (uint8_t *)buf,
1216         .iov_len = size
1217     };
1218 
1219     return e1000_receive_iov(nc, &iov, 1);
1220 }
1221 
static uint32_t
mac_readreg(E1000State *s, int index)
{
    /* Default read handler: return the stored register value as-is. */
    return s->mac_reg[index];
}
1227 
1228 static uint32_t
1229 mac_low4_read(E1000State *s, int index)
1230 {
1231     return s->mac_reg[index] & 0xf;
1232 }
1233 
1234 static uint32_t
1235 mac_low11_read(E1000State *s, int index)
1236 {
1237     return s->mac_reg[index] & 0x7ff;
1238 }
1239 
1240 static uint32_t
1241 mac_low13_read(E1000State *s, int index)
1242 {
1243     return s->mac_reg[index] & 0x1fff;
1244 }
1245 
1246 static uint32_t
1247 mac_low16_read(E1000State *s, int index)
1248 {
1249     return s->mac_reg[index] & 0xffff;
1250 }
1251 
1252 static uint32_t
1253 mac_icr_read(E1000State *s, int index)
1254 {
1255     uint32_t ret = s->mac_reg[ICR];
1256 
1257     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1258     set_interrupt_cause(s, 0, 0);
1259     return ret;
1260 }
1261 
static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    /* Read-to-clear handler for 32-bit statistics registers. */
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}
1270 
static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    /* Read-to-clear handler for the high half of a 64-bit statistics
     * register pair; reading it clears both halves (the low half is
     * stored at index-1). */
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}
1280 
1281 static void
1282 mac_writereg(E1000State *s, int index, uint32_t val)
1283 {
1284     uint32_t macaddr[2];
1285 
1286     s->mac_reg[index] = val;
1287 
1288     if (index == RA + 1) {
1289         macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1290         macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1291         qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1292     }
1293 }
1294 
static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    /* RDT write: a new tail may mean fresh RX descriptors are available,
     * so kick the net layer to retry any queued packets. */
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1303 
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    /* Write handler for registers that keep only their low 16 bits. */
    s->mac_reg[index] = val & 0xffff;
}
1309 
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    /* TDLEN/RDLEN write: mask to the architected ring-length bits
     * (128-byte granularity). */
    s->mac_reg[index] = val & 0xfff80;
}
1315 
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    /* Shared handler for TCTL and TDT writes: store the value, clamp
     * the tail index to 16 bits, then try to drain the TX ring.
     * Note the mask must follow the store so that a TDT write itself
     * is clamped. */
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1323 
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    /* Writing 1s to ICR clears the corresponding cause bits. */
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1330 
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    /* Interrupt Mask Clear: remove the given bits from the interrupt
     * mask, then re-evaluate the interrupt line. */
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1337 
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    /* Interrupt Mask Set: add the given bits to the interrupt mask,
     * then re-evaluate the interrupt line. */
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1344 
#define getreg(x)    [x] = mac_readreg
/* Per-register read dispatch table, indexed by (register offset >> 2).
 * Unlisted entries are NULL and are reported as unknown registers. */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Statistics registers that clear on read (32- and 64-bit). */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers with only some low bits implemented. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (arrays of registers). */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1399 
#define putreg(x)    [x] = mac_writereg
/* Per-register write dispatch table, indexed by (register offset >> 2).
 * Unlisted entries are NULL; writes to registers that only appear in
 * macreg_readops are treated as writes to read-only registers. */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with side effects get dedicated handlers. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (arrays of registers). */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1429 
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    /* Registers only reachable when interrupt mitigation is enabled. */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Registers only reachable when the full-MAC compat flag is set. */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* These additionally warn that their emulation is incomplete. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1482 
1483 static void
1484 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1485                  unsigned size)
1486 {
1487     E1000State *s = opaque;
1488     unsigned int index = (addr & 0x1ffff) >> 2;
1489 
1490     if (index < NWRITEOPS && macreg_writeops[index]) {
1491         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1492             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1493             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1494                 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1495                        "It is not fully implemented.\n", index<<2);
1496             }
1497             macreg_writeops[index](s, index, val);
1498         } else {    /* "flag needed" bit is set, but the flag is not active */
1499             DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1500                    index<<2);
1501         }
1502     } else if (index < NREADOPS && macreg_readops[index]) {
1503         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1504                index<<2, val);
1505     } else {
1506         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1507                index<<2, val);
1508     }
1509 }
1510 
1511 static uint64_t
1512 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1513 {
1514     E1000State *s = opaque;
1515     unsigned int index = (addr & 0x1ffff) >> 2;
1516 
1517     if (index < NREADOPS && macreg_readops[index]) {
1518         if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1519             || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1520             if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1521                 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1522                        "It is not fully implemented.\n", index<<2);
1523             }
1524             return macreg_readops[index](s, index);
1525         } else {    /* "flag needed" bit is set, but the flag is not active */
1526             DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1527                    index<<2);
1528         }
1529     } else {
1530         DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1531     }
1532     return 0;
1533 }
1534 
/* MMIO BAR callbacks; all accesses are performed as 32-bit ops. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1544 
1545 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1546                               unsigned size)
1547 {
1548     E1000State *s = opaque;
1549 
1550     (void)s;
1551     return 0;
1552 }
1553 
1554 static void e1000_io_write(void *opaque, hwaddr addr,
1555                            uint64_t val, unsigned size)
1556 {
1557     E1000State *s = opaque;
1558 
1559     (void)s;
1560 }
1561 
/* I/O-port BAR callbacks (stubbed out; see e1000_io_read/write). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1567 
/* vmstate field-exists predicate: true only for version 1 streams. */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1572 
/* Migration pre-save hook: fold timer-driven state into the register
 * file so the destination can reconstruct it without live timers. */
static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}
1592 
/* Migration post-load hook: re-derive state that is not migrated
 * directly (mitigation timer state, backend link status). */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the MIT compat flag, mitigation registers must stay 0. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}
1620 
/* Subsection predicate: migrate mitigation state only when the MIT
 * compat flag is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1627 
/* Subsection predicate: migrate the full MAC register file only when
 * the MAC compat flag is enabled. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1634 
/* Optional migration subsection: interrupt mitigation registers. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1649 
/* Optional migration subsection: the entire 0x8000-entry MAC register
 * array (only when the MAC compat flag is set). */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1660 
/*
 * Main migration state for the e1000 device.
 *
 * The field list below is the on-the-wire format: order, types, and sizes
 * must never change, only grow via new versions or subsections.  The
 * explicit per-register entries predate the full_mac_state subsection,
 * which (when enabled) carries the whole register file as well.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang interface state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-flight transmit context (checksum offload / TSO) */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Receive-address, multicast and VLAN filter tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        NULL
    }
};
1744 
1745 /*
1746  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1747  * Note: A valid DevId will be inserted during pci_e1000_init().
1748  */
1749 static const uint16_t e1000_eeprom_template[64] = {
1750     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1751     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1752     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1753     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1754     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1755     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1756     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1757     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1758 };
1759 
1760 /* PCI interface */
1761 
1762 static void
1763 e1000_mmio_setup(E1000State *d)
1764 {
1765     int i;
1766     const uint32_t excluded_regs[] = {
1767         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1768         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1769     };
1770 
1771     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1772                           "e1000-mmio", PNPMMIO_SIZE);
1773     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1774     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1775         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1776                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1777     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1778 }
1779 
1780 static void
1781 pci_e1000_uninit(PCIDevice *dev)
1782 {
1783     E1000State *d = E1000(dev);
1784 
1785     timer_del(d->autoneg_timer);
1786     timer_free(d->autoneg_timer);
1787     timer_del(d->mit_timer);
1788     timer_free(d->mit_timer);
1789     qemu_del_nic(d->nic);
1790 }
1791 
/* Callbacks hooking this emulated NIC into QEMU's net client layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1800 
1801 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1802                                 uint32_t val, int len)
1803 {
1804     E1000State *s = E1000(pci_dev);
1805 
1806     pci_default_write_config(pci_dev, address, val, len);
1807 
1808     if (range_covers_byte(address, len, PCI_COMMAND) &&
1809         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1810         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1811     }
1812 }
1813 
1814 
1815 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1816 {
1817     DeviceState *dev = DEVICE(pci_dev);
1818     E1000State *d = E1000(pci_dev);
1819     PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1820     uint8_t *pci_conf;
1821     uint16_t checksum = 0;
1822     int i;
1823     uint8_t *macaddr;
1824 
1825     pci_dev->config_write = e1000_write_config;
1826 
1827     pci_conf = pci_dev->config;
1828 
1829     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1830     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1831 
1832     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1833 
1834     e1000_mmio_setup(d);
1835 
1836     pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1837 
1838     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1839 
1840     memmove(d->eeprom_data, e1000_eeprom_template,
1841         sizeof e1000_eeprom_template);
1842     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1843     macaddr = d->conf.macaddr.a;
1844     for (i = 0; i < 3; i++)
1845         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1846     d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1847     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1848         checksum += d->eeprom_data[i];
1849     checksum = (uint16_t) EEPROM_SUM - checksum;
1850     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1851 
1852     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1853                           object_get_typename(OBJECT(d)), dev->id, d);
1854 
1855     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1856 
1857     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1858     d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1859 }
1860 
1861 static void qdev_e1000_reset(DeviceState *dev)
1862 {
1863     E1000State *d = E1000(dev);
1864     e1000_reset(d);
1865 }
1866 
/*
 * User-visible properties.  The three feature bits default to on; they can
 * be cleared (e.g. via machine compatibility settings) to reproduce the
 * behaviour of older device models.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1877 
/* Per-variant identifiers consumed by e1000_class_init() via class_data. */
typedef struct E1000Info {
    const char *name;       /* QOM type name */
    uint16_t   device_id;   /* PCI device ID */
    uint8_t    revision;    /* PCI revision ID */
    uint16_t   phy_id2;     /* value for the PHY ID2 register */
} E1000Info;
1884 
1885 static void e1000_class_init(ObjectClass *klass, void *data)
1886 {
1887     DeviceClass *dc = DEVICE_CLASS(klass);
1888     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1889     E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1890     const E1000Info *info = data;
1891 
1892     k->realize = pci_e1000_realize;
1893     k->exit = pci_e1000_uninit;
1894     k->romfile = "efi-e1000.rom";
1895     k->vendor_id = PCI_VENDOR_ID_INTEL;
1896     k->device_id = info->device_id;
1897     k->revision = info->revision;
1898     e->phy_id2 = info->phy_id2;
1899     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1900     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1901     dc->desc = "Intel Gigabit Ethernet";
1902     dc->reset = qdev_e1000_reset;
1903     dc->vmsd = &vmstate_e1000;
1904     dc->props = e1000_properties;
1905 }
1906 
1907 static void e1000_instance_init(Object *obj)
1908 {
1909     E1000State *n = E1000(obj);
1910     device_add_bootindex_property(obj, &n->conf.bootindex,
1911                                   "bootindex", "/ethernet-phy@0",
1912                                   DEVICE(n), NULL);
1913 }
1914 
/* Abstract base type; concrete variants are registered from e1000_devices[]. */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
};
1923 
/* Supported chip variants; each becomes its own QOM type at module init. */
static const E1000Info e1000_devices[] = {
    {
        /* Default "e1000": 82540EM */
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1944 
1945 static void e1000_register_types(void)
1946 {
1947     int i;
1948 
1949     type_register_static(&e1000_base_info);
1950     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1951         const E1000Info *info = &e1000_devices[i];
1952         TypeInfo type_info = {};
1953 
1954         type_info.name = info->name;
1955         type_info.parent = TYPE_E1000_BASE;
1956         type_info.class_data = (void *)info;
1957         type_info.class_init = e1000_class_init;
1958         type_info.instance_init = e1000_instance_init;
1959 
1960         type_register(&type_info);
1961     }
1962 }
1963 
1964 type_init(e1000_register_types)
1965