xref: /qemu/hw/net/e1000.c (revision 567a3c9e7f98f698d1aeb73e32ca614086b63837)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 
36 #include "e1000_regs.h"
37 
38 #define E1000_DEBUG
39 
40 #ifdef E1000_DEBUG
41 enum {
42     DEBUG_GENERAL,	DEBUG_IO,	DEBUG_MMIO,	DEBUG_INTERRUPT,
43     DEBUG_RX,		DEBUG_TX,	DEBUG_MDIC,	DEBUG_EEPROM,
44     DEBUG_UNKNOWN,	DEBUG_TXSUM,	DEBUG_TXERR,	DEBUG_RXERR,
45     DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
46 };
47 #define DBGBIT(x)	(1<<DEBUG_##x)
48 static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
49 
50 #define	DBGOUT(what, fmt, ...) do { \
51     if (debugflags & DBGBIT(what)) \
52         fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
53     } while (0)
54 #else
55 #define	DBGOUT(what, fmt, ...) do {} while (0)
56 #endif
57 
58 #define IOPORT_SIZE       0x40
59 #define PNPMMIO_SIZE      0x20000
60 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
61 
62 /* this is the size past which hardware will drop packets when setting LPE=0 */
63 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
64 /* this is the size past which hardware will drop packets when setting LPE=1 */
65 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
66 
67 /*
68  * HW models:
69  *  E1000_DEV_ID_82540EM works with Windows and Linux
70  *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
71  *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
72  *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73  *  Others never tested
74  */
75 enum { E1000_DEVID = E1000_DEV_ID_82540EM };
76 
77 /*
78  * May need to specify additional MAC-to-PHY entries --
79  * Intel's Windows driver refuses to initialize unless they match
80  */
81 enum {
82     PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
83                    E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
84                    /* default to E1000_DEV_ID_82540EM */	0xc20
85 };
86 
87 typedef struct E1000State_st {
88     PCIDevice dev;
89     NICState *nic;
90     NICConf conf;
91     MemoryRegion mmio;
92     MemoryRegion io;
93 
94     uint32_t mac_reg[0x8000];
95     uint16_t phy_reg[0x20];
96     uint16_t eeprom_data[64];
97 
98     uint32_t rxbuf_size;
99     uint32_t rxbuf_min_shift;
100     struct e1000_tx {
101         unsigned char header[256];
102         unsigned char vlan_header[4];
103         /* Fields vlan and data must not be reordered or separated. */
104         unsigned char vlan[4];
105         unsigned char data[0x10000];
106         uint16_t size;
107         unsigned char sum_needed;
108         unsigned char vlan_needed;
109         uint8_t ipcss;
110         uint8_t ipcso;
111         uint16_t ipcse;
112         uint8_t tucss;
113         uint8_t tucso;
114         uint16_t tucse;
115         uint8_t hdr_len;
116         uint16_t mss;
117         uint32_t paylen;
118         uint16_t tso_frames;
119         char tse;
120         int8_t ip;
121         int8_t tcp;
122         char cptse;     // current packet tse bit
123     } tx;
124 
125     struct {
126         uint32_t val_in;	// shifted in from guest driver
127         uint16_t bitnum_in;
128         uint16_t bitnum_out;
129         uint16_t reading;
130         uint32_t old_eecd;
131     } eecd_state;
132 
133     QEMUTimer *autoneg_timer;
134 
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
138     uint32_t compat_flags;
139 } E1000State;
140 
141 #define TYPE_E1000 "e1000"
142 
143 #define E1000(obj) \
144     OBJECT_CHECK(E1000State, (obj), TYPE_E1000)
145 
146 #define	defreg(x)	x = (E1000_##x>>2)
147 enum {
148     defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
149     defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
150     defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
151     defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
152     defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
153     defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
154     defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
155     defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
156     defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
157     defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
158     defreg(VET),
159 };
160 
161 static void
162 e1000_link_down(E1000State *s)
163 {
164     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
165     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
166 }
167 
168 static void
169 e1000_link_up(E1000State *s)
170 {
171     s->mac_reg[STATUS] |= E1000_STATUS_LU;
172     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
173 }
174 
175 static void
176 set_phy_ctrl(E1000State *s, int index, uint16_t val)
177 {
178     /*
179      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
180      * migrate during auto negotiation, after migration the link will be
181      * down.
182      */
183     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
184         return;
185     }
186     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
187         e1000_link_down(s);
188         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
189         DBGOUT(PHY, "Start link auto negotiation\n");
190         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
191     }
192 }
193 
194 static void
195 e1000_autoneg_timer(void *opaque)
196 {
197     E1000State *s = opaque;
198     if (!qemu_get_queue(s->nic)->link_down) {
199         e1000_link_up(s);
200     }
201     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
202     DBGOUT(PHY, "Auto negotiation completed\n");
203 }
204 
205 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
206     [PHY_CTRL] = set_phy_ctrl,
207 };
208 
209 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
210 
211 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
212 static const char phy_regcap[0x20] = {
213     [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
214     [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
215     [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
216     [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
217     [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
218     [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
219 };
220 
221 static const uint16_t phy_reg_init[] = {
222     [PHY_CTRL] = 0x1140,
223     [PHY_STATUS] = 0x794d, /* link initially up, autoneg not yet completed */
224     [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
225     [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
226     [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
227     [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
228     [M88E1000_PHY_SPEC_STATUS] = 0xac00,
229 };
230 
231 static const uint32_t mac_reg_init[] = {
232     [PBA] =     0x00100030,
233     [LEDCTL] =  0x602,
234     [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
235                 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
236     [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
237                 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
238                 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
239                 E1000_STATUS_LU,
240     [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
241                 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
242                 E1000_MANC_RMCP_EN,
243 };
244 
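/*
 * Update the interrupt cause registers and the INTx line: ICR (and the
 * mirrored ICS) take the new cause bits, and the PCI interrupt is asserted
 * whenever any cause remains unmasked in IMS.
 */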
245 static void
246 set_interrupt_cause(E1000State *s, int index, uint32_t val)
247 {
248     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
249         /* Only for 8257x */
250         val |= E1000_ICR_INT_ASSERTED;
251     }
252     s->mac_reg[ICR] = val;
253 
254     /*
255      * Make sure ICR and ICS registers have the same value.
256      * The spec says that the ICS register is write-only.  However in practice,
257      * on real hardware ICS is readable, and for reads it has the same value as
258      * ICR (except that ICS does not have the clear on read behaviour of ICR).
259      *
260      * The VxWorks PRO/1000 driver uses this behaviour.
261      */
262     s->mac_reg[ICS] = val;
263 
264     qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
265 }
266 
267 static void
268 set_ics(E1000State *s, int index, uint32_t val)
269 {
270     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
271         s->mac_reg[IMS]);
272     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
273 }
274 
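/*
 * Decode the RCTL buffer-size bits (BSEX plus the SZ_* field) into the
 * receive buffer size in bytes; unhandled combinations fall back to 2048.
 */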
275 static int
276 rxbufsize(uint32_t v)
277 {
278     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
279          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
280          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
281     switch (v) {
282     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
283         return 16384;
284     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
285         return 8192;
286     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
287         return 4096;
288     case E1000_RCTL_SZ_1024:
289         return 1024;
290     case E1000_RCTL_SZ_512:
291         return 512;
292     case E1000_RCTL_SZ_256:
293         return 256;
294     }
295     return 2048;
296 }
297 
298 static void e1000_reset(void *opaque)
299 {
300     E1000State *d = opaque;
301     uint8_t *macaddr = d->conf.macaddr.a;
302     int i;
303 
304     qemu_del_timer(d->autoneg_timer);
305     memset(d->phy_reg, 0, sizeof d->phy_reg);
306     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
307     memset(d->mac_reg, 0, sizeof d->mac_reg);
308     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
309     d->rxbuf_min_shift = 1;
310     memset(&d->tx, 0, sizeof d->tx);
311 
312     if (qemu_get_queue(d->nic)->link_down) {
313         e1000_link_down(d);
314     }
315 
316     /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
317     d->mac_reg[RA] = 0;
318     d->mac_reg[RA + 1] = E1000_RAH_AV;
319     for (i = 0; i < 4; i++) {
320         d->mac_reg[RA] |= macaddr[i] << (8 * i);
321         d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
322     }
323 }
324 
325 static void
326 set_ctrl(E1000State *s, int index, uint32_t val)
327 {
328     /* RST is self clearing */
329     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
330 }
331 
332 static void
333 set_rx_control(E1000State *s, int index, uint32_t val)
334 {
335     s->mac_reg[RCTL] = val;
336     s->rxbuf_size = rxbufsize(val);
337     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
338     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
339            s->mac_reg[RCTL]);
340     qemu_flush_queued_packets(qemu_get_queue(s->nic));
341 }
342 
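/*
 * Emulate the MDI control register: MDIO reads and writes are serviced for
 * the single PHY at address 1, writes are dispatched through
 * phyreg_writeops[], READY is set on completion, and an MDAC interrupt is
 * raised if the guest requested one.
 */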
343 static void
344 set_mdic(E1000State *s, int index, uint32_t val)
345 {
346     uint32_t data = val & E1000_MDIC_DATA_MASK;
347     uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
348 
349     if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
350         val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
351     else if (val & E1000_MDIC_OP_READ) {
352         DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
353         if (!(phy_regcap[addr] & PHY_R)) {
354             DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
355             val |= E1000_MDIC_ERROR;
356         } else
357             val = (val ^ data) | s->phy_reg[addr];
358     } else if (val & E1000_MDIC_OP_WRITE) {
359         DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
360         if (!(phy_regcap[addr] & PHY_W)) {
361             DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
362             val |= E1000_MDIC_ERROR;
363         } else {
364             if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
365                 phyreg_writeops[addr](s, index, data);
366             }
367             s->phy_reg[addr] = data;
368         }
369     }
370     s->mac_reg[MDIC] = val | E1000_MDIC_READY;
371 
372     if (val & E1000_MDIC_INT_EN) {
373         set_ics(s, 0, E1000_ICR_MDAC);
374     }
375 }
376 
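/*
 * Read of the EECD register: EEPROM present/grant are always reported, and
 * DO is driven high while no read is in progress or whenever the EEPROM bit
 * currently selected by the shift state is set.
 */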
377 static uint32_t
378 get_eecd(E1000State *s, int index)
379 {
380     uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
381 
382     DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
383            s->eecd_state.bitnum_out, s->eecd_state.reading);
384     if (!s->eecd_state.reading ||
385         ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
386           ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
387         ret |= E1000_EECD_DO;
388     return ret;
389 }
390 
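/*
 * Bit-banged Microwire EEPROM access through EECD: a rising edge on CS
 * resets the shift state, DI is sampled on rising SK edges, and once nine
 * bits (start bit, opcode, 6-bit address) have been shifted in, a read
 * opcode switches the state machine to clocking the addressed word out
 * MSB-first via get_eecd().
 */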
391 static void
392 set_eecd(E1000State *s, int index, uint32_t val)
393 {
394     uint32_t oldval = s->eecd_state.old_eecd;
395 
396     s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
397             E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
398     if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
399 	return;
400     if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
401 	s->eecd_state.val_in = 0;
402 	s->eecd_state.bitnum_in = 0;
403 	s->eecd_state.bitnum_out = 0;
404 	s->eecd_state.reading = 0;
405     }
406     if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
407         return;
408     if (!(E1000_EECD_SK & val)) {		// falling edge
409         s->eecd_state.bitnum_out++;
410         return;
411     }
412     s->eecd_state.val_in <<= 1;
413     if (val & E1000_EECD_DI)
414         s->eecd_state.val_in |= 1;
415     if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
416         s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
417         s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
418             EEPROM_READ_OPCODE_MICROWIRE);
419     }
420     DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
421            s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
422            s->eecd_state.reading);
423 }
424 
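/*
 * EEPROM read through the EERD register: once the guest has set the START
 * bit, reads return the addressed EEPROM word together with the DONE flag
 * (out-of-range addresses return DONE with no data).
 */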
425 static uint32_t
426 flash_eerd_read(E1000State *s, int x)
427 {
428     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
429 
430     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
431         return (s->mac_reg[EERD]);
432 
433     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
434         return (E1000_EEPROM_RW_REG_DONE | r);
435 
436     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
437            E1000_EEPROM_RW_REG_DONE | r);
438 }
439 
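/*
 * Compute the Internet checksum over data[css..cse] (or to the end of the
 * buffer when cse is 0) and store it big-endian at offset sloc.
 */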
440 static void
441 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
442 {
443     uint32_t sum;
444 
445     if (cse && cse < n)
446         n = cse + 1;
447     if (sloc < n-1) {
448         sum = net_checksum_add(n-css, data+css);
449         cpu_to_be16wu((uint16_t *)(data + sloc),
450                       net_checksum_finish(sum));
451     }
452 }
453 
454 static inline int
455 vlan_enabled(E1000State *s)
456 {
457     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
458 }
459 
460 static inline int
461 vlan_rx_filter_enabled(E1000State *s)
462 {
463     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
464 }
465 
466 static inline int
467 is_vlan_packet(E1000State *s, const uint8_t *buf)
468 {
469     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
470                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
471 }
472 
473 static inline int
474 is_vlan_txd(uint32_t txd_lower)
475 {
476     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
477 }
478 
479 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
480  * fill it in, so we just pad the descriptor length by 4 bytes unless the
481  * guest told us to strip it off the packet. */
482 static inline int
483 fcs_len(E1000State *s)
484 {
485     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
486 }
487 
488 static void
489 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
490 {
491     NetClientState *nc = qemu_get_queue(s->nic);
492     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
493         nc->info->receive(nc, buf, size);
494     } else {
495         qemu_send_packet(nc, buf, size);
496     }
497 }
498 
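/*
 * Transmit one segment: for TSO, patch the IP total length / IPv6 payload
 * length and the IP identification, the TCP sequence number and PSH/FIN
 * flags (or the UDP length), and fold the payload length into the stored
 * pseudo-header checksum.  Then insert the requested TCP/UDP and IP
 * checksums, prepend the VLAN tag if one is pending, hand the frame to
 * e1000_send_packet() and update the transmit statistics counters.
 */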
499 static void
500 xmit_seg(E1000State *s)
501 {
502     uint16_t len, *sp;
503     unsigned int frames = s->tx.tso_frames, css, sofar, n;
504     struct e1000_tx *tp = &s->tx;
505 
506     if (tp->tse && tp->cptse) {
507         css = tp->ipcss;
508         DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
509                frames, tp->size, css);
510         if (tp->ip) {		// IPv4
511             cpu_to_be16wu((uint16_t *)(tp->data+css+2),
512                           tp->size - css);
513             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
514                           be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
515         } else			// IPv6
516             cpu_to_be16wu((uint16_t *)(tp->data+css+4),
517                           tp->size - css);
518         css = tp->tucss;
519         len = tp->size - css;
520         DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
521         if (tp->tcp) {
522             sofar = frames * tp->mss;
523             cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
524                 be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
525             if (tp->paylen - sofar > tp->mss)
526                 tp->data[css + 13] &= ~9;		// PSH, FIN
527         } else	// UDP
528             cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
529         if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
530             unsigned int phsum;
531             // add pseudo-header length before checksum calculation
532             sp = (uint16_t *)(tp->data + tp->tucso);
533             phsum = be16_to_cpup(sp) + len;
534             phsum = (phsum >> 16) + (phsum & 0xffff);
535             cpu_to_be16wu(sp, phsum);
536         }
537         tp->tso_frames++;
538     }
539 
540     if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
541         putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
542     if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
543         putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
544     if (tp->vlan_needed) {
545         memmove(tp->vlan, tp->data, 4);
546         memmove(tp->data, tp->data + 4, 8);
547         memcpy(tp->data + 8, tp->vlan_header, 4);
548         e1000_send_packet(s, tp->vlan, tp->size + 4);
549     } else
550         e1000_send_packet(s, tp->data, tp->size);
551     s->mac_reg[TPT]++;
552     s->mac_reg[GPTC]++;
553     n = s->mac_reg[TOTL];
554     if ((s->mac_reg[TOTL] += s->tx.size) < n)
555         s->mac_reg[TOTH]++;
556 }
557 
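/*
 * Process one transmit descriptor.  Context descriptors just latch the
 * checksum/TSO offload parameters; data and legacy descriptors DMA their
 * buffer into tp->data.  With TSE set the accumulated data is cut into
 * mss-sized segments, each re-prefixed with the saved header, and the
 * pending frame is transmitted when a descriptor with EOP is seen.
 */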
558 static void
559 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
560 {
561     uint32_t txd_lower = le32_to_cpu(dp->lower.data);
562     uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
563     unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
564     unsigned int msh = 0xfffff;
565     uint64_t addr;
566     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
567     struct e1000_tx *tp = &s->tx;
568 
569     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
570         op = le32_to_cpu(xp->cmd_and_length);
571         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
572         tp->ipcso = xp->lower_setup.ip_fields.ipcso;
573         tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
574         tp->tucss = xp->upper_setup.tcp_fields.tucss;
575         tp->tucso = xp->upper_setup.tcp_fields.tucso;
576         tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
577         tp->paylen = op & 0xfffff;
578         tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
579         tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
580         tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
581         tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
582         tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
583         tp->tso_frames = 0;
584         if (tp->tucso == 0) {	// this is probably wrong
585             DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
586             tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
587         }
588         return;
589     } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
590         // data descriptor
591         if (tp->size == 0) {
592             tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
593         }
594         tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
595     } else {
596         // legacy descriptor
597         tp->cptse = 0;
598     }
599 
600     if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
601         (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
602         tp->vlan_needed = 1;
603         cpu_to_be16wu((uint16_t *)(tp->vlan_header),
604                       le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
605         cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
606                       le16_to_cpu(dp->upper.fields.special));
607     }
608 
609     addr = le64_to_cpu(dp->buffer_addr);
610     if (tp->tse && tp->cptse) {
611         msh = tp->hdr_len + tp->mss;
612         do {
613             bytes = split_size;
614             if (tp->size + bytes > msh)
615                 bytes = msh - tp->size;
616 
617             bytes = MIN(sizeof(tp->data) - tp->size, bytes);
618             pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
619             sz = tp->size + bytes;
620             if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
621                 memmove(tp->header, tp->data, tp->hdr_len);
622             }
623             tp->size = sz;
624             addr += bytes;
625             if (sz == msh) {
626                 xmit_seg(s);
627                 memmove(tp->data, tp->header, tp->hdr_len);
628                 tp->size = tp->hdr_len;
629             }
630         } while (split_size -= bytes);
631     } else if (!tp->tse && tp->cptse) {
632         // context descriptor TSE is not set, while data descriptor TSE is set
633         DBGOUT(TXERR, "TCP segmentation error\n");
634     } else {
635         split_size = MIN(sizeof(tp->data) - tp->size, split_size);
636         pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
637         tp->size += split_size;
638     }
639 
640     if (!(txd_lower & E1000_TXD_CMD_EOP))
641         return;
642     if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
643         xmit_seg(s);
644     }
645     tp->tso_frames = 0;
646     tp->sum_needed = 0;
647     tp->vlan_needed = 0;
648     tp->size = 0;
649     tp->cptse = 0;
650 }
651 
652 static uint32_t
653 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
654 {
655     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
656 
657     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
658         return 0;
659     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
660                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
661     dp->upper.data = cpu_to_le32(txd_upper);
662     pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
663                   &dp->upper, sizeof(dp->upper));
664     return E1000_ICR_TXDW;
665 }
666 
667 static uint64_t tx_desc_base(E1000State *s)
668 {
669     uint64_t bah = s->mac_reg[TDBAH];
670     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
671 
672     return (bah << 32) + bal;
673 }
674 
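/*
 * Walk the transmit ring from TDH to TDT, processing each descriptor and
 * writing back its status, then raise the accumulated interrupt causes
 * (TXQE, plus TXDW for descriptors that requested report-status).
 */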
675 static void
676 start_xmit(E1000State *s)
677 {
678     dma_addr_t base;
679     struct e1000_tx_desc desc;
680     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
681 
682     if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
683         DBGOUT(TX, "tx disabled\n");
684         return;
685     }
686 
687     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
688         base = tx_desc_base(s) +
689                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
690         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
691 
692         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
693                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
694                desc.upper.data);
695 
696         process_tx_desc(s, &desc);
697         cause |= txdesc_writeback(s, base, &desc);
698 
699         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
700             s->mac_reg[TDH] = 0;
701         /*
702          * The following could happen only if guest software assigns
703          * bogus values to TDT/TDLEN; there's nothing too intelligent
704          * we could do about this.
705          */
706         if (s->mac_reg[TDH] == tdh_start) {
707             DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
708                    tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
709             break;
710         }
711     }
712     set_ics(s, 0, cause);
713 }
714 
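/*
 * Return nonzero if the frame should be accepted: it must pass the VLAN
 * filter table when VLAN filtering is enabled, and then match promiscuous
 * mode, broadcast, an exact unicast address in RA[], or the multicast hash
 * table (MTA).
 */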
715 static int
716 receive_filter(E1000State *s, const uint8_t *buf, int size)
717 {
718     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
719     static const int mta_shift[] = {4, 3, 2, 0};
720     uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
721 
722     if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
723         uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
724         uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
725                                      ((vid >> 5) & 0x7f));
726         if ((vfta & (1 << (vid & 0x1f))) == 0)
727             return 0;
728     }
729 
730     if (rctl & E1000_RCTL_UPE)			// promiscuous
731         return 1;
732 
733     if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
734         return 1;
735 
736     if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
737         return 1;
738 
739     for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
740         if (!(rp[1] & E1000_RAH_AV))
741             continue;
742         ra[0] = cpu_to_le32(rp[0]);
743         ra[1] = cpu_to_le32(rp[1]);
744         if (!memcmp(buf, (uint8_t *)ra, 6)) {
745             DBGOUT(RXFILTER,
746                    "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
747                    (int)(rp - s->mac_reg - RA)/2,
748                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
749             return 1;
750         }
751     }
752     DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
753            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
754 
755     f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
756     f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
757     if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
758         return 1;
759     DBGOUT(RXFILTER,
760            "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
761            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
762            (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
763            s->mac_reg[MTA + (f >> 5)]);
764 
765     return 0;
766 }
767 
768 static void
769 e1000_set_link_status(NetClientState *nc)
770 {
771     E1000State *s = qemu_get_nic_opaque(nc);
772     uint32_t old_status = s->mac_reg[STATUS];
773 
774     if (nc->link_down) {
775         e1000_link_down(s);
776     } else {
777         e1000_link_up(s);
778     }
779 
780     if (s->mac_reg[STATUS] != old_status)
781         set_ics(s, 0, E1000_ICR_LSC);
782 }
783 
784 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
785 {
786     int bufs;
787     /* Fast-path short packets */
788     if (total_size <= s->rxbuf_size) {
789         return s->mac_reg[RDH] != s->mac_reg[RDT];
790     }
791     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
792         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
793     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
794         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
795             s->mac_reg[RDT] - s->mac_reg[RDH];
796     } else {
797         return false;
798     }
799     return total_size <= bufs * s->rxbuf_size;
800 }
801 
802 static int
803 e1000_can_receive(NetClientState *nc)
804 {
805     E1000State *s = qemu_get_nic_opaque(nc);
806 
807     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
808         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
809 }
810 
811 static uint64_t rx_desc_base(E1000State *s)
812 {
813     uint64_t bah = s->mac_reg[RDBAH];
814     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
815 
816     return (bah << 32) + bal;
817 }
818 
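/*
 * Receive path: pad runt frames to the minimum Ethernet length, drop
 * oversized frames unless LPE/SBP allows them, apply receive_filter(),
 * strip the VLAN tag when VME is enabled, then copy the packet into the
 * guest's receive buffers across as many descriptors as needed and signal
 * RXT0 (plus RXDMT0 when the ring runs low, or RXO on overrun).
 */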
819 static ssize_t
820 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
821 {
822     E1000State *s = qemu_get_nic_opaque(nc);
823     struct e1000_rx_desc desc;
824     dma_addr_t base;
825     unsigned int n, rdt;
826     uint32_t rdh_start;
827     uint16_t vlan_special = 0;
828     uint8_t vlan_status = 0, vlan_offset = 0;
829     uint8_t min_buf[MIN_BUF_SIZE];
830     size_t desc_offset;
831     size_t desc_size;
832     size_t total_size;
833 
834     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
835         return -1;
836     }
837 
838     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
839         return -1;
840     }
841 
842     /* Pad to minimum Ethernet frame length */
843     if (size < sizeof(min_buf)) {
844         memcpy(min_buf, buf, size);
845         memset(&min_buf[size], 0, sizeof(min_buf) - size);
846         buf = min_buf;
847         size = sizeof(min_buf);
848     }
849 
850     /* Discard oversized packets if !LPE and !SBP. */
851     if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
852         (size > MAXIMUM_ETHERNET_VLAN_SIZE
853         && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
854         && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
855         return size;
856     }
857 
858     if (!receive_filter(s, buf, size))
859         return size;
860 
861     if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
862         vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
863         memmove((uint8_t *)buf + 4, buf, 12);
864         vlan_status = E1000_RXD_STAT_VP;
865         vlan_offset = 4;
866         size -= 4;
867     }
868 
869     rdh_start = s->mac_reg[RDH];
870     desc_offset = 0;
871     total_size = size + fcs_len(s);
872     if (!e1000_has_rxbufs(s, total_size)) {
873             set_ics(s, 0, E1000_ICS_RXO);
874             return -1;
875     }
876     do {
877         desc_size = total_size - desc_offset;
878         if (desc_size > s->rxbuf_size) {
879             desc_size = s->rxbuf_size;
880         }
881         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
882         pci_dma_read(&s->dev, base, &desc, sizeof(desc));
883         desc.special = vlan_special;
884         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
885         if (desc.buffer_addr) {
886             if (desc_offset < size) {
887                 size_t copy_size = size - desc_offset;
888                 if (copy_size > s->rxbuf_size) {
889                     copy_size = s->rxbuf_size;
890                 }
891                 pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
892                               buf + desc_offset + vlan_offset, copy_size);
893             }
894             desc_offset += desc_size;
895             desc.length = cpu_to_le16(desc_size);
896             if (desc_offset >= total_size) {
897                 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
898             } else {
899                 /* Guest zeroing out status is not a hardware requirement.
900                    Clear EOP in case guest didn't do it. */
901                 desc.status &= ~E1000_RXD_STAT_EOP;
902             }
903         } else { // as per Intel docs; skip descriptors with null buf addr
904             DBGOUT(RX, "Null RX descriptor!!\n");
905         }
906         pci_dma_write(&s->dev, base, &desc, sizeof(desc));
907 
908         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
909             s->mac_reg[RDH] = 0;
910         /* see comment in start_xmit; same here */
911         if (s->mac_reg[RDH] == rdh_start) {
912             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
913                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
914             set_ics(s, 0, E1000_ICS_RXO);
915             return -1;
916         }
917     } while (desc_offset < total_size);
918 
919     s->mac_reg[GPRC]++;
920     s->mac_reg[TPR]++;
921     /* TOR - Total Octets Received:
922      * This register includes bytes received in a packet from the <Destination
923      * Address> field through the <CRC> field, inclusively.
924      */
925     n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
926     if (n < s->mac_reg[TORL])
927         s->mac_reg[TORH]++;
928     s->mac_reg[TORL] = n;
929 
930     n = E1000_ICS_RXT0;
931     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
932         rdt += s->mac_reg[RDLEN] / sizeof(desc);
933     if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
934         s->rxbuf_min_shift)
935         n |= E1000_ICS_RXDMT0;
936 
937     set_ics(s, 0, n);
938 
939     return size;
940 }
941 
942 static uint32_t
943 mac_readreg(E1000State *s, int index)
944 {
945     return s->mac_reg[index];
946 }
947 
948 static uint32_t
949 mac_icr_read(E1000State *s, int index)
950 {
951     uint32_t ret = s->mac_reg[ICR];
952 
953     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
954     set_interrupt_cause(s, 0, 0);
955     return ret;
956 }
957 
958 static uint32_t
959 mac_read_clr4(E1000State *s, int index)
960 {
961     uint32_t ret = s->mac_reg[index];
962 
963     s->mac_reg[index] = 0;
964     return ret;
965 }
966 
967 static uint32_t
968 mac_read_clr8(E1000State *s, int index)
969 {
970     uint32_t ret = s->mac_reg[index];
971 
972     s->mac_reg[index] = 0;
973     s->mac_reg[index-1] = 0;
974     return ret;
975 }
976 
977 static void
978 mac_writereg(E1000State *s, int index, uint32_t val)
979 {
980     s->mac_reg[index] = val;
981 }
982 
983 static void
984 set_rdt(E1000State *s, int index, uint32_t val)
985 {
986     s->mac_reg[index] = val & 0xffff;
987     if (e1000_has_rxbufs(s, 1)) {
988         qemu_flush_queued_packets(qemu_get_queue(s->nic));
989     }
990 }
991 
992 static void
993 set_16bit(E1000State *s, int index, uint32_t val)
994 {
995     s->mac_reg[index] = val & 0xffff;
996 }
997 
998 static void
999 set_dlen(E1000State *s, int index, uint32_t val)
1000 {
1001     s->mac_reg[index] = val & 0xfff80;
1002 }
1003 
1004 static void
1005 set_tctl(E1000State *s, int index, uint32_t val)
1006 {
1007     s->mac_reg[index] = val;
1008     s->mac_reg[TDT] &= 0xffff;
1009     start_xmit(s);
1010 }
1011 
1012 static void
1013 set_icr(E1000State *s, int index, uint32_t val)
1014 {
1015     DBGOUT(INTERRUPT, "set_icr %x\n", val);
1016     set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1017 }
1018 
1019 static void
1020 set_imc(E1000State *s, int index, uint32_t val)
1021 {
1022     s->mac_reg[IMS] &= ~val;
1023     set_ics(s, 0, 0);
1024 }
1025 
1026 static void
1027 set_ims(E1000State *s, int index, uint32_t val)
1028 {
1029     s->mac_reg[IMS] |= val;
1030     set_ics(s, 0, 0);
1031 }
1032 
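/*
 * Per-register read/write dispatch tables, indexed by the register offset
 * divided by four; e1000_mmio_read() and e1000_mmio_write() look their
 * handlers up here.
 */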
1033 #define getreg(x)	[x] = mac_readreg
1034 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1035     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1036     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1037     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1038     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1039     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1040     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1041     getreg(TDLEN),	getreg(RDLEN),
1042 
1043     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1044     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1045     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1046     [CRCERRS ... MPC] = &mac_readreg,
1047     [RA ... RA+31] = &mac_readreg,
1048     [MTA ... MTA+127] = &mac_readreg,
1049     [VFTA ... VFTA+127] = &mac_readreg,
1050 };
1051 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1052 
1053 #define putreg(x)	[x] = mac_writereg
1054 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1055     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1056     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1057     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1058     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1059     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1060     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1061     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1062     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1063     [RA ... RA+31] = &mac_writereg,
1064     [MTA ... MTA+127] = &mac_writereg,
1065     [VFTA ... VFTA+127] = &mac_writereg,
1066 };
1067 
1068 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1069 
1070 static void
1071 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1072                  unsigned size)
1073 {
1074     E1000State *s = opaque;
1075     unsigned int index = (addr & 0x1ffff) >> 2;
1076 
1077     if (index < NWRITEOPS && macreg_writeops[index]) {
1078         macreg_writeops[index](s, index, val);
1079     } else if (index < NREADOPS && macreg_readops[index]) {
1080         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1081     } else {
1082         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1083                index<<2, val);
1084     }
1085 }
1086 
1087 static uint64_t
1088 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1089 {
1090     E1000State *s = opaque;
1091     unsigned int index = (addr & 0x1ffff) >> 2;
1092 
1093     if (index < NREADOPS && macreg_readops[index])
1094     {
1095         return macreg_readops[index](s, index);
1096     }
1097     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1098     return 0;
1099 }
1100 
1101 static const MemoryRegionOps e1000_mmio_ops = {
1102     .read = e1000_mmio_read,
1103     .write = e1000_mmio_write,
1104     .endianness = DEVICE_LITTLE_ENDIAN,
1105     .impl = {
1106         .min_access_size = 4,
1107         .max_access_size = 4,
1108     },
1109 };
1110 
1111 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1112                               unsigned size)
1113 {
1114     E1000State *s = opaque;
1115 
1116     (void)s;
1117     return 0;
1118 }
1119 
1120 static void e1000_io_write(void *opaque, hwaddr addr,
1121                            uint64_t val, unsigned size)
1122 {
1123     E1000State *s = opaque;
1124 
1125     (void)s;
1126 }
1127 
1128 static const MemoryRegionOps e1000_io_ops = {
1129     .read = e1000_io_read,
1130     .write = e1000_io_write,
1131     .endianness = DEVICE_LITTLE_ENDIAN,
1132 };
1133 
1134 static bool is_version_1(void *opaque, int version_id)
1135 {
1136     return version_id == 1;
1137 }
1138 
1139 static void e1000_pre_save(void *opaque)
1140 {
1141     E1000State *s = opaque;
1142     NetClientState *nc = qemu_get_queue(s->nic);
1143 
1144     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1145         return;
1146     }
1147 
1148     /*
1149      * If link is down and auto-negotiation is ongoing, complete
1150      * auto-negotiation immediately.  This allows us to look at
1151      * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1152      */
1153     if (nc->link_down &&
1154         s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1155         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1156          s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1157     }
1158 }
1159 
1160 static int e1000_post_load(void *opaque, int version_id)
1161 {
1162     E1000State *s = opaque;
1163     NetClientState *nc = qemu_get_queue(s->nic);
1164 
1165     /* nc.link_down can't be migrated, so infer link_down according
1166      * to link status bit in mac_reg[STATUS].
1167      * Alternatively, restart link negotiation if it was in progress. */
1168     nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1169 
1170     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1171         return 0;
1172     }
1173 
1174     if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1175         s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
1176         !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1177         nc->link_down = false;
1178         qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
1179     }
1180 
1181     return 0;
1182 }
1183 
1184 static const VMStateDescription vmstate_e1000 = {
1185     .name = "e1000",
1186     .version_id = 2,
1187     .minimum_version_id = 1,
1188     .minimum_version_id_old = 1,
1189     .pre_save = e1000_pre_save,
1190     .post_load = e1000_post_load,
1191     .fields      = (VMStateField []) {
1192         VMSTATE_PCI_DEVICE(dev, E1000State),
1193         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1194         VMSTATE_UNUSED(4), /* Was mmio_base.  */
1195         VMSTATE_UINT32(rxbuf_size, E1000State),
1196         VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1197         VMSTATE_UINT32(eecd_state.val_in, E1000State),
1198         VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1199         VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1200         VMSTATE_UINT16(eecd_state.reading, E1000State),
1201         VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1202         VMSTATE_UINT8(tx.ipcss, E1000State),
1203         VMSTATE_UINT8(tx.ipcso, E1000State),
1204         VMSTATE_UINT16(tx.ipcse, E1000State),
1205         VMSTATE_UINT8(tx.tucss, E1000State),
1206         VMSTATE_UINT8(tx.tucso, E1000State),
1207         VMSTATE_UINT16(tx.tucse, E1000State),
1208         VMSTATE_UINT32(tx.paylen, E1000State),
1209         VMSTATE_UINT8(tx.hdr_len, E1000State),
1210         VMSTATE_UINT16(tx.mss, E1000State),
1211         VMSTATE_UINT16(tx.size, E1000State),
1212         VMSTATE_UINT16(tx.tso_frames, E1000State),
1213         VMSTATE_UINT8(tx.sum_needed, E1000State),
1214         VMSTATE_INT8(tx.ip, E1000State),
1215         VMSTATE_INT8(tx.tcp, E1000State),
1216         VMSTATE_BUFFER(tx.header, E1000State),
1217         VMSTATE_BUFFER(tx.data, E1000State),
1218         VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1219         VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1220         VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1221         VMSTATE_UINT32(mac_reg[EECD], E1000State),
1222         VMSTATE_UINT32(mac_reg[EERD], E1000State),
1223         VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1224         VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1225         VMSTATE_UINT32(mac_reg[ICR], E1000State),
1226         VMSTATE_UINT32(mac_reg[ICS], E1000State),
1227         VMSTATE_UINT32(mac_reg[IMC], E1000State),
1228         VMSTATE_UINT32(mac_reg[IMS], E1000State),
1229         VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1230         VMSTATE_UINT32(mac_reg[MANC], E1000State),
1231         VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1232         VMSTATE_UINT32(mac_reg[MPC], E1000State),
1233         VMSTATE_UINT32(mac_reg[PBA], E1000State),
1234         VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1235         VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1236         VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1237         VMSTATE_UINT32(mac_reg[RDH], E1000State),
1238         VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1239         VMSTATE_UINT32(mac_reg[RDT], E1000State),
1240         VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1241         VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1242         VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1243         VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1244         VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1245         VMSTATE_UINT32(mac_reg[TDH], E1000State),
1246         VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1247         VMSTATE_UINT32(mac_reg[TDT], E1000State),
1248         VMSTATE_UINT32(mac_reg[TORH], E1000State),
1249         VMSTATE_UINT32(mac_reg[TORL], E1000State),
1250         VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1251         VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1252         VMSTATE_UINT32(mac_reg[TPR], E1000State),
1253         VMSTATE_UINT32(mac_reg[TPT], E1000State),
1254         VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1255         VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1256         VMSTATE_UINT32(mac_reg[VET], E1000State),
1257         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1258         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1259         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1260         VMSTATE_END_OF_LIST()
1261     }
1262 };
1263 
1264 static const uint16_t e1000_eeprom_template[64] = {
1265     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1266     0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1267     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1268     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1269     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1270     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1271     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1272     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1273 };
1274 
1275 /* PCI interface */
1276 
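/*
 * Set up the MMIO and I/O port regions.  MMIO write coalescing is enabled
 * everywhere except around the registers in excluded_regs, whose writes
 * have immediate side effects and must be handled right away.
 */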
1277 static void
1278 e1000_mmio_setup(E1000State *d)
1279 {
1280     int i;
1281     const uint32_t excluded_regs[] = {
1282         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1283         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1284     };
1285 
1286     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1287                           "e1000-mmio", PNPMMIO_SIZE);
1288     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1289     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1290         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1291                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1292     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1293 }
1294 
1295 static void
1296 e1000_cleanup(NetClientState *nc)
1297 {
1298     E1000State *s = qemu_get_nic_opaque(nc);
1299 
1300     s->nic = NULL;
1301 }
1302 
1303 static void
1304 pci_e1000_uninit(PCIDevice *dev)
1305 {
1306     E1000State *d = E1000(dev);
1307 
1308     qemu_del_timer(d->autoneg_timer);
1309     qemu_free_timer(d->autoneg_timer);
1310     memory_region_destroy(&d->mmio);
1311     memory_region_destroy(&d->io);
1312     qemu_del_nic(d->nic);
1313 }
1314 
1315 static NetClientInfo net_e1000_info = {
1316     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1317     .size = sizeof(NICState),
1318     .can_receive = e1000_can_receive,
1319     .receive = e1000_receive,
1320     .cleanup = e1000_cleanup,
1321     .link_status_changed = e1000_set_link_status,
1322 };
1323 
1324 static int pci_e1000_init(PCIDevice *pci_dev)
1325 {
1326     DeviceState *dev = DEVICE(pci_dev);
1327     E1000State *d = E1000(pci_dev);
1328     uint8_t *pci_conf;
1329     uint16_t checksum = 0;
1330     int i;
1331     uint8_t *macaddr;
1332 
1333     pci_conf = d->dev.config;
1334 
1335     /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1336     pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1337 
1338     pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1339 
1340     e1000_mmio_setup(d);
1341 
1342     pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1343 
1344     pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1345 
1346     memmove(d->eeprom_data, e1000_eeprom_template,
1347         sizeof e1000_eeprom_template);
1348     qemu_macaddr_default_if_unset(&d->conf.macaddr);
1349     macaddr = d->conf.macaddr.a;
1350     for (i = 0; i < 3; i++)
1351         d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1352     for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1353         checksum += d->eeprom_data[i];
1354     checksum = (uint16_t) EEPROM_SUM - checksum;
1355     d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1356 
1357     d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1358                           object_get_typename(OBJECT(d)), dev->id, d);
1359 
1360     qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1361 
1362     add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1363 
1364     d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
1365 
1366     return 0;
1367 }
1368 
1369 static void qdev_e1000_reset(DeviceState *dev)
1370 {
1371     E1000State *d = E1000(dev);
1372     e1000_reset(d);
1373 }
1374 
1375 static Property e1000_properties[] = {
1376     DEFINE_NIC_PROPERTIES(E1000State, conf),
1377     DEFINE_PROP_BIT("autonegotiation", E1000State,
1378                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1379     DEFINE_PROP_END_OF_LIST(),
1380 };
1381 
1382 static void e1000_class_init(ObjectClass *klass, void *data)
1383 {
1384     DeviceClass *dc = DEVICE_CLASS(klass);
1385     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1386 
1387     k->init = pci_e1000_init;
1388     k->exit = pci_e1000_uninit;
1389     k->romfile = "efi-e1000.rom";
1390     k->vendor_id = PCI_VENDOR_ID_INTEL;
1391     k->device_id = E1000_DEVID;
1392     k->revision = 0x03;
1393     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1394     dc->desc = "Intel Gigabit Ethernet";
1395     dc->reset = qdev_e1000_reset;
1396     dc->vmsd = &vmstate_e1000;
1397     dc->props = e1000_properties;
1398 }
1399 
1400 static const TypeInfo e1000_info = {
1401     .name          = TYPE_E1000,
1402     .parent        = TYPE_PCI_DEVICE,
1403     .instance_size = sizeof(E1000State),
1404     .class_init    = e1000_class_init,
1405 };
1406 
1407 static void e1000_register_types(void)
1408 {
1409     type_register_static(&e1000_info);
1410 }
1411 
1412 type_init(e1000_register_types)
1413