xref: /qemu/hw/net/e1000.c (revision 655d3b63b036b70714adbdae685055f1bda0f8f1)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 
37 #include "e1000_regs.h"
38 
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* One bit per debug topic; combined into the debugflags mask below. */
enum {
    DEBUG_GENERAL,  DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,       DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,  DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)	(1<<DEBUG_##x)
/* Topics enabled by default: transmit errors and general messages. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print an "e1000: "-prefixed message to stderr when topic 'what' is
 * enabled in debugflags; compiles away entirely without E1000_DEBUG. */
#define	DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define	DBGOUT(what, fmt, ...) do {} while (0)
#endif
58 
/* Sizes of the PCI I/O-port and MMIO BARs exposed by the device. */
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* Ethernet header (14 bytes) plus an optional 802.1Q VLAN tag (4 bytes). */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
 *	appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?		0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ?	0xc30 :
                   /* default to E1000_DEV_ID_82540EM */	0xc20
};
89 
/* Complete device state for one emulated e1000 NIC. */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;          /* register space BAR */
    MemoryRegion io;            /* I/O-port BAR */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by (offset >> 2) */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers, accessed via MDIC */
    uint16_t eeprom_data[64];   /* 16-bit EEPROM words, read bit-serially */

    uint32_t rxbuf_size;        /* per-descriptor RX buffer size from RCTL */
    uint32_t rxbuf_min_shift;   /* RCTL.RDMTS-derived free-descriptor shift */
    /* Transmit staging state, accumulated across TX descriptors until EOP. */
    struct e1000_tx {
        unsigned char header[256];      /* saved TSO header for resends */
        unsigned char vlan_header[4];   /* 802.1Q tag to insert on send */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];    /* frame assembly buffer */
        uint16_t size;                  /* bytes currently staged in data[] */
        unsigned char sum_needed;       /* POPTS bits: checksum insertion */
        unsigned char vlan_needed;      /* nonzero: prepend vlan_header */
        /* Checksum context from the most recent context descriptor. */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;        /* TSO: header length copied per segment */
        uint16_t mss;           /* TSO: max segment size */
        uint32_t paylen;        /* TSO: total payload length */
        uint16_t tso_frames;    /* segments emitted for current TSO packet */
        char tse;               /* TSE bit from the context descriptor */
        int8_t ip;              /* 1 = IPv4, 0 = IPv6 */
        int8_t tcp;             /* 1 = TCP, 0 = UDP */
        char cptse;     // current packet tse bit
    } tx;

    /* Bit-serial (Microwire) EEPROM access state driven through EECD. */
    struct {
        uint32_t val_in;	// shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;   /* completes emulated link auto-negotiation */

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;
153 
#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)

/* Map each E1000_<reg> byte offset to its mac_reg[] word index. */
#define	defreg(x)	x = (E1000_##x>>2)
enum {
    defreg(CTRL),	defreg(EECD),	defreg(EERD),	defreg(GPRC),
    defreg(GPTC),	defreg(ICR),	defreg(ICS),	defreg(IMC),
    defreg(IMS),	defreg(LEDCTL),	defreg(MANC),	defreg(MDIC),
    defreg(MPC),	defreg(PBA),	defreg(RCTL),	defreg(RDBAH),
    defreg(RDBAL),	defreg(RDH),	defreg(RDLEN),	defreg(RDT),
    defreg(STATUS),	defreg(SWSM),	defreg(TCTL),	defreg(TDBAH),
    defreg(TDBAL),	defreg(TDH),	defreg(TDLEN),	defreg(TDT),
    defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
    defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
    defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
    defreg(VET),        defreg(RDTR),   defreg(RADV),   defreg(TADV),
    defreg(ITR),
};
174 
175 static void
176 e1000_link_down(E1000State *s)
177 {
178     s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
179     s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
180 }
181 
182 static void
183 e1000_link_up(E1000State *s)
184 {
185     s->mac_reg[STATUS] |= E1000_STATUS_LU;
186     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
187 }
188 
189 static void
190 set_phy_ctrl(E1000State *s, int index, uint16_t val)
191 {
192     /*
193      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
194      * migrate during auto negotiation, after migration the link will be
195      * down.
196      */
197     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
198         return;
199     }
200     if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
201         e1000_link_down(s);
202         s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
203         DBGOUT(PHY, "Start link auto negotiation\n");
204         timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
205     }
206 }
207 
208 static void
209 e1000_autoneg_timer(void *opaque)
210 {
211     E1000State *s = opaque;
212     if (!qemu_get_queue(s->nic)->link_down) {
213         e1000_link_up(s);
214     }
215     s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
216     DBGOUT(PHY, "Auto negotiation is completed\n");
217 }
218 
/* Per-PHY-register write hooks, indexed by PHY register number.
 * Registers without an entry get a plain value store in set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

/* Number of slots in phyreg_writeops (highest hooked register + 1). */
enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
224 
/* Access capability of each PHY register; registers absent from the
 * table are 0 and any MDIC access to them raises E1000_MDIC_ERROR. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,	[M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,		[M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,	[PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,	[PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,	[M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
};
234 
/* PHY register values applied on device reset (copied in e1000_reset). */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
    [PHY_ID1] = 0x141,				[PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,			[M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,	[PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,			[PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};
244 
/* MAC register values applied on device reset (copied in e1000_reset):
 * link up, full duplex, 1000 Mb/s, manageability features enabled. */
static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
258 
/* Lower *curr to value, treating *curr == 0 as "not set yet".
 * A zero value is ignored; the result is the minimum nonzero delay seen. */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (!value) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
267 
/*
 * Record new interrupt causes in ICR/ICS and drive the INTx pin,
 * applying the (partial) interrupt-mitigation emulation on rising edges.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
        /* Only for 8257x */
        val |= E1000_ICR_INT_ASSERTED;
    }
    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are also unmasked in IMS can raise the line. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            /* TADV applies only when a descriptor had the IDE bit set. */
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /* mit_delay is in 256ns units; zero means fire immediately. */
            if (mit_delay) {
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    qemu_set_irq(d->irq[0], s->mit_irq_level);
}
333 
334 static void
335 e1000_mit_timer(void *opaque)
336 {
337     E1000State *s = opaque;
338 
339     s->mit_timer_on = 0;
340     /* Call set_interrupt_cause to update the irq level (if necessary). */
341     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
342 }
343 
344 static void
345 set_ics(E1000State *s, int index, uint32_t val)
346 {
347     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
348         s->mac_reg[IMS]);
349     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
350 }
351 
352 static int
353 rxbufsize(uint32_t v)
354 {
355     v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
356          E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
357          E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
358     switch (v) {
359     case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
360         return 16384;
361     case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
362         return 8192;
363     case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
364         return 4096;
365     case E1000_RCTL_SZ_1024:
366         return 1024;
367     case E1000_RCTL_SZ_512:
368         return 512;
369     case E1000_RCTL_SZ_256:
370         return 256;
371     }
372     return 2048;
373 }
374 
/* Device reset: cancel timers, restore PHY/MAC registers from the init
 * tables, clear TX staging state, and rebuild RAL/RAH from the configured
 * MAC address. */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    /* Zero everything, then overlay the reset defaults (the init tables
     * are shorter than the full register arrays). */
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    /* RAL holds MAC bytes 0-3, RAH bytes 4-5, little-endian packing. */
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
406 
407 static void
408 set_ctrl(E1000State *s, int index, uint32_t val)
409 {
410     /* RST is self clearing */
411     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
412 }
413 
414 static void
415 set_rx_control(E1000State *s, int index, uint32_t val)
416 {
417     s->mac_reg[RCTL] = val;
418     s->rxbuf_size = rxbufsize(val);
419     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
420     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
421            s->mac_reg[RCTL]);
422     qemu_flush_queued_packets(qemu_get_queue(s->nic));
423 }
424 
/*
 * MDIC write handler: perform the requested MDIO read or write on the
 * emulated PHY, honoring phy_regcap permissions, then set READY (and
 * optionally raise the MDAC interrupt).
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* Clear the data field, then merge in the PHY register value. */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Run the register's side-effect hook (if any) before storing. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    /* Guest asked for an interrupt on MDI/O completion. */
    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
458 
/*
 * EECD read handler: report EEPROM present + grant, echo the latched
 * control bits, and drive the DO (data out) line with the current bit of
 * the word being shifted out.  bitnum_out's high bits select the 16-bit
 * EEPROM word, the low 4 bits (inverted, MSB first) select the bit.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* DO idles high when no read is in progress. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
472 
/*
 * EECD write handler: emulate the Microwire bit-bang EEPROM protocol.
 * CS rising edge resets the shift state; each SK rising edge latches one
 * DI bit; after 9 bits (start bit + 3-bit opcode + 6-bit address — wait,
 * see note below) a READ opcode arms bit-serial output via bitnum_out.
 * NOTE(review): the 9 clocked-in bits are decoded as opcode in bits 8:6
 * and word address in bits 5:0 — confirm against the Microwire framing.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Latch the guest-controlled lines so get_eecd can echo them back. */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {		// falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    /* SK rising edge: shift one DI bit into val_in, MSB first. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* Word address * 16 - 1: output starts after the next clock. */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
506 
507 static uint32_t
508 flash_eerd_read(E1000State *s, int x)
509 {
510     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
511 
512     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
513         return (s->mac_reg[EERD]);
514 
515     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
516         return (E1000_EEPROM_RW_REG_DONE | r);
517 
518     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
519            E1000_EEPROM_RW_REG_DONE | r);
520 }
521 
/* Compute an internet checksum over data[css..n) (optionally clipped at
 * cse) and store it big-endian at offset sloc, provided sloc fits within
 * the checksummed region. */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    if (cse && cse < n) {
        n = cse + 1;
    }
    if (sloc < n - 1) {
        uint32_t sum = net_checksum_add(n - css, data + css);

        cpu_to_be16wu((uint16_t *)(data + sloc), net_checksum_finish(sum));
    }
}
535 
536 static inline int
537 vlan_enabled(E1000State *s)
538 {
539     return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
540 }
541 
542 static inline int
543 vlan_rx_filter_enabled(E1000State *s)
544 {
545     return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
546 }
547 
548 static inline int
549 is_vlan_packet(E1000State *s, const uint8_t *buf)
550 {
551     return (be16_to_cpup((uint16_t *)(buf + 12)) ==
552                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
553 }
554 
555 static inline int
556 is_vlan_txd(uint32_t txd_lower)
557 {
558     return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
559 }
560 
561 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
562  * fill it in, just pad descriptor length by 4 bytes unless guest
563  * told us to strip it off the packet. */
564 static inline int
565 fcs_len(E1000State *s)
566 {
567     return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
568 }
569 
570 static void
571 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
572 {
573     NetClientState *nc = qemu_get_queue(s->nic);
574     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
575         nc->info->receive(nc, buf, size);
576     } else {
577         qemu_send_packet(nc, buf, size);
578     }
579 }
580 
/*
 * Emit one frame (or one TSO segment) from the TX staging buffer:
 * patch IP/TCP/UDP headers for segmentation, insert requested checksums
 * and VLAN tag, send the frame, and update the TX statistics counters.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        /* TSO segment: fix up length/id/seq fields for this segment. */
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {		// IPv4
            /* Total-length field, then bump the IP identification by the
             * number of segments already sent. */
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else			// IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* Advance the TCP sequence number by the payload already sent;
             * clear PSH/FIN on all but the final segment. */
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;		// PSH, FIN
        } else	// UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            cpu_to_be16wu(sp, phsum);
        }
        tp->tso_frames++;
    }

    /* Insert the TCP/UDP and/or IP checksums the guest asked for. */
    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Shift the MAC addresses down into tp->vlan (adjacent to data[])
         * and splice the 4-byte 802.1Q tag in after them. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    /* Statistics: packets transmitted and total octets (64-bit counter
     * split across TOTL/TOTH, carry on wrap). */
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
639 
/*
 * Process one TX descriptor: context descriptors load checksum/TSO state
 * into s->tx; data and legacy descriptors DMA payload into the staging
 * buffer, segmenting as needed, and transmit on EOP.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* Remember IDE for the TADV mitigation logic in set_interrupt_cause. */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
        /* Latch checksum offsets and TSO parameters; no payload here. */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {	// this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Capture the VLAN tag for insertion at transmit time. */
    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO path: fill the staging buffer up to header+mss, emit a
         * segment, and re-seed the buffer with the saved header. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* Clamp to the staging buffer so a bogus descriptor cannot
             * overflow tp->data. */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* Plain frame: append the descriptor's payload, clamped to the
         * staging buffer. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* On EOP, transmit unless this is a TSO frame whose header never
     * arrived in full; then reset the per-packet staging state. */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
735 
736 static uint32_t
737 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
738 {
739     PCIDevice *d = PCI_DEVICE(s);
740     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
741 
742     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
743         return 0;
744     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
745                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
746     dp->upper.data = cpu_to_le32(txd_upper);
747     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
748                   &dp->upper, sizeof(dp->upper));
749     return E1000_ICR_TXDW;
750 }
751 
752 static uint64_t tx_desc_base(E1000State *s)
753 {
754     uint64_t bah = s->mac_reg[TDBAH];
755     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
756 
757     return (bah << 32) + bal;
758 }
759 
/*
 * Drain the TX descriptor ring from TDH to TDT, processing each
 * descriptor and writing back status, then raise the accumulated
 * interrupt causes (at least TXQE).
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance TDH, wrapping at the end of the ring. */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
800 
/*
 * RX filtering: decide whether an incoming frame should be accepted.
 * Checks, in order: VLAN filter table, unicast/multicast promiscuous
 * modes, broadcast, exact unicast match against RAL/RAH, and finally the
 * multicast hash table (MTA).  Returns 1 to accept, 0 to drop.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    /* MTA bit selection per RCTL.MO (multicast offset) encoding. */
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* VLAN id is at offset 14 (after the 802.1Q ethertype); look up
         * its bit in the 4096-bit VLAN filter table. */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)			// promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    /* Exact unicast match against the 16 receive-address registers. */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* Multicast hash: take 12 bits of the address (window chosen by
     * RCTL.MO) and test the corresponding bit in the MTA array. */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
853 
854 static void
855 e1000_set_link_status(NetClientState *nc)
856 {
857     E1000State *s = qemu_get_nic_opaque(nc);
858     uint32_t old_status = s->mac_reg[STATUS];
859 
860     if (nc->link_down) {
861         e1000_link_down(s);
862     } else {
863         e1000_link_up(s);
864     }
865 
866     if (s->mac_reg[STATUS] != old_status)
867         set_ics(s, 0, E1000_ICR_LSC);
868 }
869 
/* Return true when the RX descriptor ring has enough free buffers
 * (between RDH and RDT) to hold total_size bytes. */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* Head has wrapped past tail: count across the ring boundary. */
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        /* RDH == RDT: ring empty of usable descriptors. */
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
887 
888 static int
889 e1000_can_receive(NetClientState *nc)
890 {
891     E1000State *s = qemu_get_nic_opaque(nc);
892 
893     return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
894         (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
895 }
896 
897 static uint64_t rx_desc_base(E1000State *s)
898 {
899     uint64_t bah = s->mac_reg[RDBAH];
900     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
901 
902     return (bah << 32) + bal;
903 }
904 
/*
 * Receive one packet, given as a scatter/gather list, into the guest's
 * RX descriptor ring via DMA.
 *
 * Returns the number of bytes consumed (possibly without delivering the
 * packet, e.g. when filtered out or oversized), or -1 to have the net
 * layer queue the packet and retry later (link down, receiver disabled,
 * or no ring space).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;      /* contiguous view of the header */
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;                       /* read offset within *iov */
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    /* Drop reception requests while the link is down... */
    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    /* ...or while the receiver is disabled. */
    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen.  Gather a contiguous
           header copy so the filter code below can inspect it. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    /* Silently consume packets rejected by the unicast/multicast filter. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /* VLAN stripping: save the tag for the descriptor's "special" field
     * and close the 4-byte gap left by removing the 802.1Q header. */
    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                                + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            /* Header is in place: shift the two MAC addresses up by 4. */
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header was copied to min_buf: write it back past the tag
             * and skip the stripped bytes in the iovec. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
    }
    /* Spread the packet over as many descriptors as needed, at most
     * rxbuf_size bytes per descriptor. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* DMA this descriptor's chunk, walking the iovec. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the updated descriptor (status/length) back to the guest. */
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    /* Update receive statistics. */
    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    /* Signal RX timer interrupt; add RXDMT0 if the ring fell below the
     * minimum-threshold fraction configured via rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1059 
1060 static ssize_t
1061 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1062 {
1063     const struct iovec iov = {
1064         .iov_base = (uint8_t *)buf,
1065         .iov_len = size
1066     };
1067 
1068     return e1000_receive_iov(nc, &iov, 1);
1069 }
1070 
/* Plain register read: return the stored value, no side effects. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1076 
1077 static uint32_t
1078 mac_icr_read(E1000State *s, int index)
1079 {
1080     uint32_t ret = s->mac_reg[ICR];
1081 
1082     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1083     set_interrupt_cause(s, 0, 0);
1084     return ret;
1085 }
1086 
1087 static uint32_t
1088 mac_read_clr4(E1000State *s, int index)
1089 {
1090     uint32_t ret = s->mac_reg[index];
1091 
1092     s->mac_reg[index] = 0;
1093     return ret;
1094 }
1095 
1096 static uint32_t
1097 mac_read_clr8(E1000State *s, int index)
1098 {
1099     uint32_t ret = s->mac_reg[index];
1100 
1101     s->mac_reg[index] = 0;
1102     s->mac_reg[index-1] = 0;
1103     return ret;
1104 }
1105 
/* Plain register write: store the value, no side effects. */
static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
}
1111 
1112 static void
1113 set_rdt(E1000State *s, int index, uint32_t val)
1114 {
1115     s->mac_reg[index] = val & 0xffff;
1116     if (e1000_has_rxbufs(s, 1)) {
1117         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1118     }
1119 }
1120 
/* Write handler for registers that are only 16 bits wide. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
1126 
/* Descriptor ring length (RDLEN/TDLEN): only bits 19:7 are writable,
 * forcing the length to a multiple of 128 bytes. */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1132 
/* Shared write handler for TCTL and TDT: store the value, clamp the TX
 * tail pointer to 16 bits, and kick the transmitter. */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1140 
/* ICR write: bits written as 1 clear the corresponding pending causes. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1147 
/* IMC write: mask (disable) the given interrupt causes, then re-evaluate
 * the interrupt line state. */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1154 
/* IMS write: unmask (enable) the given interrupt causes, then re-evaluate
 * the interrupt line state. */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1161 
1162 #define getreg(x)	[x] = mac_readreg
1163 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1164     getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
1165     getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
1166     getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
1167     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
1168     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
1169     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
1170     getreg(TDLEN),      getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
1171     getreg(TADV),       getreg(ITR),
1172 
1173     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
1174     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
1175     [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
1176     [CRCERRS ... MPC] = &mac_readreg,
1177     [RA ... RA+31] = &mac_readreg,
1178     [MTA ... MTA+127] = &mac_readreg,
1179     [VFTA ... VFTA+127] = &mac_readreg,
1180 };
1181 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1182 
1183 #define putreg(x)	[x] = mac_writereg
1184 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1185     putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
1186     putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
1187     putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
1188     [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
1189     [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
1190     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
1191     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
1192     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
1193     [RDTR] = set_16bit, [RADV] = set_16bit,     [TADV] = set_16bit,
1194     [ITR] = set_16bit,
1195     [RA ... RA+31] = &mac_writereg,
1196     [MTA ... MTA+127] = &mac_writereg,
1197     [VFTA ... VFTA+127] = &mac_writereg,
1198 };
1199 
1200 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1201 
1202 static void
1203 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1204                  unsigned size)
1205 {
1206     E1000State *s = opaque;
1207     unsigned int index = (addr & 0x1ffff) >> 2;
1208 
1209     if (index < NWRITEOPS && macreg_writeops[index]) {
1210         macreg_writeops[index](s, index, val);
1211     } else if (index < NREADOPS && macreg_readops[index]) {
1212         DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1213     } else {
1214         DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1215                index<<2, val);
1216     }
1217 }
1218 
1219 static uint64_t
1220 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1221 {
1222     E1000State *s = opaque;
1223     unsigned int index = (addr & 0x1ffff) >> 2;
1224 
1225     if (index < NREADOPS && macreg_readops[index])
1226     {
1227         return macreg_readops[index](s, index);
1228     }
1229     DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1230     return 0;
1231 }
1232 
/* Little-endian MMIO register window (BAR 0). */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        /* Handlers operate on aligned 32-bit words only. */
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1242 
/* Stub: the I/O BAR is registered but not implemented; reads return 0. */
static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}
1251 
/* Stub: the I/O BAR is registered but not implemented; writes are ignored. */
static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}
1259 
/* I/O-port BAR ops (BAR 1); both directions are no-op stubs above. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1265 
/* VMState field-exists predicate: true only when loading a version-1
 * snapshot (used to skip a field that existed only in version 1). */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1270 
/* Migration pre-save hook: fold transient timer state into the
 * registers so it does not need to be migrated separately. */
static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    /* Skip the auto-negotiation fixup when the compat flag disables it. */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }

    /*
     * If link is down and auto-negotiation is ongoing, complete
     * auto-negotiation immediately.  This allows us to look at
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down &&
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
         s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}
1296 
/* Migration post-load hook: rebuild state that is not migrated directly
 * (mitigation timers, backend link state, pending auto-negotiation). */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the mitigation compat flag, force all mitigation registers
     * and the mitigation IRQ level to their disabled state. */
    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    /* Auto-negotiation was in flight at save time: bring the link up and
     * rearm the timer to finish negotiation on the destination. */
    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}
1328 
/* Subsection predicate: migrate mitigation state only when the
 * "mitigation" compat property is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return s->compat_flags & E1000_FLAG_MIT;
}
1335 
/* Optional migration subsection carrying interrupt-mitigation state
 * (timer registers and the current mitigation IRQ level). */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1350 
/* Main migration description.  Field order defines the wire format;
 * do not reorder or remove entries without bumping version_id. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang engine state. */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-progress transmit context (checksum offload / TSO). */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers, in alphabetical order. */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Receive address, multicast and VLAN filter register arrays. */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
1438 
/* Default EEPROM image (64 16-bit words).  Words 0-2 are overwritten
 * with the configured MAC address and word EEPROM_CHECKSUM_REG with the
 * computed checksum in pci_e1000_init(). */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1449 
1450 /* PCI interface */
1451 
/* Create the MMIO and I/O-port regions.  MMIO write coalescing is
 * enabled for every register range except the registers listed in
 * excluded_regs, whose writes must take effect immediately; the final
 * PNPMMIO_SIZE entry is a sentinel that also closes the last range. */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    /* Coalesce from the start of the BAR up to the first excluded register,
     * then each gap between consecutive excluded registers. */
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1469 
/* Net backend cleanup callback: the NIC is being torn down, so drop our
 * reference to it. */
static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}
1477 
/* PCI unrealize: release timers, memory regions and the NIC backend
 * created in pci_e1000_init(). */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}
1491 
/* Callbacks connecting this device to the QEMU network core. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
1501 
/* PCI realize: set up config space, BARs, EEPROM contents (including the
 * MAC address and checksum), the NIC backend and the device timers.
 * Returns 0 on success. */
static int pci_e1000_init(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    /* BAR 0: memory-mapped registers; BAR 1: (stub) I/O ports. */
    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    /* Populate the EEPROM: template, then MAC address in words 0-2,
     * then the checksum word so all words sum to EEPROM_SUM. */
    memmove(d->eeprom_data, e1000_eeprom_template,
        sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");

    /* Timers for link auto-negotiation and interrupt mitigation. */
    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);

    return 0;
}
1547 
/* qdev reset callback: delegate to the device-level reset helper. */
static void qdev_e1000_reset(DeviceState *dev)
{
    E1000State *d = E1000(dev);
    e1000_reset(d);
}
1553 
/* User-settable properties; the two compat bits let management tools
 * disable auto-negotiation and interrupt mitigation for migration
 * compatibility with older machine types. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1562 
/* QOM class init: wire up PCI identity, lifecycle hooks, migration
 * description and properties. */
static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_e1000_init;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = E1000_DEVID;
    k->revision = 0x03;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}
1581 
/* QOM type registration data for the e1000 PCI device. */
static const TypeInfo e1000_info = {
    .name          = TYPE_E1000,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init    = e1000_class_init,
};
1588 
/* Register the e1000 type with QOM at module-load time. */
static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)
1595