xref: /qemu/hw/net/e1000.c (revision db1015e92e04835c9eb50c29625fe566d1202dbd)
1 /*
2  * QEMU e1000 emulation
3  *
4  * Software developer's manual:
5  * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6  *
7  * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8  * Copyright (c) 2008 Qumranet
9  * Based on work done by:
10  * Copyright (c) 2007 Dan Aloni
11  * Copyright (c) 2004 Antony T Curtis
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 #include "qemu/osdep.h"
29 #include "hw/pci/pci.h"
30 #include "hw/qdev-properties.h"
31 #include "migration/vmstate.h"
32 #include "net/net.h"
33 #include "net/checksum.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/dma.h"
36 #include "qemu/iov.h"
37 #include "qemu/module.h"
38 #include "qemu/range.h"
39 
40 #include "e1000x_common.h"
41 #include "trace.h"
42 #include "qom/object.h"
43 
/* Ethernet broadcast destination address; used both by the rx filter and
 * by the tx broadcast/multicast statistics counters. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

/* #define E1000_DEBUG */

#ifdef E1000_DEBUG
/* One bit per debug topic; a topic is enabled by setting its bit in
 * debugflags below. */
enum {
    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* Longest L2 header inspected here: 14-byte Ethernet + 4-byte VLAN tag */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
 *  Others never tested
 */
79 
/* Per-device emulation state.  The layout is migration-sensitive: fields
 * are streamed by the vmstate description, so do not reorder lightly. */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by reg offset / 4 */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers, accessed via MDIC */
    uint16_t eeprom_data[64];   /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;        /* guest rx buffer size derived from RCTL */
    uint32_t rxbuf_min_shift;   /* rx low-water mark shift from RCTL.RDMTS */
    /* Transmit assembly state: a packet is accumulated here across one or
     * more tx descriptors until EOP (or a TSO segment boundary). */
    struct e1000_tx {
        unsigned char header[256];      /* saved TSO header, replayed per segment */
        unsigned char vlan_header[4];   /* VLAN tag to insert on transmit */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                  /* bytes currently accumulated in data[] */
        unsigned char vlan_needed;
        unsigned char sum_needed;       /* TXSM/IXSM checksum-offload requests */
        bool cptse;                     /* current packet uses TSO */
        e1000x_txd_props props;         /* context from last non-TSO descriptor */
        e1000x_txd_props tso_props;     /* context from last TSO descriptor */
        uint16_t tso_frames;            /* segments emitted for current TSO pkt */
    } tx;

    /* EEPROM microwire bit-banging state driven through the EECD register. */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;

/* True when the named migration-compat feature flag is enabled for 's'. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
145 
/* Class data shared by all e1000 model variants; phy_id2 distinguishes
 * the emulated device model (set per-DevId in e1000_reset()). */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

/* QOM cast helpers for the e1000 object hierarchy. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

#define E1000_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
161 
162 static void
163 e1000_link_up(E1000State *s)
164 {
165     e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
166 
167     /* E1000_STATUS_LU is tested by e1000_can_receive() */
168     qemu_flush_queued_packets(qemu_get_queue(s->nic));
169 }
170 
171 static void
172 e1000_autoneg_done(E1000State *s)
173 {
174     e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
175 
176     /* E1000_STATUS_LU is tested by e1000_can_receive() */
177     qemu_flush_queued_packets(qemu_get_queue(s->nic));
178 }
179 
180 static bool
181 have_autoneg(E1000State *s)
182 {
183     return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
184 }
185 
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
188 {
189     /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
190     s->phy_reg[PHY_CTRL] = val & ~(0x3f |
191                                    MII_CR_RESET |
192                                    MII_CR_RESTART_AUTO_NEG);
193 
194     /*
195      * QEMU 1.3 does not support link auto-negotiation emulation, so if we
196      * migrate during auto negotiation, after migration the link will be
197      * down.
198      */
199     if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
200         e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
201     }
202 }
203 
/* Per-register PHY write hooks; registers without an entry are written
 * directly into phy_reg[] by set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Per-register PHY access rights; unlisted registers reject both reads
 * and writes (set_mdic() reports E1000_MDIC_ERROR). */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};

/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};

/* Power-on defaults for the MAC registers (copied in by e1000_reset()). */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
262 
/*
 * Lower *curr to 'value' if 'value' is a tighter (smaller) non-zero delay.
 * A zero 'value' means "no constraint" and never changes *curr; a zero
 * *curr means "not yet set" and accepts any non-zero value.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
271 
/*
 * Update ICR/ICS with 'val' and (re)compute the level of the INTx pin,
 * applying the partial interrupt-mitigation emulation (ITR/RADV/TADV).
 * Called both from register writes (via set_ics) and from the mitigation
 * timer callback with the current ICR value.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes the guest has unmasked via IMS can raise the line. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            /* TADV/RADV are in 1024ns units while the timer below counts
             * in 256ns units, hence the *4 scaling. */
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
339 
340 static void
341 e1000_mit_timer(void *opaque)
342 {
343     E1000State *s = opaque;
344 
345     s->mit_timer_on = 0;
346     /* Call set_interrupt_cause to update the irq level (if necessary). */
347     set_interrupt_cause(s, 0, s->mac_reg[ICR]);
348 }
349 
350 static void
351 set_ics(E1000State *s, int index, uint32_t val)
352 {
353     DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
354         s->mac_reg[IMS]);
355     set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
356 }
357 
358 static void
359 e1000_autoneg_timer(void *opaque)
360 {
361     E1000State *s = opaque;
362     if (!qemu_get_queue(s->nic)->link_down) {
363         e1000_autoneg_done(s);
364         set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
365     }
366 }
367 
368 static void e1000_reset(void *opaque)
369 {
370     E1000State *d = opaque;
371     E1000BaseClass *edc = E1000_GET_CLASS(d);
372     uint8_t *macaddr = d->conf.macaddr.a;
373 
374     timer_del(d->autoneg_timer);
375     timer_del(d->mit_timer);
376     timer_del(d->flush_queue_timer);
377     d->mit_timer_on = 0;
378     d->mit_irq_level = 0;
379     d->mit_ide = 0;
380     memset(d->phy_reg, 0, sizeof d->phy_reg);
381     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
382     d->phy_reg[PHY_ID2] = edc->phy_id2;
383     memset(d->mac_reg, 0, sizeof d->mac_reg);
384     memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
385     d->rxbuf_min_shift = 1;
386     memset(&d->tx, 0, sizeof d->tx);
387 
388     if (qemu_get_queue(d->nic)->link_down) {
389         e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
390     }
391 
392     e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
393 }
394 
395 static void
396 set_ctrl(E1000State *s, int index, uint32_t val)
397 {
398     /* RST is self clearing */
399     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
400 }
401 
402 static void
403 e1000_flush_queue_timer(void *opaque)
404 {
405     E1000State *s = opaque;
406 
407     qemu_flush_queued_packets(qemu_get_queue(s->nic));
408 }
409 
/* Write handler for RCTL: cache derived rx parameters and defer packet
 * delivery briefly so the guest can finish bringing up its rx ring. */
static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = e1000x_rxbufsize(val);
    /* RDMTS selects the free-descriptor low-water mark as a power-of-two
     * fraction of the ring; keep it as a shift count. */
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    /* Hold queued packets for up to 1s; e1000_flush_queue_timer releases
     * them, and e1000_can_receive() refuses packets while it is pending. */
    timer_mod(s->flush_queue_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
}
421 
/*
 * Write handler for MDIC: perform the requested PHY register read or
 * write, enforce per-register access rights (phy_regcap), and complete
 * the operation by setting the READY bit (plus an optional interrupt).
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy # — only PHY address 1 exists
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* clear the data field (val ^ data) and merge in the PHY value */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* registers with a dedicated write hook bypass the plain store */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
456 
/*
 * Read handler for EECD: report EEPROM present/granted plus the guest's
 * last-written control bits, and drive the DO (data-out) line with the
 * current EEPROM output bit.  Words are shifted out MSB first, which is
 * what the (bitnum_out & 0xf) ^ 0xf bit-index inversion implements.
 * When no read is in progress DO idles high.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
470 
/*
 * Write handler for EECD: emulate the microwire EEPROM bit-bang protocol.
 * The guest toggles SK (clock) with CS asserted; bits on DI are shifted
 * into val_in.  After 9 bits (start bit + 3-bit opcode + address bits) a
 * READ opcode switches the state machine into reading mode, and
 * bitnum_out is preloaded so get_eecd() serves the addressed word.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* latch only the guest-controllable lines for the next read-back */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* rising clock edge: shift one DI bit into the command accumulator */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* command complete: low 6 bits are the word address; -1 because
         * the next falling edge pre-increments bitnum_out */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
506 
507 static uint32_t
508 flash_eerd_read(E1000State *s, int x)
509 {
510     unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
511 
512     if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
513         return (s->mac_reg[EERD]);
514 
515     if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
516         return (E1000_EEPROM_RW_REG_DONE | r);
517 
518     return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
519            E1000_EEPROM_RW_REG_DONE | r);
520 }
521 
/*
 * Compute an Internet checksum over data[css..n) (optionally clipped to
 * the checksum-end offset cse, inclusive) and store it big-endian at
 * offset sloc.  Nothing is written if sloc would fall outside the data.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    if (cse && cse < n) {
        n = cse + 1;     /* cse is the last byte included in the sum */
    }
    if (sloc < n - 1) {
        uint32_t sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
534 
535 static inline void
536 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
537 {
538     if (!memcmp(arr, bcast, sizeof bcast)) {
539         e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
540     } else if (arr[0] & 1) {
541         e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
542     }
543 }
544 
545 static void
546 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
547 {
548     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
549                                     PTC1023, PTC1522 };
550 
551     NetClientState *nc = qemu_get_queue(s->nic);
552     if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
553         nc->info->receive(nc, buf, size);
554     } else {
555         qemu_send_packet(nc, buf, size);
556     }
557     inc_tx_bcast_or_mcast_count(s, buf);
558     e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
559 }
560 
/*
 * Emit one segment of the frame currently assembled in s->tx.
 * For TSO packets (tp->cptse) the IP/TCP/UDP headers are patched per
 * segment (lengths, IP id, TCP sequence number, PSH/FIN suppression,
 * pseudo-header adjustment) before the offloaded checksums are inserted.
 * Finally the frame is sent (with VLAN tag insertion if requested) and
 * the tx statistics registers are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4 */
            /* total length at +2; identification at +4 bumped per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6 */
            /* payload length field at +4 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            /* advance the sequence number by the payload already sent */
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* not the last segment: clear PSH and FIN */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /* open a 4-byte gap after the MAC addresses and insert the tag;
         * tp->vlan directly precedes tp->data, so sending from tp->vlan
         * yields the 4 displaced bytes followed by the tagged frame */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* total-packets and total-octets counters; G* mirror the totals */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
627 
/*
 * Process one transmit descriptor: context descriptors update the saved
 * offload parameters; data and legacy descriptors append their buffer to
 * the frame being assembled in s->tx, emitting a segment via xmit_seg()
 * whenever a full TSO segment (header + mss) has been accumulated, and
 * finishing the packet on EOP.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        /* TSE selects which of the two saved contexts this one updates */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* checksum-offload options apply from the first descriptor */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* msh = bytes per TSO segment including the replayed header */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* clamp to the assembly buffer to avoid overflow */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            /* save the header the first time it is fully available */
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                /* full segment: send it and start the next one with a
                 * fresh copy of the saved header */
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* on EOP, flush whatever remains (unless a TSO packet ended before
     * even its header was complete) and reset per-packet state */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
713 
714 static uint32_t
715 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
716 {
717     PCIDevice *d = PCI_DEVICE(s);
718     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
719 
720     if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
721         return 0;
722     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
723                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
724     dp->upper.data = cpu_to_le32(txd_upper);
725     pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
726                   &dp->upper, sizeof(dp->upper));
727     return E1000_ICR_TXDW;
728 }
729 
730 static uint64_t tx_desc_base(E1000State *s)
731 {
732     uint64_t bah = s->mac_reg[TDBAH];
733     uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
734 
735     return (bah << 32) + bal;
736 }
737 
/*
 * Walk the tx descriptor ring from TDH to TDT, processing and (where
 * requested) writing back each descriptor, then raise the accumulated
 * interrupt causes.  Guards against guest-programmed bogus TDT/TDLEN
 * values that would otherwise loop forever.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
779 
/*
 * Decide whether to accept an incoming frame: VLAN filtering first (when
 * enabled), then promiscuous unicast/multicast, broadcast acceptance,
 * and finally the exact/group address filters.  Returns non-zero to
 * accept.  MPRC/BPRC counters are bumped on the paths that accept here.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    uint32_t rctl = s->mac_reg[RCTL];
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
        /* look the 12-bit VLAN id up in the 128x32-bit VFTA bitmap */
        uint16_t vid = lduw_be_p(buf + 14);
        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
                                 ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

    /* fall back to the unicast/multicast address table filters */
    return e1000x_rx_group_filter(s->mac_reg, buf);
}
811 
812 static void
813 e1000_set_link_status(NetClientState *nc)
814 {
815     E1000State *s = qemu_get_nic_opaque(nc);
816     uint32_t old_status = s->mac_reg[STATUS];
817 
818     if (nc->link_down) {
819         e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
820     } else {
821         if (have_autoneg(s) &&
822             !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
823             e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
824         } else {
825             e1000_link_up(s);
826         }
827     }
828 
829     if (s->mac_reg[STATUS] != old_status)
830         set_ics(s, 0, E1000_ICR_LSC);
831 }
832 
833 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
834 {
835     int bufs;
836     /* Fast-path short packets */
837     if (total_size <= s->rxbuf_size) {
838         return s->mac_reg[RDH] != s->mac_reg[RDT];
839     }
840     if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
841         bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
842     } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
843         bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
844             s->mac_reg[RDT] - s->mac_reg[RDH];
845     } else {
846         return false;
847     }
848     return total_size <= bufs * s->rxbuf_size;
849 }
850 
851 static bool
852 e1000_can_receive(NetClientState *nc)
853 {
854     E1000State *s = qemu_get_nic_opaque(nc);
855 
856     return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
857         e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
858 }
859 
860 static uint64_t rx_desc_base(E1000State *s)
861 {
862     uint64_t bah = s->mac_reg[RDBAH];
863     uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
864 
865     return (bah << 32) + bal;
866 }
867 
/* Record an rx ring overrun: bump the "receive no buffers" and "missed
 * packets" counters and raise the RXO cause so the guest refills the
 * ring. */
static void
e1000_receiver_overrun(E1000State *s, size_t size)
{
    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    set_ics(s, 0, E1000_ICS_RXO);
}
876 
/*
 * NetClientInfo.receive_iov callback: deliver one packet into the guest's
 * RX descriptor ring via DMA, splitting it across as many descriptors as
 * needed and raising the appropriate interrupt causes.
 *
 * Returns the packet size on success (including when the packet is
 * filtered out or dropped as oversized), 0 to ask the net layer to queue
 * the packet (flush timer pending), or -1 on RX-disabled/overrun.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* Returning 0 makes the net layer queue the packet for later flush. */
    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Strip the 4-byte VLAN tag; the TCI is reported via desc.special. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            /* In-place: slide the 12-byte dst/src MACs over the tag. */
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            /* Advance iov/iov_ofs past the stripped bytes. */
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* DMA the payload fragment, walking the iovec as needed. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the updated descriptor (status/length) back to the guest. */
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Raise RXT0, plus RXDMT0 when free descriptors fall below the
     * threshold implied by rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1020 
1021 static ssize_t
1022 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1023 {
1024     const struct iovec iov = {
1025         .iov_base = (uint8_t *)buf,
1026         .iov_len = size
1027     };
1028 
1029     return e1000_receive_iov(nc, &iov, 1);
1030 }
1031 
/* Plain MAC register read: return the stored value unmodified. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1037 
/* Read a register of which only the low 4 bits are implemented. */
static uint32_t
mac_low4_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0xf;
}
1043 
/* Read a register of which only the low 11 bits are implemented. */
static uint32_t
mac_low11_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0x7ff;
}
1049 
/* Read a register of which only the low 13 bits are implemented. */
static uint32_t
mac_low13_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0x1fff;
}
1055 
/* Read a register of which only the low 16 bits are implemented. */
static uint32_t
mac_low16_read(E1000State *s, int index)
{
    return s->mac_reg[index] & 0xffff;
}
1061 
/*
 * ICR is read-to-clear: return the pending causes, then clear them
 * (and re-evaluate the interrupt line) via set_interrupt_cause(0).
 */
static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}
1071 
/* Read-to-clear accessor for 32-bit statistics counters. */
static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}
1080 
/*
 * Read-to-clear accessor for the high half of a 64-bit statistics
 * counter pair: reading the H register clears both H (index) and the
 * adjacent L register (index - 1).
 */
static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}
1090 
/*
 * Plain MAC register write.  A write to RA+1 (the high half of receive
 * address 0) additionally refreshes the NIC info string shown to the
 * user, since RA/RA+1 together hold the station MAC address.
 */
static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA + 1) {
        /* Serialize RA/RA+1 little-endian to obtain the 6-byte MAC. */
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}
1104 
/*
 * RDT write handler: update the RX tail pointer (16 bits) and, if that
 * made descriptors available, flush any packets queued by the net layer.
 */
static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1113 
/* Write handler for registers of which only the low 16 bits are kept. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
1119 
/*
 * Write handler for the descriptor ring length registers (RDLEN/TDLEN):
 * only bits [19:7] are kept, i.e. the length is a multiple of 128 bytes.
 */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1125 
/*
 * Write handler shared by TCTL and TDT (see macreg_writeops): store the
 * value, truncate TDT to 16 bits, and kick the transmit path.
 */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1133 
/* ICR write handler: writing 1s clears those interrupt cause bits. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1140 
/* IMC write handler: mask off (disable) the given interrupt causes,
 * then re-evaluate the interrupt line. */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1147 
/* IMS write handler: unmask (enable) the given interrupt causes,
 * then re-evaluate the interrupt line. */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1154 
/* Dispatch table of per-register read handlers, indexed by the MMIO
 * word offset (addr >> 2).  Unlisted entries are NULL (unimplemented). */
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Statistics counters clear on read (64-bit pairs via the H half). */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (array-style registers). */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1210 
/* Dispatch table of per-register write handlers, indexed by the MMIO
 * word offset (addr >> 2).  Unlisted entries are NULL (read-only or
 * unimplemented). */
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with side effects use dedicated handlers; note TDT
     * shares set_tctl so a tail write also kicks transmission. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (array-style registers). */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1241 
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/* markflag(x) encodes compat flag E1000_FLAG_x shifted into the flag
 * field plus the "flag needed" bit. */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1294 
/*
 * MMIO write dispatcher: map the byte offset to a word index, check the
 * compat-flag gating in mac_reg_access[], and invoke the write handler.
 */
static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            macreg_writeops[index](s, index, val);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else if (index < NREADOPS && macreg_readops[index]) {
        /* Readable but not writable: silently drop (debug log only). */
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
               index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}
1322 
/*
 * MMIO read dispatcher: map the byte offset to a word index, check the
 * compat-flag gating in mac_reg_access[], and invoke the read handler.
 * Unknown or disabled registers read as 0.
 */
static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
                       "It is not fully implemented.\n", index<<2);
            }
            return macreg_readops[index](s, index);
        } else {    /* "flag needed" bit is set, but the flag is not active */
            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
                   index<<2);
        }
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    }
    return 0;
}
1346 
/* MMIO region ops: all accesses are performed as 32-bit (impl 4/4). */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1356 
1357 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1358                               unsigned size)
1359 {
1360     E1000State *s = opaque;
1361 
1362     (void)s;
1363     return 0;
1364 }
1365 
1366 static void e1000_io_write(void *opaque, hwaddr addr,
1367                            uint64_t val, unsigned size)
1368 {
1369     E1000State *s = opaque;
1370 
1371     (void)s;
1372 }
1373 
/* Ops for the (stubbed-out) I/O-port BAR. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1379 
/* VMState field-exists test: true only for version-1 migration streams. */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1384 
/*
 * Migration pre_save hook: fold auto-negotiation state into PHY_STATUS
 * and pick which offload-property set goes into the main (legacy)
 * section via mig_props.  Returns 0 (success).
 */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1415 
/*
 * Migration post_load hook: reset mitigation timer state, reconstruct
 * the link state from STATUS/PHY bits, and restore the offload
 * properties saved via mig_props.  Returns 0 (success).
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the MIT compat flag the mitigation registers must stay 0. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = true;
    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1452 
/* Subsection post_load: note that the TSO subsection arrived, so
 * e1000_post_load() must not duplicate tx.props into tx.tso_props. */
static int e1000_tx_tso_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    s->received_tx_tso = true;
    return 0;
}
1459 
/* Subsection .needed: migrate mitigation state only with the MIT flag. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1466 
/* Subsection .needed: migrate the full mac_reg array only with MAC flag. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1473 
/* Subsection .needed: migrate separate TSO props only with the TSO flag. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1480 
/* Optional subsection: interrupt-mitigation registers and IRQ level. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1495 
/* Optional subsection: the entire 0x8000-entry MAC register file. */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1506 
/* Optional subsection: the TSO-specific copy of the TX offload props. */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1528 
/* Main migration description: EEPROM/PHY state, TX context, and the
 * subset of MAC registers migrated in the legacy (non-full-MAC) case. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1613 
1614 /*
1615  * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1616  * Note: A valid DevId will be inserted during pci_e1000_realize().
1617  */
1618 static const uint16_t e1000_eeprom_template[64] = {
1619     0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1620     0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1621     0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1622     0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1623     0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1624     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1625     0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1626     0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1627 };
1628 
1629 /* PCI interface */
1630 
1631 static void
1632 e1000_mmio_setup(E1000State *d)
1633 {
1634     int i;
1635     const uint32_t excluded_regs[] = {
1636         E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1637         E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1638     };
1639 
1640     memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1641                           "e1000-mmio", PNPMMIO_SIZE);
1642     memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1643     for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1644         memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1645                                      excluded_regs[i+1] - excluded_regs[i] - 4);
1646     memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1647 }
1648 
/*
 * PCI unrealize: stop and free all timers, then tear down the NIC
 * backend.  Each timer is deleted (dequeued) before being freed.
 */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    timer_del(d->flush_queue_timer);
    timer_free(d->flush_queue_timer);
    qemu_del_nic(d->nic);
}
1662 
/* Net-layer callbacks for the emulated NIC. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1671 
1672 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1673                                 uint32_t val, int len)
1674 {
1675     E1000State *s = E1000(pci_dev);
1676 
1677     pci_default_write_config(pci_dev, address, val, len);
1678 
1679     if (range_covers_byte(address, len, PCI_COMMAND) &&
1680         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1681         qemu_flush_queued_packets(qemu_get_queue(s->nic));
1682     }
1683 }
1684 
/*
 * PCI realize: set up config space, BARs, EEPROM contents, the NIC
 * backend, and the three device timers.
 */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint8_t *macaddr;

    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    /* BAR 0: MMIO registers; BAR 1: (stub) I/O ports. */
    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;

    /* Fill the EEPROM image: template + device ID + MAC + checksum. */
    e1000x_core_prepare_eeprom(d->eeprom_data,
                               e1000_eeprom_template,
                               sizeof(e1000_eeprom_template),
                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                        e1000_flush_queue_timer, d);
}
1726 
1727 static void qdev_e1000_reset(DeviceState *dev)
1728 {
1729     E1000State *d = E1000(dev);
1730     e1000_reset(d);
1731 }
1732 
/*
 * User-configurable properties.  The DEFINE_PROP_BIT entries toggle
 * feature bits in compat_flags; all default to on.  NOTE(review):
 * presumably machine-type compat code turns them off for older machine
 * types to keep migration compatible — confirm against hw compat tables.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1745 
/*
 * Per-variant identity, handed to e1000_class_init() as class_data
 * when the concrete QOM types are registered.
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the variant */
    uint16_t   device_id;   /* PCI device ID */
    uint8_t    revision;    /* PCI revision ID */
    uint16_t   phy_id2;     /* value stored in E1000BaseClass::phy_id2 */
} E1000Info;
1752 
1753 static void e1000_class_init(ObjectClass *klass, void *data)
1754 {
1755     DeviceClass *dc = DEVICE_CLASS(klass);
1756     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1757     E1000BaseClass *e = E1000_CLASS(klass);
1758     const E1000Info *info = data;
1759 
1760     k->realize = pci_e1000_realize;
1761     k->exit = pci_e1000_uninit;
1762     k->romfile = "efi-e1000.rom";
1763     k->vendor_id = PCI_VENDOR_ID_INTEL;
1764     k->device_id = info->device_id;
1765     k->revision = info->revision;
1766     e->phy_id2 = info->phy_id2;
1767     k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1768     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1769     dc->desc = "Intel Gigabit Ethernet";
1770     dc->reset = qdev_e1000_reset;
1771     dc->vmsd = &vmstate_e1000;
1772     device_class_set_props(dc, e1000_properties);
1773 }
1774 
1775 static void e1000_instance_init(Object *obj)
1776 {
1777     E1000State *n = E1000(obj);
1778     device_add_bootindex_property(obj, &n->conf.bootindex,
1779                                   "bootindex", "/ethernet-phy@0",
1780                                   DEVICE(n));
1781 }
1782 
/*
 * Abstract base type; concrete variants are registered from
 * e1000_register_types() with this as their parent.
 */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1795 
/*
 * Supported chip variants; one QOM type is registered per entry.
 * "e1000" (82540EM) is the default NIC model.
 */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1816 
1817 static void e1000_register_types(void)
1818 {
1819     int i;
1820 
1821     type_register_static(&e1000_base_info);
1822     for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1823         const E1000Info *info = &e1000_devices[i];
1824         TypeInfo type_info = {};
1825 
1826         type_info.name = info->name;
1827         type_info.parent = TYPE_E1000_BASE;
1828         type_info.class_data = (void *)info;
1829         type_info.class_init = e1000_class_init;
1830 
1831         type_register(&type_info);
1832     }
1833 }
1834 
1835 type_init(e1000_register_types)
1836