/*
 * QEMU VMWARE VMXNET3 paravirtual NIC
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "net/tap.h"
#include "net/checksum.h"
#include "system/system.h"
#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "migration/register.h"
#include "migration/vmstate.h"

#include "vmxnet3.h"
#include "vmxnet3_defs.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
#include "net_tx_pkt.h"
#include "net_rx_pkt.h"
#include "qom/object.h"

#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000

#define VMXNET3_EXP_EP_OFFSET (0x48)
#define VMXNET3_MSI_OFFSET    (0x84)
#define VMXNET3_MSIX_OFFSET   (0x9c)
#define VMXNET3_DSN_OFFSET    (0x100)

#define VMXNET3_BAR0_IDX      (0)
#define VMXNET3_BAR1_IDX      (1)
#define VMXNET3_MSIX_BAR_IDX  (2)

#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA   (0x1000)

/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED      (1000 << 16)
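
/*
 * Illustrative example (editor's addition, not in the original source):
 * the guest reads the link speed from the upper 16 bits of this register
 * value, so (1000 << 16) == 0x03E80000 encodes a 1000 Mbps (1 Gbps) link.
 */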

/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP  0x1

/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION      0x1
#define VMXNET3_DEVICE_REVISION   0x1

/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS   (1)

/* Macros for ring descriptor access */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))

#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))

#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))

#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))

/* Macros for guest driver shared area access */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
    (vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
    (vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
    (vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))

#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
    (vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
    (vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))

#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
    (vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))

#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))

struct VMXNET3Class {
    PCIDeviceClass parent_class;
    DeviceRealize parent_dc_realize;
};
typedef struct VMXNET3Class VMXNET3Class;

DECLARE_CLASS_CHECKERS(VMXNET3Class, VMXNET3_DEVICE,
                       TYPE_VMXNET3)

static inline void vmxnet3_ring_init(PCIDevice *d,
                                     Vmxnet3Ring *ring,
                                     hwaddr pa,
                                     uint32_t size,
                                     uint32_t cell_size,
                                     bool zero_region)
{
    ring->pa = pa;
    ring->size = size;
    ring->cell_size = cell_size;
    ring->gen = VMXNET3_INIT_GEN;
    ring->next = 0;

    if (zero_region) {
        vmw_shmem_set(d, pa, 0, size * cell_size);
    }
}
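
/*
 * Usage sketch (editor's addition, not in the original source): the
 * activation path below initializes rings with this helper, e.g.
 *
 *     vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
 *                       sizeof(struct Vmxnet3_TxCompDesc), true);
 *
 * Completion rings are zeroed on init (zero_region == true), while
 * guest-filled descriptor rings are left untouched.
 */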

#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r)                         \
    macro("%s#%d: base %" PRIx64 " size %u cell_size %u gen %d next %u",     \
          (ring_name), (ridx),                                               \
          (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)

static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
{
    if (++ring->next >= ring->size) {
        ring->next = 0;
        ring->gen ^= 1;
    }
}

static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
{
    if (ring->next-- == 0) {
        ring->next = ring->size - 1;
        ring->gen ^= 1;
    }
}
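
/*
 * Worked example (editor's addition): with size == 3, next == 2 and
 * gen == VMXNET3_INIT_GEN, vmxnet3_ring_inc() wraps next to 0 and flips
 * gen; a subsequent vmxnet3_ring_dec() restores next == 2 and the original
 * gen. The generation bit is what lets producer and consumer tell fresh
 * descriptors from stale ones across ring wrap-around.
 */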

static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
{
    return ring->pa + ring->next * ring->cell_size;
}

static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                               void *buff)
{
    vmw_shmem_read(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                                void *buff)
{
    vmw_shmem_write(d, vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
}

static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
{
    return ring->next;
}

static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
{
    return ring->gen;
}

/* Debug trace-related functions */
static inline void
vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
{
    VMW_PKPRN("TX DESCR: "
              "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
              "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
              descr->addr, descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
              descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
}

static inline void
vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
{
    VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
              "csum_start: %d, csum_offset: %d",
              vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
              vhdr->csum_start, vhdr->csum_offset);
}

static inline void
vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
{
    VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, btype: %d",
              descr->addr, descr->len, descr->gen,
              descr->rsvd, descr->dtype, descr->ext1, descr->btype);
}
/* Interrupt management */

/*
 * This function returns a flag indicating whether the interrupt line is left
 * in the asserted state. This depends on the type of interrupt used: for
 * INTX, the interrupt line stays asserted until explicit deassertion; for
 * MSI(X), the line is deasserted automatically due to the notification
 * semantics of MSI(X) interrupts.
 */
static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msix_used && msix_enabled(d)) {
        VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
        msix_notify(d, int_idx);
        return false;
    }
    if (msi_enabled(d)) {
        VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
        msi_notify(d, int_idx);
        return false;
    }

    VMW_IRPRN("Asserting line for interrupt %u", int_idx);
    pci_irq_assert(d);
    return true;
}

static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);

    /*
     * This function should never be called for MSI(X) interrupts
     * because deassertion is never required for message interrupts
     */
    assert(!s->msix_used || !msix_enabled(d));
    assert(!msi_enabled(d));

    VMW_IRPRN("Deasserting line for interrupt %u", lidx);
    pci_irq_deassert(d);
}

static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
{
    if (!s->interrupt_states[lidx].is_pending &&
       s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
        _vmxnet3_deassert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_asserted = false;
        return;
    }

    if (s->interrupt_states[lidx].is_pending &&
       !s->interrupt_states[lidx].is_masked &&
       !s->interrupt_states[lidx].is_asserted) {
        VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
        s->interrupt_states[lidx].is_asserted =
            _vmxnet3_assert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_pending = false;
        return;
    }
}

static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);
    s->interrupt_states[lidx].is_pending = true;
    vmxnet3_update_interrupt_line_state(s, lidx);

    if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    if (msi_enabled(d) && s->auto_int_masking) {
        goto do_automask;
    }

    return;

do_automask:
    s->interrupt_states[lidx].is_masked = true;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
{
    return s->interrupt_states[lidx].is_asserted;
}

static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
{
    s->interrupt_states[int_idx].is_pending = false;
    if (s->auto_int_masking) {
        s->interrupt_states[int_idx].is_masked = true;
    }
    vmxnet3_update_interrupt_line_state(s, int_idx);
}

static void
vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
{
    s->interrupt_states[lidx].is_masked = is_masked;
    vmxnet3_update_interrupt_line_state(s, lidx);
}

static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
{
    return (VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC);
}

#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
#define VMXNET3_MAKE_BYTE(byte_num, val) \
    (((uint32_t)((val) & 0xFF)) << (byte_num)*8)

static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
{
    s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l,  0);
    s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l,  1);
    s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l,  2);
    s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l,  3);
    s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h,  0);
    s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h,  1);

    VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}

static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
           VMXNET3_MAKE_BYTE(1, addr->a[1]) |
           VMXNET3_MAKE_BYTE(2, addr->a[2]) |
           VMXNET3_MAKE_BYTE(3, addr->a[3]);
}

static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
{
    return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
           VMXNET3_MAKE_BYTE(1, addr->a[5]);
}
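
/*
 * Worked example (editor's addition, not in the original source): for the
 * MAC address 00:0c:29:ab:cd:ef, vmxnet3_get_mac_low() returns 0xab290c00
 * and vmxnet3_get_mac_high() returns 0x0000efcd. Applying VMXNET3_GET_BYTE
 * to those values, as vmxnet3_set_variable_mac() does, recovers the six
 * original bytes, so the two conversions are exact inverses.
 */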

static void
vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
}

static inline void
vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
}

static inline void
vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
}

static void
vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
}

static void
vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
}

static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
{
    struct Vmxnet3_TxCompDesc txcq_descr;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);

    memset(&txcq_descr, 0, sizeof(txcq_descr));
    txcq_descr.txdIdx = tx_ridx;
    txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
    txcq_descr.val1 = cpu_to_le32(txcq_descr.val1);
    txcq_descr.val2 = cpu_to_le32(txcq_descr.val2);
    vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);

    /* Flush changes in TX descriptor before changing the counter value */
    smp_wmb();

    vmxnet3_inc_tx_completion_counter(s, qidx);
    vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
}

static bool
vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
    switch (s->offload_mode) {
    case VMXNET3_OM_NONE:
        return net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);

    case VMXNET3_OM_CSUM:
        VMW_PKPRN("L4 CSO requested\n");
        return net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);

    case VMXNET3_OM_TSO:
        VMW_PKPRN("GSO offload requested.");
        if (!net_tx_pkt_build_vheader(s->tx_pkt, true, true,
            s->cso_or_gso_size)) {
            return false;
        }
        net_tx_pkt_update_ip_checksums(s->tx_pkt);
        break;

    default:
        g_assert_not_reached();
    }

    return true;
}

static void
vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
                             const struct Vmxnet3_TxDesc *txd)
{
    s->offload_mode = txd->om;
    s->cso_or_gso_size = txd->msscof;
    s->tci = txd->tci;
    s->needs_vlan = txd->ti;
}

typedef enum {
    VMXNET3_PKT_STATUS_OK,
    VMXNET3_PKT_STATUS_ERROR,
    VMXNET3_PKT_STATUS_DISCARD,   /* only for tx */
    VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */
} Vmxnet3PktStatus;

static void
vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
    Vmxnet3PktStatus status)
{
    size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
    struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;

    switch (status) {
    case VMXNET3_PKT_STATUS_OK:
        switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsTxOK++;
            stats->bcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsTxOK++;
            stats->mcastBytesTxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsTxOK++;
            stats->ucastBytesTxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (s->offload_mode == VMXNET3_OM_TSO) {
            /*
             * According to the VMware headers this statistic is the number
             * of packets after segmentation, but since we don't have this
             * information in the QEMU model, the best we can do is to
             * report the number of non-segmented packets
             */
            stats->TSOPktsTxOK++;
            stats->TSOBytesTxOK += tot_len;
        }
        break;

    case VMXNET3_PKT_STATUS_DISCARD:
        stats->pktsTxDiscard++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsTxError++;
        break;

    default:
        g_assert_not_reached();
    }
}

static void
vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
                                int qidx,
                                Vmxnet3PktStatus status)
{
    struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
    size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);

    switch (status) {
    case VMXNET3_PKT_STATUS_OUT_OF_BUF:
        stats->pktsRxOutOfBuf++;
        break;

    case VMXNET3_PKT_STATUS_ERROR:
        stats->pktsRxError++;
        break;

    case VMXNET3_PKT_STATUS_OK:
        switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
        case ETH_PKT_BCAST:
            stats->bcastPktsRxOK++;
            stats->bcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_MCAST:
            stats->mcastPktsRxOK++;
            stats->mcastBytesRxOK += tot_len;
            break;
        case ETH_PKT_UCAST:
            stats->ucastPktsRxOK++;
            stats->ucastBytesRxOK += tot_len;
            break;
        default:
            g_assert_not_reached();
        }

        if (tot_len > s->mtu) {
            stats->LROPktsRxOK++;
            stats->LROBytesRxOK += tot_len;
        }
        break;

    default:
        g_assert_not_reached();
    }
}

static inline void
vmxnet3_ring_read_curr_txdesc(PCIDevice *pcidev, Vmxnet3Ring *ring,
                              struct Vmxnet3_TxDesc *txd)
{
    vmxnet3_ring_read_curr_cell(pcidev, ring, txd);
    txd->addr = le64_to_cpu(txd->addr);
    txd->val1 = le32_to_cpu(txd->val1);
    txd->val2 = le32_to_cpu(txd->val2);
}

static inline bool
vmxnet3_pop_next_tx_descr(VMXNET3State *s,
                          int qidx,
                          struct Vmxnet3_TxDesc *txd,
                          uint32_t *descr_idx)
{
    Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
    PCIDevice *d = PCI_DEVICE(s);

    vmxnet3_ring_read_curr_txdesc(d, ring, txd);
    if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_ring_read_curr_txdesc(d, ring, txd);
        VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
        *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
        vmxnet3_inc_tx_consumption_counter(s, qidx);
        return true;
    }

    return false;
}
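
/*
 * Note (editor's addition): the read / smp_rmb() / re-read sequence above
 * is the generation-bit handshake used throughout this file. The guest
 * fills the descriptor body first and flips the gen field last, so once
 * gen matches the ring's current generation the descriptor may be
 * consumed; the read barrier plus re-read guards against observing a
 * matching gen together with a stale descriptor body.
 */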

static bool
vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
{
    Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;

    if (!vmxnet3_setup_tx_offloads(s)) {
        status = VMXNET3_PKT_STATUS_ERROR;
        goto func_exit;
    }

    /* debug prints */
    vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
    net_tx_pkt_dump(s->tx_pkt);

    if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
        status = VMXNET3_PKT_STATUS_DISCARD;
        goto func_exit;
    }

func_exit:
    vmxnet3_on_tx_done_update_stats(s, qidx, status);
    return (status == VMXNET3_PKT_STATUS_OK);
}

static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
{
    struct Vmxnet3_TxDesc txd;
    uint32_t txd_idx;
    uint32_t data_len;
    hwaddr data_pa;

    for (;;) {
        if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
            break;
        }

        vmxnet3_dump_tx_descr(&txd);

        if (!s->skip_current_tx_pkt) {
            data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
            data_pa = txd.addr;

            if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s),
                                                 data_pa, data_len)) {
                s->skip_current_tx_pkt = true;
            }
        }

        if (s->tx_sop) {
            vmxnet3_tx_retrieve_metadata(s, &txd);
            s->tx_sop = false;
        }

        if (txd.eop) {
            if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
                if (s->needs_vlan) {
                    net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
                }

                vmxnet3_send_packet(s, qidx);
            } else {
                vmxnet3_on_tx_done_update_stats(s, qidx,
                                                VMXNET3_PKT_STATUS_ERROR);
            }

            vmxnet3_complete_packet(s, qidx, txd_idx);
            s->tx_sop = true;
            s->skip_current_tx_pkt = false;
            net_tx_pkt_reset(s->tx_pkt,
                             net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
        }
    }

    net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
}

static inline void
vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
                           struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
{
    PCIDevice *d = PCI_DEVICE(s);

    Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
    *didx = vmxnet3_ring_curr_cell_idx(ring);
    vmxnet3_ring_read_curr_cell(d, ring, dbuf);
    dbuf->addr = le64_to_cpu(dbuf->addr);
    dbuf->val1 = le32_to_cpu(dbuf->val1);
    dbuf->ext1 = le32_to_cpu(dbuf->ext1);
}

static inline uint8_t
vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
{
    return s->rxq_descr[qidx].rx_ring[ridx].gen;
}

static inline hwaddr
vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
{
    uint8_t ring_gen;
    struct Vmxnet3_RxCompDesc rxcd;

    hwaddr daddr =
        vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);

    pci_dma_read(PCI_DEVICE(s),
                 daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
    rxcd.val1 = le32_to_cpu(rxcd.val1);
    rxcd.val2 = le32_to_cpu(rxcd.val2);
    rxcd.val3 = le32_to_cpu(rxcd.val3);
    ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);

    if (rxcd.gen != ring_gen) {
        *descr_gen = ring_gen;
        vmxnet3_inc_rx_completion_counter(s, qidx);
        return daddr;
    }

    return 0;
}

static inline void
vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
{
    vmxnet3_dec_rx_completion_counter(s, qidx);
}

#define RXQ_IDX      (0)
#define RX_HEAD_BODY_RING (0)
#define RX_BODY_ONLY_RING (1)

static bool
vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *descr_buf,
                               uint32_t *descr_idx,
                               uint32_t *ridx)
{
    for (;;) {
        uint32_t ring_gen;
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* If no more free descriptors - return */
        ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
        if (descr_buf->gen != ring_gen) {
            return false;
        }

        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
                                   descr_buf, descr_idx);

        /* Mark current descriptor as used/skipped */
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);

        /* If this is what we are looking for - return */
        if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }
}

static bool
vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
                               struct Vmxnet3_RxDesc *d,
                               uint32_t *didx,
                               uint32_t *ridx)
{
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);

    /* Try to find corresponding descriptor in head/body ring */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
        if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
            vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
            *ridx = RX_HEAD_BODY_RING;
            return true;
        }
    }

    /*
     * If there are no free descriptors on the head/body ring, or the next
     * free descriptor is a head descriptor, switch to the body-only ring
     */
    vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);

    /* If no more free descriptors - return */
    if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
        /* Only read after generation field verification */
        smp_rmb();
        /* Re-read to be sure we got the latest version */
        vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
        assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
        *ridx = RX_BODY_ONLY_RING;
        vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
        return true;
    }

    return false;
}

static inline bool
vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
                          struct Vmxnet3_RxDesc *descr_buf,
                          uint32_t *descr_idx,
                          uint32_t *ridx)
{
    if (is_head || !s->rx_packets_compound) {
        return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
    } else {
        return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
    }
}

/* In case the packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
 * the implementation always passes an RxCompDesc with a "Checksum
 * calculated and found correct" notification to the OS (cnc=0 and tuc=1,
 * see vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
 *
 * Therefore, if the packet has NEEDS_CSUM set, we must calculate
 * and place a fully computed checksum into the tcp/udp header.
 * Otherwise, the OS driver will receive a checksum-correct indication
 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
 * holding just the pseudo header csum value.
 *
 * While this is not a problem if the packet is destined for local delivery,
 * in the case the host OS performs forwarding, it will forward an
 * incorrectly checksummed packet.
 */
static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
                                           const void *pkt_data,
                                           size_t pkt_len)
{
    struct virtio_net_hdr *vhdr;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    uint8_t *data;
    int len;

    vhdr = net_rx_pkt_get_vhdr(pkt);
    if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
        return;
    }

    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    if (!(hasip4 || hasip6) ||
        (l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
         l4hdr_proto != ETH_L4_HDR_PROTO_UDP)) {
        return;
    }

    vmxnet3_dump_virt_hdr(vhdr);

    /* Validate packet len: csum_start + csum_offset + length of csum field */
    if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
        VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
                  "cannot calculate checksum",
                  pkt_len, vhdr->csum_start, vhdr->csum_offset);
        return;
    }

    data = (uint8_t *)pkt_data + vhdr->csum_start;
    len = pkt_len - vhdr->csum_start;
    /* Put the checksum obtained into the packet */
    stw_be_p(data + vhdr->csum_offset,
             net_checksum_finish_nozero(net_checksum_add(len, data)));

    vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}
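
/*
 * Worked example (editor's addition): for a TCP/IPv4 packet with a
 * standard 14-byte Ethernet header and a 20-byte IP header, the virtio
 * header carries csum_start = 34 (start of the TCP header) and
 * csum_offset = 16 (position of the checksum field within it), so the
 * length check above requires pkt_len >= 34 + 16 + 2 before the 16-bit
 * checksum at byte offset 50 can be filled in.
 */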

static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
    struct Vmxnet3_RxCompDesc *rxcd)
{
    int csum_ok, is_gso;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    struct virtio_net_hdr *vhdr;
    uint8_t offload_type;

    if (net_rx_pkt_is_vlan_stripped(pkt)) {
        rxcd->ts = 1;
        rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
    }

    vhdr = net_rx_pkt_get_vhdr(pkt);
    /*
     * The checksum is valid either when the lower level says so, or when
     * the lower level requests checksum offload, which indicates that the
     * packet was produced/bridged locally and has not traveled over the
     * network since the last checksum calculation or production
     */
    csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
              VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);

    offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;

    if (!csum_ok && !is_gso) {
        goto nocsum;
    }

    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    if ((l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
         l4hdr_proto != ETH_L4_HDR_PROTO_UDP) ||
        (!hasip4 && !hasip6)) {
        goto nocsum;
    }

    rxcd->cnc = 0;
    rxcd->v4 = hasip4 ? 1 : 0;
    rxcd->v6 = hasip6 ? 1 : 0;
    rxcd->tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
    rxcd->udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
    rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
    return;

nocsum:
    rxcd->cnc = 1;
}

static void
vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
                       const struct iovec *iov,
                       size_t start_iov_off,
                       hwaddr target_addr,
                       size_t bytes_to_copy)
{
    size_t curr_off = 0;
    size_t copied = 0;

    while (bytes_to_copy) {
        if (start_iov_off < (curr_off + iov->iov_len)) {
            size_t chunk_len =
                MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);

            pci_dma_write(pci_dev, target_addr + copied,
                          iov->iov_base + start_iov_off - curr_off,
                          chunk_len);

            copied += chunk_len;
            start_iov_off += chunk_len;
            curr_off = start_iov_off;
            bytes_to_copy -= chunk_len;
        } else {
            curr_off += iov->iov_len;
        }
        iov++;
    }
}
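
/*
 * Walk-through (editor's addition): given iov[] = { 100 bytes, 200 bytes }
 * with start_iov_off = 150 and bytes_to_copy = 100, the first iteration
 * skips iov[0] (curr_off becomes 100); the second finds the offset inside
 * iov[1] and DMAs 100 bytes starting at iov[1].iov_base + 50. Copies that
 * straddle an iovec boundary are simply split across successive
 * pci_dma_write() calls.
 */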

static void
vmxnet3_pci_dma_write_rxcd(PCIDevice *pcidev, dma_addr_t pa,
                           struct Vmxnet3_RxCompDesc *rxcd)
{
    rxcd->val1 = cpu_to_le32(rxcd->val1);
    rxcd->val2 = cpu_to_le32(rxcd->val2);
    rxcd->val3 = cpu_to_le32(rxcd->val3);
    pci_dma_write(pcidev, pa, rxcd, sizeof(*rxcd));
}

static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
    struct Vmxnet3_RxDesc rxd;
    PCIDevice *d = PCI_DEVICE(s);
    bool is_head = true;
    uint32_t rxd_idx;
    uint32_t rx_ridx = 0;

    struct Vmxnet3_RxCompDesc rxcd;
    uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
    hwaddr new_rxcd_pa = 0;
    hwaddr ready_rxcd_pa = 0;
    struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
    size_t bytes_copied = 0;
    size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
    uint16_t num_frags = 0;
    size_t chunk_size;

    net_rx_pkt_dump(s->rx_pkt);

    while (bytes_left > 0) {

        /* cannot add more frags to packet */
        if (num_frags == s->max_rx_frags) {
            break;
        }

        new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
        if (!new_rxcd_pa) {
            break;
        }

        if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
            break;
        }

        chunk_size = MIN(bytes_left, rxd.len);
        vmxnet3_pci_dma_writev(d, data, bytes_copied, rxd.addr, chunk_size);
        bytes_copied += chunk_size;
        bytes_left -= chunk_size;

        vmxnet3_dump_rx_descr(&rxd);

        if (ready_rxcd_pa != 0) {
            vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);
        }

        memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
        rxcd.rxdIdx = rxd_idx;
        rxcd.len = chunk_size;
        rxcd.sop = is_head;
        rxcd.gen = new_rxcd_gen;
        rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;

        if (bytes_left == 0) {
            vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
        }

        VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
                  "sop %d csum_correct %lu",
                  (unsigned long) rx_ridx,
                  (unsigned long) rxcd.rxdIdx,
                  (unsigned long) rxcd.len,
                  (int) rxcd.sop,
                  (unsigned long) rxcd.tuc);

        is_head = false;
        ready_rxcd_pa = new_rxcd_pa;
        new_rxcd_pa = 0;
        num_frags++;
    }

    if (ready_rxcd_pa != 0) {
        rxcd.eop = 1;
        rxcd.err = (bytes_left != 0);

        vmxnet3_pci_dma_write_rxcd(d, ready_rxcd_pa, &rxcd);

        /* Flush RX descriptor changes */
        smp_wmb();
    }

    if (new_rxcd_pa != 0) {
        vmxnet3_revert_rxc_descr(s, RXQ_IDX);
    }

    vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);

    if (bytes_left == 0) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
        return true;
    } else if (num_frags == s->max_rx_frags) {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
        return false;
    } else {
        vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
                                        VMXNET3_PKT_STATUS_OUT_OF_BUF);
        return false;
    }
}
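
/*
 * Sketch of the completion chain built above (editor's addition): a packet
 * spanning three RX buffers produces three completion descriptors; each is
 * written back one step behind the loop so that the final one can be
 * stamped with eop=1 (and err if the packet was truncated) before the
 * guest sees it:
 *
 *     frag 0: sop=1          (written when frag 1 is prepared)
 *     frag 1: sop=0          (written when frag 2 is prepared)
 *     frag 2: sop=0, eop=1   (written after the loop)
 *
 * The deferred write is what keeps the guest from racing ahead of a
 * partially described packet.
 */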

static void
vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
                      uint64_t val, unsigned size)
{
    VMXNET3State *s = opaque;

    if (!s->device_active) {
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
                        VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
        int tx_queue_idx =
            VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
                                     VMXNET3_REG_ALIGN);
        if (tx_queue_idx <= s->txq_num) {
            vmxnet3_process_tx_queue(s, tx_queue_idx);
        } else {
            qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Illegal TX queue %d/%d\n",
                          tx_queue_idx, s->txq_num);
        }
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                        VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);

        VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);

        vmxnet3_on_interrupt_mask_changed(s, l, val);
        return;
    }

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
                        VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
       VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
                        VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
        return;
    }

    VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
              (uint64_t) addr, val, size);
}

static uint64_t
vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
{
    VMXNET3State *s = opaque;

    if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
                        VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
        int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
                                         VMXNET3_REG_ALIGN);
        return s->interrupt_states[l].is_masked;
    }

    VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
    return 0;
}

static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
        s->interrupt_states[i].is_asserted = false;
        s->interrupt_states[i].is_pending = false;
        s->interrupt_states[i].is_masked = true;
    }
}

static void vmxnet3_reset_mac(VMXNET3State *s)
{
    memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
    VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
}

static void vmxnet3_deactivate_device(VMXNET3State *s)
{
    if (s->device_active) {
        VMW_CBPRN("Deactivating vmxnet3...");
        net_tx_pkt_uninit(s->tx_pkt);
        net_rx_pkt_uninit(s->rx_pkt);
        s->device_active = false;
    }
}

static void vmxnet3_reset(VMXNET3State *s)
{
    VMW_CBPRN("Resetting vmxnet3...");

    vmxnet3_deactivate_device(s);
    vmxnet3_reset_interrupt_states(s);
    s->drv_shmem = 0;
    s->tx_sop = true;
    s->skip_current_tx_pkt = false;
}

static void vmxnet3_update_rx_mode(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    s->rx_mode = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                           devRead.rxFilterConf.rxMode);
    VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
}

static void vmxnet3_update_vlan_filters(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    /* Copy configuration from shared memory */
    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem,
                            devRead.rxFilterConf.vfTable,
                            s->vlan_table,
                            sizeof(s->vlan_table));

    /* Invert byte order when needed */
    for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
        s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
    }

    /* Dump configuration for debugging purposes */
    VMW_CFPRN("Configured VLANs:");
    for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
        if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
            VMW_CFPRN("\tVLAN %d is present", i);
        }
    }
}

static void vmxnet3_update_mcast_filters(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    uint16_t list_bytes =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem,
                                  devRead.rxFilterConf.mfTableLen);

    s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);

    s->mcast_list = g_realloc(s->mcast_list, list_bytes);
    if (!s->mcast_list) {
        if (s->mcast_list_len == 0) {
            VMW_CFPRN("Current multicast list is empty");
        } else {
            VMW_ERPRN("Failed to allocate multicast list of %d elements",
                      s->mcast_list_len);
        }
        s->mcast_list_len = 0;
    } else {
        int i;
        hwaddr mcast_list_pa =
            VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem,
                                      devRead.rxFilterConf.mfTablePA);

        pci_dma_read(d, mcast_list_pa, s->mcast_list, list_bytes);

        VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
        for (i = 0; i < s->mcast_list_len; i++) {
            VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
        }
    }
}

static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
{
    vmxnet3_update_rx_mode(s);
    vmxnet3_update_vlan_filters(s);
    vmxnet3_update_mcast_filters(s);
}

static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
{
    uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
    VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
    return interrupt_mode;
}

static void vmxnet3_fill_stats(VMXNET3State *s)
{
    int i;
    PCIDevice *d = PCI_DEVICE(s);

    if (!s->device_active) {
        return;
    }

    for (i = 0; i < s->txq_num; i++) {
        pci_dma_write(d,
                      s->txq_descr[i].tx_stats_pa,
                      &s->txq_descr[i].txq_stats,
                      sizeof(s->txq_descr[i].txq_stats));
    }

    for (i = 0; i < s->rxq_num; i++) {
        pci_dma_write(d,
                      s->rxq_descr[i].rx_stats_pa,
                      &s->rxq_descr[i].rxq_stats,
                      sizeof(s->rxq_descr[i].rxq_stats));
    }
}

static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
{
    struct Vmxnet3_GOSInfo gos;
    PCIDevice *d = PCI_DEVICE(s);

    VMXNET3_READ_DRV_SHARED(d, s->drv_shmem, devRead.misc.driverInfo.gos,
                            &gos, sizeof(gos));
    s->rx_packets_compound =
        (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;

    VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
}

static void
vmxnet3_dump_conf_descr(const char *name,
                        struct Vmxnet3_VariableLenConfDesc *pm_descr)
{
    VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
              name, pm_descr->confVer, pm_descr->confLen);
}

static void vmxnet3_update_pm_state(VMXNET3State *s)
{
    struct Vmxnet3_VariableLenConfDesc pm_descr;
    PCIDevice *d = PCI_DEVICE(s);

    pm_descr.confLen =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confLen);
    pm_descr.confVer =
        VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.pmConfDesc.confVer);
    pm_descr.confPA =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.pmConfDesc.confPA);

    vmxnet3_dump_conf_descr("PM State", &pm_descr);
}

static void vmxnet3_update_features(VMXNET3State *s)
{
    uint32_t guest_features;
    int rxcso_supported;
    PCIDevice *d = PCI_DEVICE(s);

    guest_features = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem,
                                               devRead.misc.uptFeatures);

    rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
    s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
    s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);

    VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
              s->lro_supported, rxcso_supported,
              s->rx_vlan_stripping);
    if (s->peer_has_vhdr) {
        qemu_set_offload(qemu_get_queue(s->nic)->peer,
                         rxcso_supported,
                         s->lro_supported,
                         s->lro_supported,
                         0,
                         0,
                         0,
                         0);
    }
}

static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
{
    return s->msix_used || msi_enabled(PCI_DEVICE(s))
        || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
}

static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
{
    int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
    if (idx >= max_ints) {
        hw_error("Bad interrupt index: %d\n", idx);
    }
}

static void vmxnet3_validate_interrupts(VMXNET3State *s)
{
    int i;

    VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
    vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);

    for (i = 0; i < s->txq_num; i++) {
        int idx = s->txq_descr[i].intr_idx;
        VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }

    for (i = 0; i < s->rxq_num; i++) {
        int idx = s->rxq_descr[i].intr_idx;
        VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
    }
}

static bool vmxnet3_validate_queues(VMXNET3State *s)
{
    /*
     * txq_num and rxq_num are the total numbers of queues configured by
     * the guest. These numbers must not exceed the corresponding maximum
     * values.
     */

    if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad TX queues number: %d\n",
                      s->txq_num);
        return false;
    }

    if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad RX queues number: %d\n",
                      s->rxq_num);
        return false;
    }

    return true;
}

static void vmxnet3_activate_device(VMXNET3State *s)
{
    int i;
    static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
    PCIDevice *d = PCI_DEVICE(s);
    hwaddr qdescr_table_pa;
    uint64_t pa;
    uint32_t size;

    /* Verify configuration consistency */
    if (!vmxnet3_verify_driver_magic(d, s->drv_shmem)) {
        VMW_ERPRN("Device configuration received from driver is invalid");
        return;
    }

    /* Verify if device is active */
    if (s->device_active) {
        VMW_CFPRN("Vmxnet3 device is active");
        return;
    }

    s->txq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues);
    s->rxq_num =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues);

    VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
    if (!vmxnet3_validate_queues(s)) {
        return;
    }

    vmxnet3_adjust_by_guest_type(s);
    vmxnet3_update_features(s);
    vmxnet3_update_pm_state(s);
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
    if (s->mtu < VMXNET3_MIN_MTU || s->mtu > VMXNET3_MAX_MTU) {
        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad MTU size: %u\n", s->mtu);
        return;
    }
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
        VMXNET3_READ_DRV_SHARED16(d, s->drv_shmem, devRead.misc.maxNumRxSG);

    if (s->max_rx_frags == 0) {
        s->max_rx_frags = 1;
    }

    VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);

    s->event_int_idx =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.eventIntrIdx);
    assert(vmxnet3_verify_intx(s, s->event_int_idx));
    VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);

    s->auto_int_masking =
        VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask);
    VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);

    qdescr_table_pa =
        VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA);
    VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);

    /*
     * The worst-case scenario is a single packet that occupies the entire
     * TX ring space, so we size the maximum TX fragment count as the total
     * size of all TX rings
     */
1463     s->max_tx_frags = 0;
1464 
1465     /* TX queues */
1466     for (i = 0; i < s->txq_num; i++) {
1467         hwaddr qdescr_pa =
1468             qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
1469 
1470         /* Read interrupt number for this TX queue */
1471         s->txq_descr[i].intr_idx =
1472             VMXNET3_READ_TX_QUEUE_DESCR8(d, qdescr_pa, conf.intrIdx);
1473         assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));
1474 
1475         VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
1476 
1477         /* Read rings memory locations for TX queues */
1478         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
1479         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
1480         if (size > VMXNET3_TX_RING_MAX_SIZE) {
1481             size = VMXNET3_TX_RING_MAX_SIZE;
1482         }
1483 
1484         vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
1485                           sizeof(struct Vmxnet3_TxDesc), false);
1486         VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);
1487 
1488         s->max_tx_frags += size;
1489 
1490         /* TXC ring */
1491         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
1492         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
1493         if (size > VMXNET3_TC_RING_MAX_SIZE) {
1494             size = VMXNET3_TC_RING_MAX_SIZE;
1495         }
1496         vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
1497                           sizeof(struct Vmxnet3_TxCompDesc), true);
1498         VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
1499 
1500         s->txq_descr[i].tx_stats_pa =
1501             qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
1502 
1503         memset(&s->txq_descr[i].txq_stats, 0,
1504                sizeof(s->txq_descr[i].txq_stats));
1505 
1506         /* Fill device-managed parameters for queues */
1507         VMXNET3_WRITE_TX_QUEUE_DESCR32(d, qdescr_pa,
1508                                        ctrl.txThreshold,
1509                                        VMXNET3_DEF_TX_THRESHOLD);
1510     }
1511 
1512     /* Preallocate TX packet wrapper */
1513     VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
1514     net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
1515     net_rx_pkt_init(&s->rx_pkt);
1516 
1517     /* Read rings memory locations for RX queues */
1518     for (i = 0; i < s->rxq_num; i++) {
1519         int j;
1520         hwaddr qd_pa =
1521             qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
1522             i * sizeof(struct Vmxnet3_RxQueueDesc);
1523 
1524         /* Read interrupt number for this RX queue */
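        /*
         * Note: conf.intrIdx lives at the same offset in
         * Vmxnet3_TxQueueDesc and Vmxnet3_RxQueueDesc, so the TX
         * accessor below can be reused for the RX queue descriptor.
         */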
        s->rxq_descr[i].intr_idx =
            VMXNET3_READ_TX_QUEUE_DESCR8(d, qd_pa, conf.intrIdx);
        assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));

        VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);

        /* Read rings memory locations */
        for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
            /* RX rings */
            pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
            size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
            if (size > VMXNET3_RX_RING_MAX_SIZE) {
                size = VMXNET3_RX_RING_MAX_SIZE;
            }
            vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                              sizeof(struct Vmxnet3_RxDesc), false);
            VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
                      i, j, pa, size);
        }

        /* RXC ring */
        pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
        size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
        if (size > VMXNET3_RC_RING_MAX_SIZE) {
            size = VMXNET3_RC_RING_MAX_SIZE;
        }
        vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                          sizeof(struct Vmxnet3_RxCompDesc), true);
        VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

        s->rxq_descr[i].rx_stats_pa =
            qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
        memset(&s->rxq_descr[i].rxq_stats, 0,
               sizeof(s->rxq_descr[i].rxq_stats));
    }

    vmxnet3_validate_interrupts(s);

    /* Make sure everything is in place before device activation */
    smp_wmb();

    vmxnet3_reset_mac(s);

    s->device_active = true;
}

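/*
 * Dispatch a command written to VMXNET3_REG_CMD. The command code is
 * latched in s->last_command so that a subsequent read of the same
 * register (see vmxnet3_get_command_status()) returns its result.
 */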
static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
{
    s->last_command = cmd;

    switch (cmd) {
    case VMXNET3_CMD_GET_PERM_MAC_HI:
        VMW_CBPRN("Set: Get upper part of permanent MAC");
        break;

    case VMXNET3_CMD_GET_PERM_MAC_LO:
        VMW_CBPRN("Set: Get lower part of permanent MAC");
        break;

    case VMXNET3_CMD_GET_STATS:
        VMW_CBPRN("Set: Get device statistics");
        vmxnet3_fill_stats(s);
        break;

    case VMXNET3_CMD_ACTIVATE_DEV:
        VMW_CBPRN("Set: Activating vmxnet3 device");
        vmxnet3_activate_device(s);
        break;

    case VMXNET3_CMD_UPDATE_RX_MODE:
        VMW_CBPRN("Set: Update rx mode");
        vmxnet3_update_rx_mode(s);
        break;

    case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
        VMW_CBPRN("Set: Update VLAN filters");
        vmxnet3_update_vlan_filters(s);
        break;

    case VMXNET3_CMD_UPDATE_MAC_FILTERS:
        VMW_CBPRN("Set: Update MAC filters");
        vmxnet3_update_mcast_filters(s);
        break;

    case VMXNET3_CMD_UPDATE_FEATURE:
        VMW_CBPRN("Set: Update features");
        vmxnet3_update_features(s);
        break;

    case VMXNET3_CMD_UPDATE_PMCFG:
        VMW_CBPRN("Set: Update power management config");
        vmxnet3_update_pm_state(s);
        break;

    case VMXNET3_CMD_GET_LINK:
        VMW_CBPRN("Set: Get link");
        break;

    case VMXNET3_CMD_RESET_DEV:
        VMW_CBPRN("Set: Reset device");
        vmxnet3_reset(s);
        break;

    case VMXNET3_CMD_QUIESCE_DEV:
        VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
        vmxnet3_deactivate_device(s);
        break;

    case VMXNET3_CMD_GET_CONF_INTR:
        VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
        break;

    case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
        VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
                  "adaptive ring info flags");
        break;

    case VMXNET3_CMD_GET_DID_LO:
        VMW_CBPRN("Set: Get lower part of device ID");
        break;

    case VMXNET3_CMD_GET_DID_HI:
        VMW_CBPRN("Set: Get upper part of device ID");
        break;

    case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
        VMW_CBPRN("Set: Get device extra info");
        break;

    default:
        VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
        break;
    }
}

static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
{
    uint64_t ret;

    switch (s->last_command) {
    case VMXNET3_CMD_ACTIVATE_DEV:
        ret = (s->device_active) ? 0 : 1;
        VMW_CFPRN("Device active: %" PRIx64, ret);
        break;

    case VMXNET3_CMD_RESET_DEV:
    case VMXNET3_CMD_QUIESCE_DEV:
    case VMXNET3_CMD_GET_QUEUE_STATUS:
    case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
        ret = 0;
        break;

    case VMXNET3_CMD_GET_LINK:
        ret = s->link_status_and_speed;
        VMW_CFPRN("Link and speed: %" PRIx64, ret);
        break;

    case VMXNET3_CMD_GET_PERM_MAC_LO:
        ret = vmxnet3_get_mac_low(&s->perm_mac);
        break;

    case VMXNET3_CMD_GET_PERM_MAC_HI:
        ret = vmxnet3_get_mac_high(&s->perm_mac);
        break;

    case VMXNET3_CMD_GET_CONF_INTR:
        ret = vmxnet3_get_interrupt_config(s);
        break;

    case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
        ret = VMXNET3_DISABLE_ADAPTIVE_RING;
        break;

    case VMXNET3_CMD_GET_DID_LO:
        ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
        break;

    case VMXNET3_CMD_GET_DID_HI:
        ret = VMXNET3_DEVICE_REVISION;
        break;

    default:
        VMW_WRPRN("Received request for unknown command: %x", s->last_command);
        ret = 0;
        break;
    }

    return ret;
}

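/*
 * Event notification: the device ORs event bits into the shared-memory
 * ecr field, and the driver acknowledges them by writing the same bits
 * to VMXNET3_REG_ECR, which clears them via vmxnet3_ack_events().
 */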
static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
{
    uint32_t events;
    PCIDevice *d = PCI_DEVICE(s);

    VMW_CBPRN("Setting events: 0x%x", val);
    events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) | val;
    VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
}

static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t events;

    VMW_CBPRN("Clearing events: 0x%x", val);
    events = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, ecr) & ~val;
    VMXNET3_WRITE_DRV_SHARED32(d, s->drv_shmem, ecr, events);
}

static void
vmxnet3_io_bar1_write(void *opaque,
                      hwaddr addr,
                      uint64_t val,
                      unsigned size)
{
    VMXNET3State *s = opaque;

    switch (addr) {
    /* Vmxnet3 Revision Report Selection */
    case VMXNET3_REG_VRRS:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
                  val, size);
        break;

    /* UPT Version Report Selection */
    case VMXNET3_REG_UVRS:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
                  val, size);
        break;

    /* Driver Shared Address Low */
    case VMXNET3_REG_DSAL:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
                  val, size);
        /*
         * Guest driver will first write the low part of the shared
         * memory address. We save it to temp variable and set the
         * shared address only after we get the high part
         */
        if (val == 0) {
            vmxnet3_deactivate_device(s);
        }
        s->temp_shared_guest_driver_memory = val;
        s->drv_shmem = 0;
        break;

    /* Driver Shared Address High */
    case VMXNET3_REG_DSAH:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
                  val, size);
        /*
         * Set the shared memory between guest driver and device.
         * We already should have low address part.
         */
        s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
        break;

    /* Command */
    case VMXNET3_REG_CMD:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_handle_command(s, val);
        break;

    /* MAC Address Low */
    case VMXNET3_REG_MACL:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
                  val, size);
        s->temp_mac = val;
        break;

    /* MAC Address High */
    case VMXNET3_REG_MACH:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_set_variable_mac(s, val, s->temp_mac);
        break;

    /* Interrupt Cause Register */
    case VMXNET3_REG_ICR:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
                  val, size);
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: write to read-only register VMXNET3_REG_ICR\n",
                      TYPE_VMXNET3);
        break;

    /* Event Cause Register */
    case VMXNET3_REG_ECR:
        VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
                  val, size);
        vmxnet3_ack_events(s, val);
        break;

    default:
        VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
                  addr, val, size);
        break;
    }
}

static uint64_t
vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
{
    VMXNET3State *s = opaque;
    uint64_t ret = 0;

    switch (addr) {
    /* Vmxnet3 Revision Report Selection */
    case VMXNET3_REG_VRRS:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
        ret = VMXNET3_DEVICE_REVISION;
        break;

    /* UPT Version Report Selection */
    case VMXNET3_REG_UVRS:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
        ret = VMXNET3_UPT_REVISION;
        break;

    /* Command */
    case VMXNET3_REG_CMD:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
        ret = vmxnet3_get_command_status(s);
        break;

    /* MAC Address Low */
    case VMXNET3_REG_MACL:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
        ret = vmxnet3_get_mac_low(&s->conf.macaddr);
        break;

    /* MAC Address High */
    case VMXNET3_REG_MACH:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
        ret = vmxnet3_get_mac_high(&s->conf.macaddr);
        break;

    /*
     * Interrupt Cause Register
     * Used for legacy interrupts only so interrupt index always 0
     */
    case VMXNET3_REG_ICR:
        VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
        if (vmxnet3_interrupt_asserted(s, 0)) {
            vmxnet3_clear_interrupt(s, 0);
            ret = true;
        } else {
            ret = false;
        }
        break;

    default:
        VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
        break;
    }

    return ret;
}

static int
vmxnet3_can_receive(NetClientState *nc)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);
    return s->device_active &&
           VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
}

static inline bool
vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
{
    uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
    if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
        return true;
    }

    return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
}

static bool
vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
{
    int i;
    for (i = 0; i < s->mcast_list_len; i++) {
        if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
            return true;
        }
    }
    return false;
}

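/*
 * Apply the guest-configured RX filter (promiscuous, unicast, broadcast,
 * multicast and VLAN table) to decide whether a received packet may be
 * indicated to the driver.
 */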
static bool
vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
    size_t size)
{
    struct eth_header *ehdr = PKT_GET_ETH_HDR(data);

    if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
        return true;
    }

    if (!vmxnet3_is_registered_vlan(s, data)) {
        return false;
    }

    switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
    case ETH_PKT_UCAST:
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
            return false;
        }
        if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
            return false;
        }
        break;

    case ETH_PKT_BCAST:
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
            return false;
        }
        break;

    case ETH_PKT_MCAST:
        if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
            return true;
        }
        if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
            return false;
        }
        if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
            return false;
        }
        break;

    default:
        g_assert_not_reached();
    }

    return true;
}

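/*
 * Receive path entry point: strip the peer's virtio-net header if
 * present, classify the packet, run it through the RX filter and, if it
 * passes, indicate it to the guest through the RX rings.
 */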
static ssize_t
vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);
    ssize_t bytes_indicated;

    if (!vmxnet3_can_receive(nc)) {
        VMW_PKPRN("Cannot receive now");
        return -1;
    }

    if (s->peer_has_vhdr) {
        net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
        buf += sizeof(struct virtio_net_hdr);
        size -= sizeof(struct virtio_net_hdr);
    }

    net_rx_pkt_set_packet_type(s->rx_pkt,
        get_eth_packet_type(PKT_GET_ETH_HDR(buf)));

    if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
        struct iovec iov = {
            .iov_base = (void *)buf,
            .iov_len = size
        };

        net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0);
        vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
        net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
        bytes_indicated = vmxnet3_indicate_packet(s) ? (ssize_t)size : -1;
        if (bytes_indicated < (ssize_t)size) {
            VMW_PKPRN("RX: %zd of %zu bytes indicated", bytes_indicated, size);
        }
    } else {
        VMW_PKPRN("Packet dropped by RX filter");
        bytes_indicated = size;
    }

    assert(size > 0);
    assert(bytes_indicated != 0);
    return bytes_indicated;
}

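/*
 * Propagate a QEMU link state change to the guest: update the status
 * bit, post a link-change event and raise the event interrupt.
 */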
static void vmxnet3_set_link_status(NetClientState *nc)
{
    VMXNET3State *s = qemu_get_nic_opaque(nc);

    if (nc->link_down) {
        s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
    } else {
        s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
    }

    vmxnet3_set_events(s, VMXNET3_ECR_LINK);
    vmxnet3_trigger_interrupt(s, s->event_int_idx);
}

static NetClientInfo net_vmxnet3_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .receive = vmxnet3_receive,
    .link_status_changed = vmxnet3_set_link_status,
};

static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
{
    NetClientState *nc = qemu_get_queue(s->nic);

    return qemu_has_vnet_hdr(nc->peer);
}

static void vmxnet3_net_uninit(VMXNET3State *s)
{
    g_free(s->mcast_list);
    vmxnet3_deactivate_device(s);
    qemu_del_nic(s->nic);
}

static void vmxnet3_net_init(VMXNET3State *s)
{
    DeviceState *d = DEVICE(s);

    VMW_CBPRN("vmxnet3_net_init called...");

    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    /* Windows guest will query the address that was set on init */
    memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));

    s->mcast_list = NULL;
    s->mcast_list_len = 0;

    s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;

    VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));

    s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
                          object_get_typename(OBJECT(s)),
                          d->id, &d->mem_reentrancy_guard, s);

    s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
    s->tx_sop = true;
    s->skip_current_tx_pkt = false;
    s->tx_pkt = NULL;
    s->rx_pkt = NULL;
    s->rx_vlan_stripping = false;
    s->lro_supported = false;

    if (s->peer_has_vhdr) {
        qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
            sizeof(struct virtio_net_hdr));
    }

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}

static void
vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;
    for (i = 0; i < num_vectors; i++) {
        msix_vector_unuse(d, i);
    }
}

static void
vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;
    for (i = 0; i < num_vectors; i++) {
        msix_vector_use(d, i);
    }
}

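/*
 * The MSI-X table and PBA share a single BAR (VMXNET3_MSIX_BAR_IDX).
 * All VMXNET3_MAX_INTRS vectors are marked as used up front, since the
 * guest may route any queue or event interrupt to any of them.
 */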
static bool
vmxnet3_init_msix(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int res = msix_init(d, VMXNET3_MAX_INTRS,
                        &s->msix_bar,
                        VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
                        &s->msix_bar,
                        VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
                        VMXNET3_MSIX_OFFSET, NULL);

    if (res < 0) {
        VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
        s->msix_used = false;
    } else {
        vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
        s->msix_used = true;
    }
    return s->msix_used;
}

static void
vmxnet3_cleanup_msix(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    if (s->msix_used) {
        vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
        msix_uninit(d, &s->msix_bar, &s->msix_bar);
    }
}

static void
vmxnet3_cleanup_msi(VMXNET3State *s)
{
    PCIDevice *d = PCI_DEVICE(s);

    msi_uninit(d);
}

static const MemoryRegionOps b0_ops = {
    .read = vmxnet3_io_bar0_read,
    .write = vmxnet3_io_bar0_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static const MemoryRegionOps b1_ops = {
    .read = vmxnet3_io_bar1_read,
    .write = vmxnet3_io_bar1_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

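/*
 * Derive a 64-bit PCIe Device Serial Number from the configured MAC
 * address, bracketing the NIC-specific and OUI bytes with the 0xfe/0xff
 * markers in the spirit of an EUI-64 expansion.
 */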
static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
{
    uint64_t dsn_payload;
    uint8_t *dsnp = (uint8_t *)&dsn_payload;

    dsnp[0] = 0xfe;
    dsnp[1] = s->conf.macaddr.a[3];
    dsnp[2] = s->conf.macaddr.a[4];
    dsnp[3] = s->conf.macaddr.a[5];
    dsnp[4] = s->conf.macaddr.a[0];
    dsnp[5] = s->conf.macaddr.a[1];
    dsnp[6] = s->conf.macaddr.a[2];
    dsnp[7] = 0xff;
    return dsn_payload;
}


#define VMXNET3_USE_64BIT         (true)
#define VMXNET3_PER_VECTOR_MASK   (false)

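/*
 * Realize: register BAR0 (passthrough registers), BAR1 (virtual device
 * registers) and the MSI-X BAR, then bring up MSI/MSI-X with a silent
 * INTx fallback and, on express buses, the PCIe capabilities.
 */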
static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VMXNET3State *s = VMXNET3(pci_dev);
    int ret;

    VMW_CBPRN("Starting init...");

    memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
                          "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);

    memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
                          "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
    pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);

    memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
                       VMXNET3_MSIX_BAR_SIZE);
    pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);

    vmxnet3_reset_interrupt_states(s);

    /* Interrupt pin A */
    pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;

    ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
                   VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
    /*
     * Any error other than -ENOTSUP (board's MSI support is broken)
     * is a programming error. Fall back to INTx silently on -ENOTSUP.
     */
    assert(!ret || ret == -ENOTSUP);

    if (!vmxnet3_init_msix(s)) {
        VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
    }

    vmxnet3_net_init(s);

    if (pci_is_express(pci_dev)) {
        if (pci_bus_is_express(pci_get_bus(pci_dev))) {
            pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
        }

        pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
                              vmxnet3_device_serial_num(s));
    }
}

static void vmxnet3_instance_init(Object *obj)
{
    VMXNET3State *s = VMXNET3(obj);
    device_add_bootindex_property(obj, &s->conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(obj));
    PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}

static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
{
    VMXNET3State *s = VMXNET3(pci_dev);

    VMW_CBPRN("Starting uninit...");

    vmxnet3_net_uninit(s);

    vmxnet3_cleanup_msix(s);

    vmxnet3_cleanup_msi(s);
}

static void vmxnet3_qdev_reset(DeviceState *dev)
{
    PCIDevice *d = PCI_DEVICE(dev);
    VMXNET3State *s = VMXNET3(d);

    VMW_CBPRN("Starting QDEV reset...");
    vmxnet3_reset(s);
}

static bool vmxnet3_mc_list_needed(void *opaque)
{
    return true;
}

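/*
 * The multicast list is migrated as a variable-sized buffer:
 * vmxnet3_pre_save() records its byte size and
 * vmxnet3_mcast_list_pre_load() allocates the destination buffer that
 * VMSTATE_VBUFFER_UINT32 then fills.
 */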
static int vmxnet3_mcast_list_pre_load(void *opaque)
{
    VMXNET3State *s = opaque;

    s->mcast_list = g_malloc(s->mcast_list_buff_size);

    return 0;
}

static int vmxnet3_pre_save(void *opaque)
{
    VMXNET3State *s = opaque;

    s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);

    return 0;
}

static const VMStateDescription vmstate_vmxnet3_mcast_list = {
    .name = "vmxnet3/mcast_list",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = vmxnet3_mcast_list_pre_load,
    .needed = vmxnet3_mc_list_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL,
            mcast_list_buff_size),
        VMSTATE_END_OF_LIST()
    }
};

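/*
 * Only the ring bookkeeping (base PA, size, cell size, next index and
 * generation bit) is migrated; the descriptors themselves live in
 * guest memory.
 */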
static const VMStateDescription vmstate_vmxnet3_ring = {
    .name = "vmxnet3-ring",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(pa, Vmxnet3Ring),
        VMSTATE_UINT32(size, Vmxnet3Ring),
        VMSTATE_UINT32(cell_size, Vmxnet3Ring),
        VMSTATE_UINT32(next, Vmxnet3Ring),
        VMSTATE_UINT8(gen, Vmxnet3Ring),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_vmxnet3_tx_stats = {
    .name = "vmxnet3-tx-stats",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(TSOPktsTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(TSOBytesTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(ucastPktsTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(ucastBytesTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(mcastPktsTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(mcastBytesTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(bcastPktsTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(bcastBytesTxOK, struct UPT1_TxStats),
        VMSTATE_UINT64(pktsTxError, struct UPT1_TxStats),
        VMSTATE_UINT64(pktsTxDiscard, struct UPT1_TxStats),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_vmxnet3_txq_descr = {
    .name = "vmxnet3-txq-descr",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT(tx_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
                       Vmxnet3Ring),
        VMSTATE_STRUCT(comp_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
                       Vmxnet3Ring),
        VMSTATE_UINT8(intr_idx, Vmxnet3TxqDescr),
        VMSTATE_UINT64(tx_stats_pa, Vmxnet3TxqDescr),
        VMSTATE_STRUCT(txq_stats, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_tx_stats,
                       struct UPT1_TxStats),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_vmxnet3_rx_stats = {
    .name = "vmxnet3-rx-stats",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(LROPktsRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(LROBytesRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(ucastPktsRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(ucastBytesRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(mcastPktsRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(mcastBytesRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(bcastPktsRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(bcastBytesRxOK, struct UPT1_RxStats),
        VMSTATE_UINT64(pktsRxOutOfBuf, struct UPT1_RxStats),
        VMSTATE_UINT64(pktsRxError, struct UPT1_RxStats),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_vmxnet3_rxq_descr = {
    .name = "vmxnet3-rxq-descr",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT_ARRAY(rx_ring, Vmxnet3RxqDescr,
                             VMXNET3_RX_RINGS_PER_QUEUE, 0,
                             vmstate_vmxnet3_ring, Vmxnet3Ring),
        VMSTATE_STRUCT(comp_ring, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_ring,
                       Vmxnet3Ring),
        VMSTATE_UINT8(intr_idx, Vmxnet3RxqDescr),
        VMSTATE_UINT64(rx_stats_pa, Vmxnet3RxqDescr),
        VMSTATE_STRUCT(rxq_stats, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_rx_stats,
                       struct UPT1_RxStats),
        VMSTATE_END_OF_LIST()
    }
};

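/*
 * Rebuild the runtime state that is not migrated: reallocate the TX/RX
 * packet wrappers and re-mark the MSI-X vectors as used on the
 * destination, then re-validate the migrated queue and interrupt
 * configuration.
 */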
static int vmxnet3_post_load(void *opaque, int version_id)
{
    VMXNET3State *s = opaque;

    net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
    net_rx_pkt_init(&s->rx_pkt);

    if (s->msix_used) {
        vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
    }

    if (!vmxnet3_validate_queues(s)) {
        return -1;
    }
    vmxnet3_validate_interrupts(s);

    return 0;
}

static const VMStateDescription vmstate_vmxnet3_int_state = {
    .name = "vmxnet3-int-state",
    .version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(is_masked, Vmxnet3IntState),
        VMSTATE_BOOL(is_pending, Vmxnet3IntState),
        VMSTATE_BOOL(is_asserted, Vmxnet3IntState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_vmxnet3 = {
    .name = "vmxnet3",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmxnet3_pre_save,
    .post_load = vmxnet3_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
        VMSTATE_MSIX(parent_obj, VMXNET3State),
        VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
        VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
        VMSTATE_BOOL(lro_supported, VMXNET3State),
        VMSTATE_UINT32(rx_mode, VMXNET3State),
        VMSTATE_UINT32(mcast_list_len, VMXNET3State),
        VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
        VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
        VMSTATE_UINT32(mtu, VMXNET3State),
        VMSTATE_UINT16(max_rx_frags, VMXNET3State),
        VMSTATE_UINT32(max_tx_frags, VMXNET3State),
        VMSTATE_UINT8(event_int_idx, VMXNET3State),
        VMSTATE_BOOL(auto_int_masking, VMXNET3State),
        VMSTATE_UINT8(txq_num, VMXNET3State),
        VMSTATE_UINT8(rxq_num, VMXNET3State),
        VMSTATE_UINT32(device_active, VMXNET3State),
        VMSTATE_UINT32(last_command, VMXNET3State),
        VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
        VMSTATE_UINT32(temp_mac, VMXNET3State),
        VMSTATE_UINT64(drv_shmem, VMXNET3State),
        VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),

        VMSTATE_STRUCT_ARRAY(txq_descr, VMXNET3State,
            VMXNET3_DEVICE_MAX_TX_QUEUES, 0, vmstate_vmxnet3_txq_descr,
            Vmxnet3TxqDescr),
        VMSTATE_STRUCT_ARRAY(rxq_descr, VMXNET3State,
            VMXNET3_DEVICE_MAX_RX_QUEUES, 0, vmstate_vmxnet3_rxq_descr,
            Vmxnet3RxqDescr),
        VMSTATE_STRUCT_ARRAY(interrupt_states, VMXNET3State,
            VMXNET3_MAX_INTRS, 0, vmstate_vmxnet3_int_state,
            Vmxnet3IntState),

        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_vmxnet3_mcast_list,
        NULL
    }
};

static const Property vmxnet3_properties[] = {
    DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
};

static void vmxnet3_class_init(ObjectClass *class, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(class);
    PCIDeviceClass *c = PCI_DEVICE_CLASS(class);

    c->realize = vmxnet3_pci_realize;
    c->exit = vmxnet3_pci_uninit;
    c->vendor_id = PCI_VENDOR_ID_VMWARE;
    c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
    c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
    c->romfile = "efi-vmxnet3.rom";
    c->class_id = PCI_CLASS_NETWORK_ETHERNET;
    c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
    c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
    dc->desc = "VMWare Paravirtualized Ethernet v3";
    device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
    dc->vmsd = &vmstate_vmxnet3;
    device_class_set_props(dc, vmxnet3_properties);
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
}

static const TypeInfo vmxnet3_info = {
    .name          = TYPE_VMXNET3,
    .parent        = TYPE_PCI_DEVICE,
    .class_size    = sizeof(VMXNET3Class),
    .instance_size = sizeof(VMXNET3State),
    .class_init    = vmxnet3_class_init,
    .instance_init = vmxnet3_instance_init,
    .interfaces = (const InterfaceInfo[]) {
        { INTERFACE_PCIE_DEVICE },
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { }
    },
};

static void vmxnet3_register_types(void)
{
    VMW_CBPRN("vmxnet3_register_types called...");
    type_register_static(&vmxnet3_info);
}

type_init(vmxnet3_register_types)