xref: /qemu/hw/net/virtio-net.c (revision efc5603292bfde97fd82fabcedce86310bedbc65)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qobject/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "system/system.h"
43 #include "system/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "system/qtest.h"
51 
52 #define VIRTIO_NET_VM_VERSION    11
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length field value for an IPv4 header with no options */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value to pass the WHQL test, while '50000' can gain 2x netperf
80    throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
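
/*
 * Illustrative example (not part of the device model): with only
 * VIRTIO_NET_F_MAC negotiated, the guest-visible config space ends at
 * endof(struct virtio_net_config, mac); negotiating VIRTIO_NET_F_MQ as well
 * extends it through max_virtqueue_pairs. The effective n->config_size is
 * the largest matching .end above, clamped between min_size and max_size.
 */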
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
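/*
 * Virtqueues are laid out in RX/TX pairs: even indices are RX queues and odd
 * indices are TX queues, so e.g. vq2q(0) == vq2q(1) == 0. The control queue,
 * if present, comes after all the data queues.
 */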
122 static int vq2q(int queue_index)
123 {
124     return queue_index / 2;
125 }
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0, sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret == -1) {
172             return;
173         }
174 
175         /*
176          * Some NIC/kernel combinations present 0 as the mac address.  As that
177          * is not a legal address, try to proceed with the address from the
178          * QEMU command line in the hope that the address has been configured
179          * correctly elsewhere - just not reported by the device.
180          */
181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182             info_report("Zero hardware mac address detected. Ignoring.");
183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
184         }
185 
186         netcfg.status |= virtio_tswap16(vdev,
187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
188         memcpy(config, &netcfg, n->config_size);
189     }
190 }
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215     }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
228     trace_virtio_net_announce_notify();
229 
230     net->status |= VIRTIO_NET_S_ANNOUNCE;
231     virtio_notify_config(vdev);
232 }
233 
234 static void virtio_net_announce_timer(void *opaque)
235 {
236     VirtIONet *n = opaque;
237     trace_virtio_net_announce_timer(n->announce_timer.round);
238 
239     n->announce_timer.round--;
240     virtio_net_announce_notify(n);
241 }
242 
243 static void virtio_net_announce(NetClientState *nc)
244 {
245     VirtIONet *n = qemu_get_nic_opaque(nc);
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 
248     /*
249      * Make sure the virtio migration announcement timer isn't running.
250      * If it is, let it trigger the announcement so that we do not cause
251      * confusion.
252      */
253     if (n->announce_timer.round) {
254         return;
255     }
256 
257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259         virtio_net_announce_notify(n);
260     }
261 }
262 
263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
264 {
265     VirtIODevice *vdev = VIRTIO_DEVICE(n);
266     NetClientState *nc = qemu_get_queue(n->nic);
267     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
268     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269               n->max_ncs - n->max_queue_pairs : 0;
270 
271     if (!get_vhost_net(nc->peer)) {
272         return;
273     }
274 
275     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276         !!n->vhost_started) {
277         return;
278     }
279     if (!n->vhost_started) {
280         int r, i;
281 
282         if (n->needs_vnet_hdr_swap) {
283             error_report("backend does not support %s vnet headers; "
284                          "falling back on userspace virtio",
285                          virtio_is_big_endian(vdev) ? "BE" : "LE");
286             return;
287         }
288 
289         /* Any packets outstanding? Purge them to avoid touching rings
290          * when vhost is running.
291          */
292         for (i = 0; i < queue_pairs; i++) {
293             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294 
295             /* Purge both directions: TX and RX. */
296             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298         }
299 
300         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302             if (r < 0) {
303                 error_report("%u-byte MTU not supported by the backend",
304                              n->net_conf.mtu);
305 
306                 return;
307             }
308         }
309 
310         n->vhost_started = 1;
311         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
312         if (r < 0) {
313             error_report("unable to start vhost net: %d: "
314                          "falling back on userspace virtio", -r);
315             n->vhost_started = 0;
316         }
317     } else {
318         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
319         n->vhost_started = 0;
320     }
321 }
322 
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
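/*
 * Returns true when enabling the requested endianness fails on any peer (any
 * queues already switched are rolled back); returns false on success.
 */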
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335                                        int queue_pairs, bool enable)
336 {
337     int i;
338 
339     for (i = 0; i < queue_pairs; i++) {
340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341             enable) {
342             while (--i >= 0) {
343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344             }
345 
346             return true;
347         }
348     }
349 
350     return false;
351 }
352 
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357 
358     if (virtio_net_started(n, status)) {
359         /* Before using the device, we tell the network backend about the
360          * endianness to use when parsing vnet headers. If the backend
361          * can't do it, we fall back to fixing the headers in the core
362          * virtio-net code.
363          */
364         n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
365                                  virtio_net_set_vnet_endian(vdev, n->nic->ncs,
366                                                             queue_pairs, true);
367     } else if (virtio_net_started(n, vdev->status)) {
368         /* After using the device, we need to reset the network backend to
369          * the default (guest native endianness), otherwise the guest may
370          * lose network connectivity if it is rebooted into a different
371          * endianness.
372          */
373         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374     }
375 }
376 
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
385 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
386 {
387     VirtIONet *n = VIRTIO_NET(vdev);
388     VirtIONetQueue *q;
389     int i;
390     uint8_t queue_status;
391 
392     virtio_net_vnet_endian_status(n, status);
393     virtio_net_vhost_status(n, status);
394 
395     for (i = 0; i < n->max_queue_pairs; i++) {
396         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
397         bool queue_started;
398         q = &n->vqs[i];
399 
400         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
401             queue_status = 0;
402         } else {
403             queue_status = status;
404         }
405         queue_started =
406             virtio_net_started(n, queue_status) && !n->vhost_started;
407 
408         if (queue_started) {
409             qemu_flush_queued_packets(ncs);
410         }
411 
412         if (!q->tx_waiting) {
413             continue;
414         }
415 
416         if (queue_started) {
417             if (q->tx_timer) {
418                 timer_mod(q->tx_timer,
419                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
420             } else {
421                 replay_bh_schedule_event(q->tx_bh);
422             }
423         } else {
424             if (q->tx_timer) {
425                 timer_del(q->tx_timer);
426             } else {
427                 qemu_bh_cancel(q->tx_bh);
428             }
429             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
430                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
431                 vdev->vm_running) {
432                 /* if tx is waiting, we likely have some packets in the tx
433                  * queue and notification disabled */
434                 q->tx_waiting = 0;
435                 virtio_queue_set_notification(q->tx_vq, 1);
436                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
437             }
438         }
439     }
440 }
441 
442 static void virtio_net_set_link_status(NetClientState *nc)
443 {
444     VirtIONet *n = qemu_get_nic_opaque(nc);
445     VirtIODevice *vdev = VIRTIO_DEVICE(n);
446     uint16_t old_status = n->status;
447 
448     if (nc->link_down)
449         n->status &= ~VIRTIO_NET_S_LINK_UP;
450     else
451         n->status |= VIRTIO_NET_S_LINK_UP;
452 
453     if (n->status != old_status)
454         virtio_notify_config(vdev);
455 
456     virtio_net_set_status(vdev, vdev->status);
457 }
458 
459 static void rxfilter_notify(NetClientState *nc)
460 {
461     VirtIONet *n = qemu_get_nic_opaque(nc);
462 
463     if (nc->rxfilter_notify_enabled) {
464         char *path = object_get_canonical_path(OBJECT(n->qdev));
465         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
466         g_free(path);
467 
468         /* disable event notification to avoid event flooding */
469         nc->rxfilter_notify_enabled = 0;
470     }
471 }
472 
473 static intList *get_vlan_table(VirtIONet *n)
474 {
475     intList *list;
476     int i, j;
477 
478     list = NULL;
479     for (i = 0; i < MAX_VLAN >> 5; i++) {
480         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481             if (n->vlans[i] & (1U << j)) {
482                 QAPI_LIST_PREPEND(list, (i << 5) + j);
483             }
484         }
485     }
486 
487     return list;
488 }
489 
490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491 {
492     VirtIONet *n = qemu_get_nic_opaque(nc);
493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
494     RxFilterInfo *info;
495     strList *str_list;
496     int i;
497 
498     info = g_malloc0(sizeof(*info));
499     info->name = g_strdup(nc->name);
500     info->promiscuous = n->promisc;
501 
502     if (n->nouni) {
503         info->unicast = RX_STATE_NONE;
504     } else if (n->alluni) {
505         info->unicast = RX_STATE_ALL;
506     } else {
507         info->unicast = RX_STATE_NORMAL;
508     }
509 
510     if (n->nomulti) {
511         info->multicast = RX_STATE_NONE;
512     } else if (n->allmulti) {
513         info->multicast = RX_STATE_ALL;
514     } else {
515         info->multicast = RX_STATE_NORMAL;
516     }
517 
518     info->broadcast_allowed = n->nobcast;
519     info->multicast_overflow = n->mac_table.multi_overflow;
520     info->unicast_overflow = n->mac_table.uni_overflow;
521 
522     info->main_mac = qemu_mac_strdup_printf(n->mac);
523 
524     str_list = NULL;
525     for (i = 0; i < n->mac_table.first_multi; i++) {
526         QAPI_LIST_PREPEND(str_list,
527                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528     }
529     info->unicast_table = str_list;
530 
531     str_list = NULL;
532     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533         QAPI_LIST_PREPEND(str_list,
534                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535     }
536     info->multicast_table = str_list;
537     info->vlan_table = get_vlan_table(n);
538 
539     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540         info->vlan = RX_STATE_ALL;
541     } else if (!info->vlan_table) {
542         info->vlan = RX_STATE_NONE;
543     } else {
544         info->vlan = RX_STATE_NORMAL;
545     }
546 
547     /* enable event notification after query */
548     nc->rxfilter_notify_enabled = 1;
549 
550     return info;
551 }
552 
553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
554 {
555     VirtIONet *n = VIRTIO_NET(vdev);
556     NetClientState *nc;
557 
558     /* validate queue_index and skip for cvq */
559     if (queue_index >= n->max_queue_pairs * 2) {
560         return;
561     }
562 
563     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
564 
565     if (!nc->peer) {
566         return;
567     }
568 
569     if (get_vhost_net(nc->peer) &&
570         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
571         vhost_net_virtqueue_reset(vdev, nc, queue_index);
572     }
573 
574     flush_or_purge_queued_packets(nc);
575 }
576 
577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
578 {
579     VirtIONet *n = VIRTIO_NET(vdev);
580     NetClientState *nc;
581     int r;
582 
583     /* validate queue_index and skip for cvq */
584     if (queue_index >= n->max_queue_pairs * 2) {
585         return;
586     }
587 
588     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
589 
590     if (!nc->peer || !vdev->vhost_started) {
591         return;
592     }
593 
594     if (get_vhost_net(nc->peer) &&
595         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
596         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
597         if (r < 0) {
598             error_report("unable to restart vhost net virtqueue %d "
599                          "when resetting the queue", queue_index);
600         }
601     }
602 }
603 
604 static void peer_test_vnet_hdr(VirtIONet *n)
605 {
606     NetClientState *nc = qemu_get_queue(n->nic);
607     if (!nc->peer) {
608         return;
609     }
610 
611     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
612 }
613 
614 static int peer_has_vnet_hdr(VirtIONet *n)
615 {
616     return n->has_vnet_hdr;
617 }
618 
619 static int peer_has_ufo(VirtIONet *n)
620 {
621     if (!peer_has_vnet_hdr(n))
622         return 0;
623 
624     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
625 
626     return n->has_ufo;
627 }
628 
629 static int peer_has_uso(VirtIONet *n)
630 {
631     if (!peer_has_vnet_hdr(n)) {
632         return 0;
633     }
634 
635     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
636 }
637 
638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
639                                        int version_1, int hash_report)
640 {
641     int i;
642     NetClientState *nc;
643 
644     n->mergeable_rx_bufs = mergeable_rx_bufs;
645 
646     if (version_1) {
647         n->guest_hdr_len = hash_report ?
648             sizeof(struct virtio_net_hdr_v1_hash) :
649             sizeof(struct virtio_net_hdr_mrg_rxbuf);
650         n->rss_data.populate_hash = !!hash_report;
651     } else {
652         n->guest_hdr_len = n->mergeable_rx_bufs ?
653             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
654             sizeof(struct virtio_net_hdr);
655         n->rss_data.populate_hash = false;
656     }
657 
658     for (i = 0; i < n->max_queue_pairs; i++) {
659         nc = qemu_get_subqueue(n->nic, i);
660 
661         if (peer_has_vnet_hdr(n) &&
662             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
663             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
664             n->host_hdr_len = n->guest_hdr_len;
665         }
666     }
667 }
668 
669 static int virtio_net_max_tx_queue_size(VirtIONet *n)
670 {
671     NetClientState *peer = n->nic_conf.peers.ncs[0];
672 
673     /*
674      * Backends other than vhost-user or vhost-vdpa don't support max queue
675      * size.
676      */
677     if (!peer) {
678         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
679     }
680 
681     switch (peer->info->type) {
682     case NET_CLIENT_DRIVER_VHOST_USER:
683     case NET_CLIENT_DRIVER_VHOST_VDPA:
684         return VIRTQUEUE_MAX_SIZE;
685     default:
686         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
687     }
688 }
689 
690 static int peer_attach(VirtIONet *n, int index)
691 {
692     NetClientState *nc = qemu_get_subqueue(n->nic, index);
693 
694     if (!nc->peer) {
695         return 0;
696     }
697 
698     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
699         vhost_set_vring_enable(nc->peer, 1);
700     }
701 
702     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
703         return 0;
704     }
705 
706     if (n->max_queue_pairs == 1) {
707         return 0;
708     }
709 
710     return tap_enable(nc->peer);
711 }
712 
713 static int peer_detach(VirtIONet *n, int index)
714 {
715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
716 
717     if (!nc->peer) {
718         return 0;
719     }
720 
721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722         vhost_set_vring_enable(nc->peer, 0);
723     }
724 
725     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
726         return 0;
727     }
728 
729     return tap_disable(nc->peer);
730 }
731 
732 static void virtio_net_set_queue_pairs(VirtIONet *n)
733 {
734     int i;
735     int r;
736 
737     if (n->nic->peer_deleted) {
738         return;
739     }
740 
741     for (i = 0; i < n->max_queue_pairs; i++) {
742         if (i < n->curr_queue_pairs) {
743             r = peer_attach(n, i);
744             assert(!r);
745         } else {
746             r = peer_detach(n, i);
747             assert(!r);
748         }
749     }
750 }
751 
752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753 
754 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
755                                         Error **errp)
756 {
757     VirtIONet *n = VIRTIO_NET(vdev);
758     NetClientState *nc = qemu_get_queue(n->nic);
759 
760     /* First, sync all features that virtio-net could possibly support */
761     features |= n->host_features;
762 
763     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
764 
765     if (!peer_has_vnet_hdr(n)) {
766         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
767         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
768         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
769         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
770 
771         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
772         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
773         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
774         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
775 
776         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
777         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
778         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
779 
780         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
781     }
782 
783     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
784         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
785         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
786     }
787 
788     if (!peer_has_uso(n)) {
789         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
790         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
791         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
792     }
793 
794     if (!get_vhost_net(nc->peer)) {
795         return features;
796     }
797 
798     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
799         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
800     }
801     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
802     vdev->backend_features = features;
803 
804     if (n->mtu_bypass_backend &&
805             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
806         features |= (1ULL << VIRTIO_NET_F_MTU);
807     }
808 
809     /*
810      * Since GUEST_ANNOUNCE is emulated, the feature bit could be set
811      * without CTRL_VQ being enabled. This happens in the vDPA case.
812      *
813      * Make sure the feature set is not incoherent, as the driver could
814      * refuse to start otherwise.
815      *
816      * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
817      * helping the guest to notify its new location with vDPA devices that
818      * do not support it.
819      */
820     if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
821         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
822     }
823 
824     return features;
825 }
826 
827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
828 {
829     uint64_t features = 0;
830 
831     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
832      * but also these: */
833     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
834     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
837     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
838 
839     return features;
840 }
841 
842 static void virtio_net_apply_guest_offloads(VirtIONet *n)
843 {
844     qemu_set_offload(qemu_get_queue(n->nic)->peer,
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
849             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
850             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
851             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
852 }
853 
854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855 {
856     static const uint64_t guest_offloads_mask =
857         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863         (1ULL << VIRTIO_NET_F_GUEST_USO6);
864 
865     return guest_offloads_mask & features;
866 }
867 
868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869 {
870     VirtIODevice *vdev = VIRTIO_DEVICE(n);
871     return virtio_net_guest_offloads_by_features(vdev->guest_features);
872 }
873 
874 typedef struct {
875     VirtIONet *n;
876     DeviceState *dev;
877 } FailoverDevice;
878 
879 /**
880  * Set the failover primary device
881  *
882  * @dev: device being examined during the bus walk
883  * @opaque: FailoverDevice to fill in when the primary is found
884  * Returns: 1 to stop the walk once the primary is found, 0 otherwise
885  */
886 static int failover_set_primary(DeviceState *dev, void *opaque)
887 {
888     FailoverDevice *fdev = opaque;
889     PCIDevice *pci_dev = (PCIDevice *)
890         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891 
892     if (!pci_dev) {
893         return 0;
894     }
895 
896     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897         fdev->dev = dev;
898         return 1;
899     }
900 
901     return 0;
902 }
903 
904 /**
905  * Find the primary device for this failover virtio-net
906  *
907  * @n: VirtIONet device
908  * Returns: the primary device, or NULL if none was found
909  */
910 static DeviceState *failover_find_primary_device(VirtIONet *n)
911 {
912     FailoverDevice fdev = {
913         .n = n,
914     };
915 
916     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                        NULL, NULL, &fdev);
918     return fdev.dev;
919 }
920 
921 static void failover_add_primary(VirtIONet *n, Error **errp)
922 {
923     Error *err = NULL;
924     DeviceState *dev = failover_find_primary_device(n);
925 
926     if (dev) {
927         return;
928     }
929 
930     if (!n->primary_opts) {
931         error_setg(errp, "Primary device not found");
932         error_append_hint(errp, "Virtio-net failover will not work. Make "
933                           "sure primary device has parameter"
934                           " failover_pair_id=%s\n", n->netclient_name);
935         return;
936     }
937 
938     dev = qdev_device_add_from_qdict(n->primary_opts,
939                                      n->primary_opts_from_json,
940                                      &err);
941     if (err) {
942         qobject_unref(n->primary_opts);
943         n->primary_opts = NULL;
944     } else {
945         object_unref(OBJECT(dev));
946     }
947     error_propagate(errp, err);
948 }
949 
950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
951 {
952     VirtIONet *n = VIRTIO_NET(vdev);
953     Error *err = NULL;
954     int i;
955 
956     if (n->mtu_bypass_backend &&
957             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
958         features &= ~(1ULL << VIRTIO_NET_F_MTU);
959     }
960 
961     virtio_net_set_multiqueue(n,
962                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
963                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
964 
965     virtio_net_set_mrg_rx_bufs(n,
966                                virtio_has_feature(features,
967                                                   VIRTIO_NET_F_MRG_RXBUF),
968                                virtio_has_feature(features,
969                                                   VIRTIO_F_VERSION_1),
970                                virtio_has_feature(features,
971                                                   VIRTIO_NET_F_HASH_REPORT));
972 
973     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
974         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
975     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
976         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
977     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
978 
979     if (n->has_vnet_hdr) {
980         n->curr_guest_offloads =
981             virtio_net_guest_offloads_by_features(features);
982         virtio_net_apply_guest_offloads(n);
983     }
984 
985     for (i = 0; i < n->max_queue_pairs; i++) {
986         NetClientState *nc = qemu_get_subqueue(n->nic, i);
987 
988         if (!get_vhost_net(nc->peer)) {
989             continue;
990         }
991         vhost_net_ack_features(get_vhost_net(nc->peer), features);
992 
993         /*
994          * Keep acked_features in NetVhostUserState up to date so it
995          * does not miss any features configured by the guest virtio driver.
996          */
997         vhost_net_save_acked_features(nc->peer);
998     }
999 
1000     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1001         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1002     }
1003 
1004     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1005         qapi_event_send_failover_negotiated(n->netclient_name);
1006         qatomic_set(&n->failover_primary_hidden, false);
1007         failover_add_primary(n, &err);
1008         if (err) {
1009             if (!qtest_enabled()) {
1010                 warn_report_err(err);
1011             } else {
1012                 error_free(err);
1013             }
1014         }
1015     }
1016 }
1017 
1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1019                                      struct iovec *iov, unsigned int iov_cnt)
1020 {
1021     uint8_t on;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1026     if (s != sizeof(on)) {
1027         return VIRTIO_NET_ERR;
1028     }
1029 
1030     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1031         n->promisc = on;
1032     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1033         n->allmulti = on;
1034     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1035         n->alluni = on;
1036     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1037         n->nomulti = on;
1038     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1039         n->nouni = on;
1040     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1041         n->nobcast = on;
1042     } else {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     rxfilter_notify(nc);
1047 
1048     return VIRTIO_NET_OK;
1049 }
1050 
1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1052                                      struct iovec *iov, unsigned int iov_cnt)
1053 {
1054     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1055     uint64_t offloads;
1056     size_t s;
1057 
1058     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1059         return VIRTIO_NET_ERR;
1060     }
1061 
1062     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1063     if (s != sizeof(offloads)) {
1064         return VIRTIO_NET_ERR;
1065     }
1066 
1067     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1068         uint64_t supported_offloads;
1069 
1070         offloads = virtio_ldq_p(vdev, &offloads);
1071 
1072         if (!n->has_vnet_hdr) {
1073             return VIRTIO_NET_ERR;
1074         }
1075 
1076         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1077             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1078         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1079             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1080         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1081 
1082         supported_offloads = virtio_net_supported_guest_offloads(n);
1083         if (offloads & ~supported_offloads) {
1084             return VIRTIO_NET_ERR;
1085         }
1086 
1087         n->curr_guest_offloads = offloads;
1088         virtio_net_apply_guest_offloads(n);
1089 
1090         return VIRTIO_NET_OK;
1091     } else {
1092         return VIRTIO_NET_ERR;
1093     }
1094 }
1095 
1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1097                                  struct iovec *iov, unsigned int iov_cnt)
1098 {
1099     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1100     struct virtio_net_ctrl_mac mac_data;
1101     size_t s;
1102     NetClientState *nc = qemu_get_queue(n->nic);
1103 
1104     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1105         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1106             return VIRTIO_NET_ERR;
1107         }
1108         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1109         assert(s == sizeof(n->mac));
1110         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1111         rxfilter_notify(nc);
1112 
1113         return VIRTIO_NET_OK;
1114     }
1115 
1116     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1117         return VIRTIO_NET_ERR;
1118     }
1119 
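    /*
     * A VIRTIO_NET_CTRL_MAC_TABLE_SET command carries two struct
     * virtio_net_ctrl_mac tables back to back: unicast addresses first,
     * then multicast ones, each prefixed by a 32-bit entry count.
     */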
1120     int in_use = 0;
1121     int first_multi = 0;
1122     uint8_t uni_overflow = 0;
1123     uint8_t multi_overflow = 0;
1124     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1127                    sizeof(mac_data.entries));
1128     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1129     if (s != sizeof(mac_data.entries)) {
1130         goto error;
1131     }
1132     iov_discard_front(&iov, &iov_cnt, s);
1133 
1134     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1135         goto error;
1136     }
1137 
1138     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1139         s = iov_to_buf(iov, iov_cnt, 0, macs,
1140                        mac_data.entries * ETH_ALEN);
1141         if (s != mac_data.entries * ETH_ALEN) {
1142             goto error;
1143         }
1144         in_use += mac_data.entries;
1145     } else {
1146         uni_overflow = 1;
1147     }
1148 
1149     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1150 
1151     first_multi = in_use;
1152 
1153     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1154                    sizeof(mac_data.entries));
1155     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1156     if (s != sizeof(mac_data.entries)) {
1157         goto error;
1158     }
1159 
1160     iov_discard_front(&iov, &iov_cnt, s);
1161 
1162     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1163         goto error;
1164     }
1165 
1166     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1167         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1168                        mac_data.entries * ETH_ALEN);
1169         if (s != mac_data.entries * ETH_ALEN) {
1170             goto error;
1171         }
1172         in_use += mac_data.entries;
1173     } else {
1174         multi_overflow = 1;
1175     }
1176 
1177     n->mac_table.in_use = in_use;
1178     n->mac_table.first_multi = first_multi;
1179     n->mac_table.uni_overflow = uni_overflow;
1180     n->mac_table.multi_overflow = multi_overflow;
1181     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1182     g_free(macs);
1183     rxfilter_notify(nc);
1184 
1185     return VIRTIO_NET_OK;
1186 
1187 error:
1188     g_free(macs);
1189     return VIRTIO_NET_ERR;
1190 }
1191 
1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1193                                         struct iovec *iov, unsigned int iov_cnt)
1194 {
1195     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1196     uint16_t vid;
1197     size_t s;
1198     NetClientState *nc = qemu_get_queue(n->nic);
1199 
1200     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1201     vid = virtio_lduw_p(vdev, &vid);
1202     if (s != sizeof(vid)) {
1203         return VIRTIO_NET_ERR;
1204     }
1205 
1206     if (vid >= MAX_VLAN)
1207         return VIRTIO_NET_ERR;
1208 
1209     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1210         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1211     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1212         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1213     else
1214         return VIRTIO_NET_ERR;
1215 
1216     rxfilter_notify(nc);
1217 
1218     return VIRTIO_NET_OK;
1219 }
1220 
1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1222                                       struct iovec *iov, unsigned int iov_cnt)
1223 {
1224     trace_virtio_net_handle_announce(n->announce_timer.round);
1225     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1226         n->status & VIRTIO_NET_S_ANNOUNCE) {
1227         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1228         if (n->announce_timer.round) {
1229             qemu_announce_timer_step(&n->announce_timer);
1230         }
1231         return VIRTIO_NET_OK;
1232     } else {
1233         return VIRTIO_NET_ERR;
1234     }
1235 }
1236 
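/*
 * Attach (prog_fd >= 0) or detach (prog_fd == -1) the RSS steering eBPF
 * program on the backend. Returns false when the peer cannot steer packets
 * via eBPF.
 */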
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
1245     return nc->info->set_steering_ebpf(nc, prog_fd);
1246 }
1247 
1248 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1249                                    struct EBPFRSSConfig *config)
1250 {
1251     config->redirect = data->redirect;
1252     config->populate_hash = data->populate_hash;
1253     config->hash_types = data->hash_types;
1254     config->indirections_len = data->indirections_len;
1255     config->default_queue = data->default_queue;
1256 }
1257 
1258 static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
1259 {
1260     struct EBPFRSSConfig config = {};
1261 
1262     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1263         return false;
1264     }
1265 
1266     rss_data_to_rss_config(&n->rss_data, &config);
1267 
1268     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1269                           n->rss_data.indirections_table, n->rss_data.key,
1270                           NULL)) {
1271         return false;
1272     }
1273 
1274     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1275         return false;
1276     }
1277 
1278     return true;
1279 }
1280 
1281 static void virtio_net_detach_ebpf_rss(VirtIONet *n)
1282 {
1283     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1284 }
1285 
1286 static void virtio_net_commit_rss_config(VirtIONet *n)
1287 {
1288     if (n->rss_data.enabled) {
1289         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
1290         if (n->rss_data.populate_hash) {
1291             virtio_net_detach_ebpf_rss(n);
1292         } else if (!virtio_net_attach_ebpf_rss(n)) {
1293             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1294                 warn_report("Can't load eBPF RSS for vhost");
1295             } else {
1296                 warn_report("Can't load eBPF RSS - falling back to software RSS");
1297                 n->rss_data.enabled_software_rss = true;
1298             }
1299         }
1300 
1301         trace_virtio_net_rss_enable(n,
1302                                     n->rss_data.hash_types,
1303                                     n->rss_data.indirections_len,
1304                                     sizeof(n->rss_data.key));
1305     } else {
1306         virtio_net_detach_ebpf_rss(n);
1307         trace_virtio_net_rss_disable(n);
1308     }
1309 }
1310 
1311 static void virtio_net_disable_rss(VirtIONet *n)
1312 {
1313     if (!n->rss_data.enabled) {
1314         return;
1315     }
1316 
1317     n->rss_data.enabled = false;
1318     virtio_net_commit_rss_config(n);
1319 }
1320 
1321 static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
1322 {
1323     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1324     int ret = true;
1325     int i = 0;
1326 
1327     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1328         error_setg(errp, "Expected %d file descriptors but got %d",
1329                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1330         return false;
1331     }
1332 
1333     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1334         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
1335         if (fds[i] < 0) {
1336             ret = false;
1337             goto exit;
1338         }
1339     }
1340 
1341     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);
1342 
1343 exit:
1344     if (!ret) {
1345         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1346             close(fds[i]);
1347         }
1348     }
1349 
1350     return ret;
1351 }
1352 
1353 static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
1354 {
1355     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1356         return true;
1357     }
1358 
1359     trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
1360 
1361     /*
1362      * If user explicitly gave QEMU RSS FDs to use, then
1363      * failing to use them must be considered a fatal
1364      * error. If no RSS FDs were provided, QEMU is trying
1365      * eBPF on a "best effort" basis only, so report a
1366      * warning and allow fallback to software RSS.
1367      */
1368     if (n->ebpf_rss_fds) {
1369         return virtio_net_load_ebpf_fds(n, errp);
1370     }
1371 
1372     ebpf_rss_load(&n->ebpf_rss, &error_warn);
1373     return true;
1374 }
1375 
1376 static void virtio_net_unload_ebpf(VirtIONet *n)
1377 {
1378     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1379     ebpf_rss_unload(&n->ebpf_rss);
1380 }
1381 
1382 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1383                                       struct iovec *iov,
1384                                       unsigned int iov_cnt,
1385                                       bool do_rss)
1386 {
1387     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1388     struct virtio_net_rss_config cfg;
1389     size_t s, offset = 0, size_get;
1390     uint16_t queue_pairs, i;
1391     struct {
1392         uint16_t us;
1393         uint8_t b;
1394     } QEMU_PACKED temp;
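    /*
     * 'temp' mirrors the tail of struct virtio_net_rss_config that follows
     * the variable-length indirection table: max_tx_vq (16 bit) and
     * hash_key_length (8 bit); the key bytes themselves are read separately
     * below.
     */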
1395     const char *err_msg = "";
1396     uint32_t err_value = 0;
1397 
1398     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1399         err_msg = "RSS is not negotiated";
1400         goto error;
1401     }
1402     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1403         err_msg = "Hash report is not negotiated";
1404         goto error;
1405     }
1406     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1407     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1408     if (s != size_get) {
1409         err_msg = "Short command buffer";
1410         err_value = (uint32_t)s;
1411         goto error;
1412     }
1413     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1414     n->rss_data.indirections_len =
1415         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1416     if (!do_rss) {
1417         n->rss_data.indirections_len = 0;
1418     }
1419     if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1420         err_msg = "Too large indirection table";
1421         err_value = n->rss_data.indirections_len;
1422         goto error;
1423     }
1424     n->rss_data.indirections_len++;
1425     if (!is_power_of_2(n->rss_data.indirections_len)) {
1426         err_msg = "Invalid size of indirection table";
1427         err_value = n->rss_data.indirections_len;
1428         goto error;
1429     }
1430     n->rss_data.default_queue = do_rss ?
1431         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1432     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1433         err_msg = "Invalid default queue";
1434         err_value = n->rss_data.default_queue;
1435         goto error;
1436     }
1437     offset += size_get;
1438     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1439     g_free(n->rss_data.indirections_table);
1440     n->rss_data.indirections_table = g_malloc(size_get);
1441     if (!n->rss_data.indirections_table) {
1442         err_msg = "Can't allocate indirections table";
1443         err_value = n->rss_data.indirections_len;
1444         goto error;
1445     }
1446     s = iov_to_buf(iov, iov_cnt, offset,
1447                    n->rss_data.indirections_table, size_get);
1448     if (s != size_get) {
1449         err_msg = "Short indirection table buffer";
1450         err_value = (uint32_t)s;
1451         goto error;
1452     }
1453     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1454         uint16_t val = n->rss_data.indirections_table[i];
1455         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1456     }
1457     offset += size_get;
1458     size_get = sizeof(temp);
1459     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1460     if (s != size_get) {
1461         err_msg = "Can't get queue_pairs";
1462         err_value = (uint32_t)s;
1463         goto error;
1464     }
1465     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1466     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1467         err_msg = "Invalid number of queue_pairs";
1468         err_value = queue_pairs;
1469         goto error;
1470     }
1471     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1472         err_msg = "Invalid key size";
1473         err_value = temp.b;
1474         goto error;
1475     }
1476     if (!temp.b && n->rss_data.hash_types) {
1477         err_msg = "No key provided";
1478         err_value = 0;
1479         goto error;
1480     }
1481     if (!temp.b && !n->rss_data.hash_types) {
1482         virtio_net_disable_rss(n);
1483         return queue_pairs;
1484     }
1485     offset += size_get;
1486     size_get = temp.b;
1487     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1488     if (s != size_get) {
1489         err_msg = "Can't get key buffer";
1490         err_value = (uint32_t)s;
1491         goto error;
1492     }
1493     n->rss_data.enabled = true;
1494     virtio_net_commit_rss_config(n);
1495     return queue_pairs;
1496 error:
1497     trace_virtio_net_rss_error(n, err_msg, err_value);
1498     virtio_net_disable_rss(n);
1499     return 0;
1500 }
1501 
1502 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1503                                 struct iovec *iov, unsigned int iov_cnt)
1504 {
1505     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1506     uint16_t queue_pairs;
1507     NetClientState *nc = qemu_get_queue(n->nic);
1508 
1509     virtio_net_disable_rss(n);
1510     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1511         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1512         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1513     }
1514     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1515         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1516     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1517         struct virtio_net_ctrl_mq mq;
1518         size_t s;
1519         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1520             return VIRTIO_NET_ERR;
1521         }
1522         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1523         if (s != sizeof(mq)) {
1524             return VIRTIO_NET_ERR;
1525         }
1526         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1527 
1528     } else {
1529         return VIRTIO_NET_ERR;
1530     }
1531 
1532     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1533         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1534         queue_pairs > n->max_queue_pairs ||
1535         !n->multiqueue) {
1536         return VIRTIO_NET_ERR;
1537     }
1538 
1539     n->curr_queue_pairs = queue_pairs;
1540     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1541         /*
1542          * Avoid updating the backend for a vdpa device: we're only interested
1543          * in updating the device model queues.
1544          */
1545         return VIRTIO_NET_OK;
1546     }
1547     /* stop the backend before changing the number of queue_pairs to
1548      * avoid handling a disabled queue */
1549     virtio_net_set_status(vdev, vdev->status);
1550     virtio_net_set_queue_pairs(n);
1551 
1552     return VIRTIO_NET_OK;
1553 }
1554 
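/*
 * Control commands are laid out as a struct virtio_net_ctrl_hdr (class +
 * command) at the start of the device-readable out buffers, followed by a
 * command-specific payload; the device writes a one-byte
 * virtio_net_ctrl_ack status back into the driver-writable in buffers.
 */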
1555 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1556                                   const struct iovec *in_sg, unsigned in_num,
1557                                   const struct iovec *out_sg,
1558                                   unsigned out_num)
1559 {
1560     VirtIONet *n = VIRTIO_NET(vdev);
1561     struct virtio_net_ctrl_hdr ctrl;
1562     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1563     size_t s;
1564     struct iovec *iov, *iov2;
1565 
1566     if (iov_size(in_sg, in_num) < sizeof(status) ||
1567         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1568         virtio_error(vdev, "virtio-net ctrl missing headers");
1569         return 0;
1570     }
1571 
1572     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1573     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1574     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1575     if (s != sizeof(ctrl)) {
1576         status = VIRTIO_NET_ERR;
1577     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1578         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1579     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1580         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1581     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1582         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1583     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1584         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1585     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1586         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1587     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1588         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1589     }
1590 
1591     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1592     assert(s == sizeof(status));
1593 
1594     g_free(iov2);
1595     return sizeof(status);
1596 }
1597 
1598 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1599 {
1600     VirtQueueElement *elem;
1601 
1602     for (;;) {
1603         size_t written;
1604         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1605         if (!elem) {
1606             break;
1607         }
1608 
1609         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1610                                              elem->out_sg, elem->out_num);
1611         if (written > 0) {
1612             virtqueue_push(vq, elem, written);
1613             virtio_notify(vdev, vq);
1614             g_free(elem);
1615         } else {
1616             virtqueue_detach_element(vq, elem, 0);
1617             g_free(elem);
1618             break;
1619         }
1620     }
1621 }
1622 
1623 /* RX */
1624 
1625 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1626 {
1627     VirtIONet *n = VIRTIO_NET(vdev);
1628     int queue_index = vq2q(virtio_get_queue_index(vq));
1629 
1630     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1631 }
1632 
1633 static bool virtio_net_can_receive(NetClientState *nc)
1634 {
1635     VirtIONet *n = qemu_get_nic_opaque(nc);
1636     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1637     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1638 
1639     if (!vdev->vm_running) {
1640         return false;
1641     }
1642 
1643     if (nc->queue_index >= n->curr_queue_pairs) {
1644         return false;
1645     }
1646 
1647     if (!virtio_queue_ready(q->rx_vq) ||
1648         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1649         return false;
1650     }
1651 
1652     return true;
1653 }
1654 
1655 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1656 {
1657     int opaque;
1658     unsigned int in_bytes;
1659     VirtIONet *n = q->n;
1660 
1661     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1662         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1663                                            bufsize, 0);
1664         /* Enough buffer space available, disable notification */
1665         if (bufsize <= in_bytes) {
1666             break;
1667         }
1668 
1669         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1670             /* Guest has added some buffers, try again */
1671             continue;
1672         } else {
1673             return 0;
1674         }
1675     }
1676 
1677     virtio_queue_set_notification(q->rx_vq, 0);
1678 
1679     return 1;
1680 }
1681 
1682 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1683 {
1684     virtio_tswap16s(vdev, &hdr->hdr_len);
1685     virtio_tswap16s(vdev, &hdr->gso_size);
1686     virtio_tswap16s(vdev, &hdr->csum_start);
1687     virtio_tswap16s(vdev, &hdr->csum_offset);
1688 }
1689 
1690 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1691  * it never finds out that the packets don't have valid checksums.  This
1692  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1693  * fix this with Xen but it hasn't appeared in an upstream release of
1694  * dhclient yet.
1695  *
1696  * To avoid breaking existing guests, we catch udp packets and add
1697  * checksums.  This is terrible but it's better than hacking the guest
1698  * kernels.
1699  *
1700  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1701  * we should provide a mechanism to disable it to avoid polluting the host
1702  * cache.
1703  */
1704 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1705                                         size_t *hdr_len, const uint8_t *buf,
1706                                         size_t buf_size, size_t *buf_offset)
1707 {
1708     size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
1709                        sizeof(struct udp_header);
1710 
1711     buf += *buf_offset;
1712     buf_size -= *buf_offset;
1713 
1714     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1715         (buf_size >= csum_size && buf_size < 1500) && /* normal sized MTU */
1716         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1717         (buf[23] == 17) && /* ip.protocol == UDP */
1718         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1719         memcpy((uint8_t *)hdr + *hdr_len, buf, csum_size);
1720         net_checksum_calculate((uint8_t *)hdr + *hdr_len, csum_size, CSUM_UDP);
1721         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1722         *hdr_len += csum_size;
1723         *buf_offset += csum_size;
1724     }
1725 }
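
     /*
      * Illustrative sketch, not used by the device: the magic offsets tested
      * above, written out with their meaning. An untagged Ethernet frame
      * followed by a 20-byte IPv4 header is assumed; the helper name is
      * hypothetical.
      */
     static G_GNUC_UNUSED bool example_is_bootps_reply(const uint8_t *frame,
                                                       size_t len)
     {
         return len >= 36 &&
                frame[12] == 0x08 && frame[13] == 0x00 && /* ethertype IPv4 */
                frame[23] == IPPROTO_UDP &&   /* 14B eth + 9B into IPv4 hdr */
                lduw_be_p(frame + 34) == 67;  /* UDP source port == bootps */
     }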
1726 
1727 static size_t receive_header(VirtIONet *n, struct virtio_net_hdr *hdr,
1728                              const void *buf, size_t buf_size,
1729                              size_t *buf_offset)
1730 {
1731     size_t hdr_len = n->guest_hdr_len;
1732 
1733     memcpy(hdr, buf, sizeof(struct virtio_net_hdr));
1734 
1735     *buf_offset = n->host_hdr_len;
1736     work_around_broken_dhclient(hdr, &hdr_len, buf, buf_size, buf_offset);
1737 
1738     if (n->needs_vnet_hdr_swap) {
1739         virtio_net_hdr_swap(VIRTIO_DEVICE(n), hdr);
1740     }
1741 
1742     return hdr_len;
1743 }
1744 
1745 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1746 {
1747     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1748     static const uint8_t vlan[] = {0x81, 0x00};
1749     uint8_t *ptr = (uint8_t *)buf;
1750     int i;
1751 
1752     if (n->promisc)
1753         return 1;
1754 
1755     ptr += n->host_hdr_len;
1756 
1757     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1758         int vid = lduw_be_p(ptr + 14) & 0xfff;
1759         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1760             return 0;
1761     }
1762 
1763     if (ptr[0] & 1) { /* multicast */
1764         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1765             return !n->nobcast;
1766         } else if (n->nomulti) {
1767             return 0;
1768         } else if (n->allmulti || n->mac_table.multi_overflow) {
1769             return 1;
1770         }
1771 
1772         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1773             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1774                 return 1;
1775             }
1776         }
1777     } else { /* unicast */
1778         if (n->nouni) {
1779             return 0;
1780         } else if (n->alluni || n->mac_table.uni_overflow) {
1781             return 1;
1782         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1783             return 1;
1784         }
1785 
1786         for (i = 0; i < n->mac_table.first_multi; i++) {
1787             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1788                 return 1;
1789             }
1790         }
1791     }
1792 
1793     return 0;
1794 }
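
     /*
      * Illustrative sketch, not used by the device: the VLAN filter consulted
      * above is a 4096-bit bitmap stored as 32-bit words; "vid >> 5" selects
      * the word and "vid & 0x1f" the bit within it.
      */
     static G_GNUC_UNUSED bool example_vlan_allowed(const uint32_t *vlans, int vid)
     {
         return vlans[vid >> 5] & (1U << (vid & 0x1f));
     }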
1795 
1796 static uint8_t virtio_net_get_hash_type(bool hasip4,
1797                                         bool hasip6,
1798                                         EthL4HdrProto l4hdr_proto,
1799                                         uint32_t types)
1800 {
1801     if (hasip4) {
1802         switch (l4hdr_proto) {
1803         case ETH_L4_HDR_PROTO_TCP:
1804             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1805                 return NetPktRssIpV4Tcp;
1806             }
1807             break;
1808 
1809         case ETH_L4_HDR_PROTO_UDP:
1810             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1811                 return NetPktRssIpV4Udp;
1812             }
1813             break;
1814 
1815         default:
1816             break;
1817         }
1818 
1819         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1820             return NetPktRssIpV4;
1821         }
1822     } else if (hasip6) {
1823         switch (l4hdr_proto) {
1824         case ETH_L4_HDR_PROTO_TCP:
1825             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1826                 return NetPktRssIpV6TcpEx;
1827             }
1828             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1829                 return NetPktRssIpV6Tcp;
1830             }
1831             break;
1832 
1833         case ETH_L4_HDR_PROTO_UDP:
1834             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1835                 return NetPktRssIpV6UdpEx;
1836             }
1837             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1838                 return NetPktRssIpV6Udp;
1839             }
1840             break;
1841 
1842         default:
1843             break;
1844         }
1845 
1846         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1847             return NetPktRssIpV6Ex;
1848         }
1849         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1850             return NetPktRssIpV6;
1851         }
1852     }
1853     return 0xff;
1854 }
1855 
1856 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1857                                   size_t size,
1858                                   struct virtio_net_hdr_v1_hash *hdr)
1859 {
1860     VirtIONet *n = qemu_get_nic_opaque(nc);
1861     unsigned int index = nc->queue_index, new_index = index;
1862     struct NetRxPkt *pkt = n->rx_pkt;
1863     uint8_t net_hash_type;
1864     uint32_t hash;
1865     bool hasip4, hasip6;
1866     EthL4HdrProto l4hdr_proto;
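         /* Maps NetPktRss* hash types, used as the index, to hash_report values */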
1867     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1868         VIRTIO_NET_HASH_REPORT_IPv4,
1869         VIRTIO_NET_HASH_REPORT_TCPv4,
1870         VIRTIO_NET_HASH_REPORT_TCPv6,
1871         VIRTIO_NET_HASH_REPORT_IPv6,
1872         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1873         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1874         VIRTIO_NET_HASH_REPORT_UDPv4,
1875         VIRTIO_NET_HASH_REPORT_UDPv6,
1876         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1877     };
1878     struct iovec iov = {
1879         .iov_base = (void *)buf,
1880         .iov_len = size
1881     };
1882 
1883     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1884     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1885     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1886                                              n->rss_data.hash_types);
1887     if (net_hash_type > NetPktRssIpV6UdpEx) {
1888         if (n->rss_data.populate_hash) {
1889             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1890             hdr->hash_report = 0;
1891         }
1892         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1893     }
1894 
1895     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1896 
1897     if (n->rss_data.populate_hash) {
1898         hdr->hash_value = hash;
1899         hdr->hash_report = reports[net_hash_type];
1900     }
1901 
1902     if (n->rss_data.redirect) {
1903         new_index = hash & (n->rss_data.indirections_len - 1);
1904         new_index = n->rss_data.indirections_table[new_index];
1905     }
1906 
1907     return (index == new_index) ? -1 : new_index;
1908 }
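
     /*
      * Illustrative sketch, not used by the device: the redirection step
      * above relies on rss_data.indirections_len being a power of two, so
      * the bitmask acts as a cheap modulo. Hypothetical stand-alone form:
      */
     static G_GNUC_UNUSED uint16_t example_rss_redirect(uint32_t hash,
                                                        const uint16_t *table,
                                                        uint16_t len_pow2)
     {
         return table[hash & (len_pow2 - 1)];
     }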
1909 
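     /*
      * Worst-case header handed to the guest: the vnet header plus the
      * Ethernet/IPv4/UDP headers that work_around_broken_dhclient() may
      * copy in front of the payload for checksum patching.
      */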
1910 typedef struct Header {
1911     struct virtio_net_hdr_v1_hash virtio_net;
1912     struct eth_header eth;
1913     struct ip_header ip;
1914     struct udp_header udp;
1915 } Header;
1916 
1917 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1918                                       size_t size)
1919 {
1920     VirtIONet *n = qemu_get_nic_opaque(nc);
1921     VirtIONetQueue *q;
1922     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1923     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1924     size_t lens[VIRTQUEUE_MAX_SIZE];
1925     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1926     Header hdr;
1927     unsigned mhdr_cnt = 0;
1928     size_t offset, i, guest_offset, j;
1929     ssize_t err;
1930 
1931     memset(&hdr.virtio_net, 0, sizeof(hdr.virtio_net));
1932 
1933     if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1934         int index = virtio_net_process_rss(nc, buf, size, &hdr.virtio_net);
1935         if (index >= 0) {
1936             nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1937         }
1938     }
1939 
1940     if (!virtio_net_can_receive(nc)) {
1941         return -1;
1942     }
1943 
1944     q = virtio_net_get_subqueue(nc);
1945 
1946     /* hdr_len refers to the header we supply to the guest */
1947     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1948         return 0;
1949     }
1950 
1951     if (!receive_filter(n, buf, size))
1952         return size;
1953 
1954     offset = i = 0;
1955 
1956     while (offset < size) {
1957         VirtQueueElement *elem;
1958         int len, total;
1959         const struct iovec *sg;
1960 
1961         total = 0;
1962 
1963         if (i == VIRTQUEUE_MAX_SIZE) {
1964             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1965             err = size;
1966             goto err;
1967         }
1968 
1969         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1970         if (!elem) {
1971             if (i) {
1972                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1973                              "i %zd mergeable %d offset %zd, size %zd, "
1974                              "guest hdr len %zd, host hdr len %zd "
1975                              "guest features 0x%" PRIx64,
1976                              i, n->mergeable_rx_bufs, offset, size,
1977                              n->guest_hdr_len, n->host_hdr_len,
1978                              vdev->guest_features);
1979             }
1980             err = -1;
1981             goto err;
1982         }
1983 
1984         if (elem->in_num < 1) {
1985             virtio_error(vdev,
1986                          "virtio-net receive queue contains no in buffers");
1987             virtqueue_detach_element(q->rx_vq, elem, 0);
1988             g_free(elem);
1989             err = -1;
1990             goto err;
1991         }
1992 
1993         sg = elem->in_sg;
1994         if (i == 0) {
1995             assert(offset == 0);
1996             if (n->mergeable_rx_bufs) {
1997                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1998                                     sg, elem->in_num,
1999                                     offsetof(typeof(hdr),
2000                                              virtio_net.hdr.num_buffers),
2001                                     sizeof(hdr.virtio_net.hdr.num_buffers));
2002             } else {
2003                 hdr.virtio_net.hdr.num_buffers = cpu_to_le16(1);
2004             }
2005 
2006             guest_offset = n->has_vnet_hdr ?
2007                            receive_header(n, (struct virtio_net_hdr *)&hdr,
2008                                           buf, size, &offset) :
2009                            n->guest_hdr_len;
2010 
2011             iov_from_buf(sg, elem->in_num, 0, &hdr, guest_offset);
2012             total += guest_offset;
2013         } else {
2014             guest_offset = 0;
2015         }
2016 
2017         /* copy in packet.  ugh */
2018         len = iov_from_buf(sg, elem->in_num, guest_offset,
2019                            buf + offset, size - offset);
2020         total += len;
2021         offset += len;
2022         /* If buffers can't be merged, at this point we
2023          * must have consumed the complete packet.
2024          * Otherwise, drop it. */
2025         if (!n->mergeable_rx_bufs && offset < size) {
2026             virtqueue_unpop(q->rx_vq, elem, total);
2027             g_free(elem);
2028             err = size;
2029             goto err;
2030         }
2031 
2032         elems[i] = elem;
2033         lens[i] = total;
2034         i++;
2035     }
2036 
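         /*
          * For mergeable buffers, patch the number of descriptors actually
          * used (i) back into the num_buffers field of the header that was
          * written into the first buffer.
          */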
2037     if (mhdr_cnt) {
2038         virtio_stw_p(vdev, &hdr.virtio_net.hdr.num_buffers, i);
2039         iov_from_buf(mhdr_sg, mhdr_cnt,
2040                      0,
2041                      &hdr.virtio_net.hdr.num_buffers,
2042                      sizeof hdr.virtio_net.hdr.num_buffers);
2043     }
2044 
2045     for (j = 0; j < i; j++) {
2046         /* signal other side */
2047         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2048         g_free(elems[j]);
2049     }
2050 
2051     virtqueue_flush(q->rx_vq, i);
2052     virtio_notify(vdev, q->rx_vq);
2053 
2054     return size;
2055 
2056 err:
2057     for (j = 0; j < i; j++) {
2058         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2059         g_free(elems[j]);
2060     }
2061 
2062     return err;
2063 }
2064 
2065 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2066                                   size_t size)
2067 {
2068     RCU_READ_LOCK_GUARD();
2069 
2070     return virtio_net_receive_rcu(nc, buf, size);
2071 }
2072 
2073 /*
2074  * Accessors to read and write the IP packet data length field. This
2075  * is a potentially unaligned network-byte-order 16 bit unsigned integer
2076  * pointed to by unit->ip_plen.
2077  */
2078 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2079 {
2080     return lduw_be_p(unit->ip_plen);
2081 }
2082 
2083 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2084 {
2085     stw_be_p(unit->ip_plen, l);
2086 }
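
     /*
      * Illustrative sketch, not used by the device: ip_plen points into the
      * packet buffer and may be unaligned, hence the lduw_be_p()/stw_be_p()
      * helpers above. A hypothetical helper growing the recorded length
      * would compose them:
      */
     static G_GNUC_UNUSED void example_grow_ip_len(VirtioNetRscUnit *unit,
                                                   uint16_t extra)
     {
         write_unit_ip_len(unit, read_unit_ip_len(unit) + extra);
     }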
2087 
2088 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2089                                          const uint8_t *buf,
2090                                          VirtioNetRscUnit *unit)
2091 {
2092     uint16_t ip_hdrlen;
2093     struct ip_header *ip;
2094 
2095     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2096                               + sizeof(struct eth_header));
2097     unit->ip = (void *)ip;
2098     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2099     unit->ip_plen = &ip->ip_len;
2100     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2101     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2102     unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2103 }
2104 
2105 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2106                                          const uint8_t *buf,
2107                                          VirtioNetRscUnit *unit)
2108 {
2109     struct ip6_header *ip6;
2110 
2111     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2112                                  + sizeof(struct eth_header));
2113     unit->ip = ip6;
2114     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2115     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2116                                         + sizeof(struct ip6_header));
2117     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2118 
2119     /* The payload length fields differ between IPv4 and IPv6: in IPv6
2120        the IP header itself is excluded */
2121     unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2122 }
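
     /*
      * th_offset_flags keeps the TCP data offset in its top four bits,
      * counted in 32-bit words; the extractors above fold the ">> 12" word
      * count and "<< 2" byte scaling into a single ">> 10". Equivalent
      * explicit form (illustrative sketch, not used by the device):
      */
     static G_GNUC_UNUSED uint16_t example_tcp_hdrlen(const struct tcp_header *tcp)
     {
         return (ntohs(tcp->th_offset_flags) >> 12) * 4;
     }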
2123 
2124 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2125                                        VirtioNetRscSeg *seg)
2126 {
2127     int ret;
2128     struct virtio_net_hdr_v1 *h;
2129 
2130     h = (struct virtio_net_hdr_v1 *)seg->buf;
2131     h->flags = 0;
2132     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2133 
2134     if (seg->is_coalesced) {
2135         h->rsc.segments = seg->packets;
2136         h->rsc.dup_acks = seg->dup_ack;
2137         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2138         if (chain->proto == ETH_P_IP) {
2139             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2140         } else {
2141             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2142         }
2143     }
2144 
2145     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2146     QTAILQ_REMOVE(&chain->buffers, seg, next);
2147     g_free(seg->buf);
2148     g_free(seg);
2149 
2150     return ret;
2151 }
2152 
2153 static void virtio_net_rsc_purge(void *opq)
2154 {
2155     VirtioNetRscSeg *seg, *rn;
2156     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2157 
2158     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2159         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2160             chain->stat.purge_failed++;
2161             continue;
2162         }
2163     }
2164 
2165     chain->stat.timer++;
2166     if (!QTAILQ_EMPTY(&chain->buffers)) {
2167         timer_mod(chain->drain_timer,
2168               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2169     }
2170 }
2171 
2172 static void virtio_net_rsc_cleanup(VirtIONet *n)
2173 {
2174     VirtioNetRscChain *chain, *rn_chain;
2175     VirtioNetRscSeg *seg, *rn_seg;
2176 
2177     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2178         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2179             QTAILQ_REMOVE(&chain->buffers, seg, next);
2180             g_free(seg->buf);
2181             g_free(seg);
2182         }
2183 
2184         timer_free(chain->drain_timer);
2185         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2186         g_free(chain);
2187     }
2188 }
2189 
2190 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2191                                      NetClientState *nc,
2192                                      const uint8_t *buf, size_t size)
2193 {
2194     uint16_t hdr_len;
2195     VirtioNetRscSeg *seg;
2196 
2197     hdr_len = chain->n->guest_hdr_len;
2198     seg = g_new(VirtioNetRscSeg, 1);
2199     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2200         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2201     memcpy(seg->buf, buf, size);
2202     seg->size = size;
2203     seg->packets = 1;
2204     seg->dup_ack = 0;
2205     seg->is_coalesced = 0;
2206     seg->nc = nc;
2207 
2208     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2209     chain->stat.cache++;
2210 
2211     switch (chain->proto) {
2212     case ETH_P_IP:
2213         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2214         break;
2215     case ETH_P_IPV6:
2216         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2217         break;
2218     default:
2219         g_assert_not_reached();
2220     }
2221 }
2222 
2223 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2224                                          VirtioNetRscSeg *seg,
2225                                          const uint8_t *buf,
2226                                          struct tcp_header *n_tcp,
2227                                          struct tcp_header *o_tcp)
2228 {
2229     uint32_t nack, oack;
2230     uint16_t nwin, owin;
2231 
2232     nack = htonl(n_tcp->th_ack);
2233     nwin = htons(n_tcp->th_win);
2234     oack = htonl(o_tcp->th_ack);
2235     owin = htons(o_tcp->th_win);
2236 
2237     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2238         chain->stat.ack_out_of_win++;
2239         return RSC_FINAL;
2240     } else if (nack == oack) {
2241         /* duplicated ack or window probe */
2242         if (nwin == owin) {
2243             /* duplicated ack; bump the dup-ack count (WHQL allows up to 1) */
2244             chain->stat.dup_ack++;
2245             return RSC_FINAL;
2246         } else {
2247             /* Coalesce window update */
2248             o_tcp->th_win = n_tcp->th_win;
2249             chain->stat.win_update++;
2250             return RSC_COALESCE;
2251         }
2252     } else {
2253         /* pure ack, go to 'C', finalize */
2254         chain->stat.pure_ack++;
2255         return RSC_FINAL;
2256     }
2257 }
2258 
2259 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2260                                             VirtioNetRscSeg *seg,
2261                                             const uint8_t *buf,
2262                                             VirtioNetRscUnit *n_unit)
2263 {
2264     void *data;
2265     uint16_t o_ip_len;
2266     uint32_t nseq, oseq;
2267     VirtioNetRscUnit *o_unit;
2268 
2269     o_unit = &seg->unit;
2270     o_ip_len = read_unit_ip_len(o_unit);
2271     nseq = htonl(n_unit->tcp->th_seq);
2272     oseq = htonl(o_unit->tcp->th_seq);
2273 
2274     /* out of order or retransmitted. */
2275     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2276         chain->stat.data_out_of_win++;
2277         return RSC_FINAL;
2278     }
2279 
2280     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2281     if (nseq == oseq) {
2282         if ((o_unit->payload == 0) && n_unit->payload) {
2283             /* From no payload to payload: the normal case, not a dup ack etc. */
2284             chain->stat.data_after_pure_ack++;
2285             goto coalesce;
2286         } else {
2287             return virtio_net_rsc_handle_ack(chain, seg, buf,
2288                                              n_unit->tcp, o_unit->tcp);
2289         }
2290     } else if ((nseq - oseq) != o_unit->payload) {
2291         /* Not contiguous with the cached data, out of order */
2292         chain->stat.data_out_of_order++;
2293         return RSC_FINAL;
2294     } else {
2295 coalesce:
2296         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2297             chain->stat.over_size++;
2298             return RSC_FINAL;
2299         }
2300 
2301         /* The data is contiguous; the payload length field differs between
2302            v4 and v6, so update it via the accessor and record the new len */
2303         o_unit->payload += n_unit->payload; /* update new data len */
2304 
2305         /* update field in ip header */
2306         write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2307 
2308         /* Carry over the 'PUSH' bit; the WHQL test guide says 'PUSH' can
2309            be coalesced for Windows guests, while this may change the
2310            behavior for Linux guests (only if they use the RSC feature). */
2311         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2312 
2313         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2314         o_unit->tcp->th_win = n_unit->tcp->th_win;
2315 
2316         memmove(seg->buf + seg->size, data, n_unit->payload);
2317         seg->size += n_unit->payload;
2318         seg->packets++;
2319         chain->stat.coalesced++;
2320         return RSC_COALESCE;
2321     }
2322 }
2323 
2324 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2325                                         VirtioNetRscSeg *seg,
2326                                         const uint8_t *buf, size_t size,
2327                                         VirtioNetRscUnit *unit)
2328 {
2329     struct ip_header *ip1, *ip2;
2330 
2331     ip1 = (struct ip_header *)(unit->ip);
2332     ip2 = (struct ip_header *)(seg->unit.ip);
2333     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2334         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2335         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2336         chain->stat.no_match++;
2337         return RSC_NO_MATCH;
2338     }
2339 
2340     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2341 }
2342 
2343 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2344                                         VirtioNetRscSeg *seg,
2345                                         const uint8_t *buf, size_t size,
2346                                         VirtioNetRscUnit *unit)
2347 {
2348     struct ip6_header *ip1, *ip2;
2349 
2350     ip1 = (struct ip6_header *)(unit->ip);
2351     ip2 = (struct ip6_header *)(seg->unit.ip);
2352     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2353         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2354         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2355         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2356         chain->stat.no_match++;
2357         return RSC_NO_MATCH;
2358     }
2359 
2360     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2361 }
2362 
2363 /* Packets with 'SYN' should bypass; packets with other control flags should
2364  * be sent after draining, to prevent out-of-order delivery */
2365 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2366                                          struct tcp_header *tcp)
2367 {
2368     uint16_t tcp_hdr;
2369     uint16_t tcp_flag;
2370 
2371     tcp_flag = htons(tcp->th_offset_flags);
2372     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2373     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2374     if (tcp_flag & TH_SYN) {
2375         chain->stat.tcp_syn++;
2376         return RSC_BYPASS;
2377     }
2378 
2379     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2380         chain->stat.tcp_ctrl_drain++;
2381         return RSC_FINAL;
2382     }
2383 
2384     if (tcp_hdr > sizeof(struct tcp_header)) {
2385         chain->stat.tcp_all_opt++;
2386         return RSC_FINAL;
2387     }
2388 
2389     return RSC_CANDIDATE;
2390 }
2391 
2392 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2393                                          NetClientState *nc,
2394                                          const uint8_t *buf, size_t size,
2395                                          VirtioNetRscUnit *unit)
2396 {
2397     int ret;
2398     VirtioNetRscSeg *seg, *nseg;
2399 
2400     if (QTAILQ_EMPTY(&chain->buffers)) {
2401         chain->stat.empty_cache++;
2402         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2403         timer_mod(chain->drain_timer,
2404               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2405         return size;
2406     }
2407 
2408     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2409         if (chain->proto == ETH_P_IP) {
2410             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2411         } else {
2412             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2413         }
2414 
2415         if (ret == RSC_FINAL) {
2416             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2417                 /* Send failed */
2418                 chain->stat.final_failed++;
2419                 return 0;
2420             }
2421 
2422             /* Send current packet */
2423             return virtio_net_do_receive(nc, buf, size);
2424         } else if (ret == RSC_NO_MATCH) {
2425             continue;
2426         } else {
2427             /* Coalesced; set the flag so the IPv4 checksum gets recalculated */
2428             seg->is_coalesced = 1;
2429             return size;
2430         }
2431     }
2432 
2433     chain->stat.no_match_cache++;
2434     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2435     return size;
2436 }
2437 
2438 /* Drain a connection's cached data to avoid out-of-order segments */
2439 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2440                                         NetClientState *nc,
2441                                         const uint8_t *buf, size_t size,
2442                                         uint16_t ip_start, uint16_t ip_size,
2443                                         uint16_t tcp_port)
2444 {
2445     VirtioNetRscSeg *seg, *nseg;
2446     uint32_t ppair1, ppair2;
2447 
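         /* A single 32-bit load covers both 16-bit TCP ports (sport + dport) */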
2448     ppair1 = *(uint32_t *)(buf + tcp_port);
2449     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2450         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2451         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2452             || (ppair1 != ppair2)) {
2453             continue;
2454         }
2455         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2456             chain->stat.drain_failed++;
2457         }
2458 
2459         break;
2460     }
2461 
2462     return virtio_net_do_receive(nc, buf, size);
2463 }
2464 
2465 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2466                                             struct ip_header *ip,
2467                                             const uint8_t *buf, size_t size)
2468 {
2469     uint16_t ip_len;
2470 
2471     /* Not an ipv4 packet */
2472     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2473         chain->stat.ip_option++;
2474         return RSC_BYPASS;
2475     }
2476 
2477     /* Don't handle packets with IP options */
2478     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2479         chain->stat.ip_option++;
2480         return RSC_BYPASS;
2481     }
2482 
2483     if (ip->ip_p != IPPROTO_TCP) {
2484         chain->stat.bypass_not_tcp++;
2485         return RSC_BYPASS;
2486     }
2487 
2488     /* Don't handle packets that may be fragmented (IP_DF not set) */
2489     if (!(htons(ip->ip_off) & IP_DF)) {
2490         chain->stat.ip_frag++;
2491         return RSC_BYPASS;
2492     }
2493 
2494     /* Don't handle packets with the ECN flag set */
2495     if (IPTOS_ECN(ip->ip_tos)) {
2496         chain->stat.ip_ecn++;
2497         return RSC_BYPASS;
2498     }
2499 
2500     ip_len = htons(ip->ip_len);
2501     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2502         || ip_len > (size - chain->n->guest_hdr_len -
2503                      sizeof(struct eth_header))) {
2504         chain->stat.ip_hacked++;
2505         return RSC_BYPASS;
2506     }
2507 
2508     return RSC_CANDIDATE;
2509 }
2510 
2511 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2512                                       NetClientState *nc,
2513                                       const uint8_t *buf, size_t size)
2514 {
2515     int32_t ret;
2516     uint16_t hdr_len;
2517     VirtioNetRscUnit unit;
2518 
2519     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520 
2521     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2522         + sizeof(struct tcp_header))) {
2523         chain->stat.bypass_not_tcp++;
2524         return virtio_net_do_receive(nc, buf, size);
2525     }
2526 
2527     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2528     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2529         != RSC_CANDIDATE) {
2530         return virtio_net_do_receive(nc, buf, size);
2531     }
2532 
2533     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2534     if (ret == RSC_BYPASS) {
2535         return virtio_net_do_receive(nc, buf, size);
2536     } else if (ret == RSC_FINAL) {
2537         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2538                 ((hdr_len + sizeof(struct eth_header)) + 12),
2539                 VIRTIO_NET_IP4_ADDR_SIZE,
2540                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2541     }
2542 
2543     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544 }
2545 
2546 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2547                                             struct ip6_header *ip6,
2548                                             const uint8_t *buf, size_t size)
2549 {
2550     uint16_t ip_len;
2551 
2552     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2553         != IP_HEADER_VERSION_6) {
2554         return RSC_BYPASS;
2555     }
2556 
2557     /* Both options and protocol are checked by this test */
2558     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2559         chain->stat.bypass_not_tcp++;
2560         return RSC_BYPASS;
2561     }
2562 
2563     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2564     if (ip_len < sizeof(struct tcp_header) ||
2565         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2566                   - sizeof(struct ip6_header))) {
2567         chain->stat.ip_hacked++;
2568         return RSC_BYPASS;
2569     }
2570 
2571     /* Don't handle packets with the ECN flag set */
2572     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2573         chain->stat.ip_ecn++;
2574         return RSC_BYPASS;
2575     }
2576 
2577     return RSC_CANDIDATE;
2578 }
2579 
2580 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2581                                       const uint8_t *buf, size_t size)
2582 {
2583     int32_t ret;
2584     uint16_t hdr_len;
2585     VirtioNetRscChain *chain;
2586     VirtioNetRscUnit unit;
2587 
2588     chain = opq;
2589     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2590 
2591     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2592         + sizeof(struct tcp_header))) {
2593         return virtio_net_do_receive(nc, buf, size);
2594     }
2595 
2596     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2597     if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2598         != RSC_CANDIDATE) {
2599         return virtio_net_do_receive(nc, buf, size);
2600     }
2601 
2602     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2603     if (ret == RSC_BYPASS) {
2604         return virtio_net_do_receive(nc, buf, size);
2605     } else if (ret == RSC_FINAL) {
2606         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2607                 ((hdr_len + sizeof(struct eth_header)) + 8),
2608                 VIRTIO_NET_IP6_ADDR_SIZE,
2609                 hdr_len + sizeof(struct eth_header)
2610                 + sizeof(struct ip6_header));
2611     }
2612 
2613     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2614 }
2615 
2616 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2617                                                       NetClientState *nc,
2618                                                       uint16_t proto)
2619 {
2620     VirtioNetRscChain *chain;
2621 
2622     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2623         return NULL;
2624     }
2625 
2626     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2627         if (chain->proto == proto) {
2628             return chain;
2629         }
2630     }
2631 
2632     chain = g_malloc(sizeof(*chain));
2633     chain->n = n;
2634     chain->proto = proto;
2635     if (proto == (uint16_t)ETH_P_IP) {
2636         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2637         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2638     } else {
2639         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2640         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2641     }
2642     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2643                                       virtio_net_rsc_purge, chain);
2644     memset(&chain->stat, 0, sizeof(chain->stat));
2645 
2646     QTAILQ_INIT(&chain->buffers);
2647     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2648 
2649     return chain;
2650 }
2651 
2652 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2653                                       const uint8_t *buf,
2654                                       size_t size)
2655 {
2656     uint16_t proto;
2657     VirtioNetRscChain *chain;
2658     struct eth_header *eth;
2659     VirtIONet *n;
2660 
2661     n = qemu_get_nic_opaque(nc);
2662     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2663         return virtio_net_do_receive(nc, buf, size);
2664     }
2665 
2666     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2667     proto = htons(eth->h_proto);
2668 
2669     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2670     if (chain) {
2671         chain->stat.received++;
2672         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2673             return virtio_net_rsc_receive4(chain, nc, buf, size);
2674         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2675             return virtio_net_rsc_receive6(chain, nc, buf, size);
2676         }
2677     }
2678     return virtio_net_do_receive(nc, buf, size);
2679 }
2680 
2681 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2682                                   size_t size)
2683 {
2684     VirtIONet *n = qemu_get_nic_opaque(nc);
2685     if (n->rsc4_enabled || n->rsc6_enabled) {
2686         return virtio_net_rsc_receive(nc, buf, size);
2687     } else {
2688         return virtio_net_do_receive(nc, buf, size);
2689     }
2690 }
2691 
2692 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2693 
2694 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2695 {
2696     VirtIONet *n = qemu_get_nic_opaque(nc);
2697     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2698     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2699     int ret;
2700 
2701     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2702     virtio_notify(vdev, q->tx_vq);
2703 
2704     g_free(q->async_tx.elem);
2705     q->async_tx.elem = NULL;
2706 
2707     virtio_queue_set_notification(q->tx_vq, 1);
2708     ret = virtio_net_flush_tx(q);
2709     if (ret >= n->tx_burst) {
2710         /*
2711          * the flush has been stopped by tx_burst
2712          * we will not receive notification for the
2713          * remaining part, so re-schedule
2714          */
2715         virtio_queue_set_notification(q->tx_vq, 0);
2716         if (q->tx_bh) {
2717             replay_bh_schedule_event(q->tx_bh);
2718         } else {
2719             timer_mod(q->tx_timer,
2720                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2721         }
2722         q->tx_waiting = 1;
2723     }
2724 }
2725 
2726 /* TX */
2727 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2728 {
2729     VirtIONet *n = q->n;
2730     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2731     VirtQueueElement *elem;
2732     int32_t num_packets = 0;
2733     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2734     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2735         return num_packets;
2736     }
2737 
2738     if (q->async_tx.elem) {
2739         virtio_queue_set_notification(q->tx_vq, 0);
2740         return num_packets;
2741     }
2742 
2743     for (;;) {
2744         ssize_t ret;
2745         unsigned int out_num;
2746         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2747         struct virtio_net_hdr vhdr;
2748 
2749         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2750         if (!elem) {
2751             break;
2752         }
2753 
2754         out_num = elem->out_num;
2755         out_sg = elem->out_sg;
2756         if (out_num < 1) {
2757             virtio_error(vdev, "virtio-net header not in first element");
2758             goto detach;
2759         }
2760 
2761         if (n->needs_vnet_hdr_swap) {
2762             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2763                 sizeof(vhdr)) {
2764                 virtio_error(vdev, "virtio-net header incorrect");
2765                 goto detach;
2766             }
2767             virtio_net_hdr_swap(vdev, &vhdr);
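                 /* Build a new sg list: the swapped header first, then the
                  * rest of the original buffers minus the header bytes */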
2768             sg2[0].iov_base = &vhdr;
2769             sg2[0].iov_len = sizeof(vhdr);
2770             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2771                                sizeof(vhdr), -1);
2772             if (out_num == VIRTQUEUE_MAX_SIZE) {
2773                 goto drop;
2774             }
2775             out_num += 1;
2776             out_sg = sg2;
2777         }
2778         /*
2779          * If the host wants to see the guest header as is, we can
2780          * pass it on unchanged. Otherwise, copy just the parts
2781          * that the host is interested in.
2782          */
2783         assert(n->host_hdr_len <= n->guest_hdr_len);
2784         if (n->host_hdr_len != n->guest_hdr_len) {
2785             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2786                 virtio_error(vdev, "virtio-net header is invalid");
2787                 goto detach;
2788             }
2789             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2790                                        out_sg, out_num,
2791                                        0, n->host_hdr_len);
2792             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2793                              out_sg, out_num,
2794                              n->guest_hdr_len, -1);
2795             out_num = sg_num;
2796             out_sg = sg;
2797 
2798             if (out_num < 1) {
2799                 virtio_error(vdev, "virtio-net nothing to send");
2800                 goto detach;
2801             }
2802         }
2803 
2804         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2805                                       out_sg, out_num, virtio_net_tx_complete);
2806         if (ret == 0) {
2807             virtio_queue_set_notification(q->tx_vq, 0);
2808             q->async_tx.elem = elem;
2809             return -EBUSY;
2810         }
2811 
2812 drop:
2813         virtqueue_push(q->tx_vq, elem, 0);
2814         virtio_notify(vdev, q->tx_vq);
2815         g_free(elem);
2816 
2817         if (++num_packets >= n->tx_burst) {
2818             break;
2819         }
2820     }
2821     return num_packets;
2822 
2823 detach:
2824     virtqueue_detach_element(q->tx_vq, elem, 0);
2825     g_free(elem);
2826     return -EINVAL;
2827 }
2828 
2829 static void virtio_net_tx_timer(void *opaque);
2830 
2831 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2832 {
2833     VirtIONet *n = VIRTIO_NET(vdev);
2834     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2835 
2836     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2837         virtio_net_drop_tx_queue_data(vdev, vq);
2838         return;
2839     }
2840 
2841     /* This happens when the device was stopped but the VCPU wasn't. */
2842     if (!vdev->vm_running) {
2843         q->tx_waiting = 1;
2844         return;
2845     }
2846 
2847     if (q->tx_waiting) {
2848         /* We already have queued packets, immediately flush */
2849         timer_del(q->tx_timer);
2850         virtio_net_tx_timer(q);
2851     } else {
2852         /* re-arm timer to flush it (and more) on next tick */
2853         timer_mod(q->tx_timer,
2854                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2855         q->tx_waiting = 1;
2856         virtio_queue_set_notification(vq, 0);
2857     }
2858 }
2859 
2860 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2861 {
2862     VirtIONet *n = VIRTIO_NET(vdev);
2863     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2864 
2865     if (unlikely(n->vhost_started)) {
2866         return;
2867     }
2868 
2869     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2870         virtio_net_drop_tx_queue_data(vdev, vq);
2871         return;
2872     }
2873 
2874     if (unlikely(q->tx_waiting)) {
2875         return;
2876     }
2877     q->tx_waiting = 1;
2878     /* This happens when the device was stopped but the VCPU wasn't. */
2879     if (!vdev->vm_running) {
2880         return;
2881     }
2882     virtio_queue_set_notification(vq, 0);
2883     replay_bh_schedule_event(q->tx_bh);
2884 }
2885 
2886 static void virtio_net_tx_timer(void *opaque)
2887 {
2888     VirtIONetQueue *q = opaque;
2889     VirtIONet *n = q->n;
2890     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2891     int ret;
2892 
2893     /* This happens when the device was stopped but the BH wasn't. */
2894     if (!vdev->vm_running) {
2895         /* Make sure tx waiting is set, so we'll run when restarted. */
2896         assert(q->tx_waiting);
2897         return;
2898     }
2899 
2900     q->tx_waiting = 0;
2901 
2902     /* Just in case the driver is not ready any more */
2903     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2904         return;
2905     }
2906 
2907     ret = virtio_net_flush_tx(q);
2908     if (ret == -EBUSY || ret == -EINVAL) {
2909         return;
2910     }
2911     /*
2912      * If we flush a full burst of packets, assume there are
2913      * more coming and immediately rearm
2914      */
2915     if (ret >= n->tx_burst) {
2916         q->tx_waiting = 1;
2917         timer_mod(q->tx_timer,
2918                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2919         return;
2920     }
2921     /*
2922      * If less than a full burst, re-enable notification and flush
2923      * anything that may have come in while we weren't looking.  If
2924      * we find something, assume the guest is still active and rearm
2925      */
2926     virtio_queue_set_notification(q->tx_vq, 1);
2927     ret = virtio_net_flush_tx(q);
2928     if (ret > 0) {
2929         virtio_queue_set_notification(q->tx_vq, 0);
2930         q->tx_waiting = 1;
2931         timer_mod(q->tx_timer,
2932                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2933     }
2934 }
2935 
2936 static void virtio_net_tx_bh(void *opaque)
2937 {
2938     VirtIONetQueue *q = opaque;
2939     VirtIONet *n = q->n;
2940     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2941     int32_t ret;
2942 
2943     /* This happens when the device was stopped but the BH wasn't. */
2944     if (!vdev->vm_running) {
2945         /* Make sure tx waiting is set, so we'll run when restarted. */
2946         assert(q->tx_waiting);
2947         return;
2948     }
2949 
2950     q->tx_waiting = 0;
2951 
2952     /* Just in case the driver is not ready any more */
2953     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2954         return;
2955     }
2956 
2957     ret = virtio_net_flush_tx(q);
2958     if (ret == -EBUSY || ret == -EINVAL) {
2959         return; /* Notification re-enable handled by tx_complete or device
2960                  * broken */
2961     }
2962 
2963     /* If we flush a full burst of packets, assume there are
2964      * more coming and immediately reschedule */
2965     if (ret >= n->tx_burst) {
2966         replay_bh_schedule_event(q->tx_bh);
2967         q->tx_waiting = 1;
2968         return;
2969     }
2970 
2971     /* If less than a full burst, re-enable notification and flush
2972      * anything that may have come in while we weren't looking.  If
2973      * we find something, assume the guest is still active and reschedule */
2974     virtio_queue_set_notification(q->tx_vq, 1);
2975     ret = virtio_net_flush_tx(q);
2976     if (ret == -EINVAL) {
2977         return;
2978     } else if (ret > 0) {
2979         virtio_queue_set_notification(q->tx_vq, 0);
2980         replay_bh_schedule_event(q->tx_bh);
2981         q->tx_waiting = 1;
2982     }
2983 }
2984 
2985 static void virtio_net_add_queue(VirtIONet *n, int index)
2986 {
2987     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2988 
2989     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2990                                            virtio_net_handle_rx);
2991 
2992     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2993         n->vqs[index].tx_vq =
2994             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2995                              virtio_net_handle_tx_timer);
2996         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2997                                               virtio_net_tx_timer,
2998                                               &n->vqs[index]);
2999     } else {
3000         n->vqs[index].tx_vq =
3001             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
3002                              virtio_net_handle_tx_bh);
3003         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
3004                                                   &DEVICE(vdev)->mem_reentrancy_guard);
3005     }
3006 
3007     n->vqs[index].tx_waiting = 0;
3008     n->vqs[index].n = n;
3009 }
3010 
3011 static void virtio_net_del_queue(VirtIONet *n, int index)
3012 {
3013     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3014     VirtIONetQueue *q = &n->vqs[index];
3015     NetClientState *nc = qemu_get_subqueue(n->nic, index);
3016 
3017     qemu_purge_queued_packets(nc);
3018 
3019     virtio_del_queue(vdev, index * 2);
3020     if (q->tx_timer) {
3021         timer_free(q->tx_timer);
3022         q->tx_timer = NULL;
3023     } else {
3024         qemu_bh_delete(q->tx_bh);
3025         q->tx_bh = NULL;
3026     }
3027     q->tx_waiting = 0;
3028     virtio_del_queue(vdev, index * 2 + 1);
3029 }
3030 
3031 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
3032 {
3033     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3034     int old_num_queues = virtio_get_num_queues(vdev);
3035     int new_num_queues = new_max_queue_pairs * 2 + 1;
3036     int i;
3037 
3038     assert(old_num_queues >= 3);
3039     assert(old_num_queues % 2 == 1);
3040 
3041     if (old_num_queues == new_num_queues) {
3042         return;
3043     }
3044 
3045     /*
3046      * We always need to remove and add ctrl vq if
3047      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3048      * and then we only enter one of the following two loops.
3049      */
3050     virtio_del_queue(vdev, old_num_queues - 1);
3051 
3052     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3053         /* new_num_queues < old_num_queues */
3054         virtio_net_del_queue(n, i / 2);
3055     }
3056 
3057     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3058         /* new_num_queues > old_num_queues */
3059         virtio_net_add_queue(n, i / 2);
3060     }
3061 
3062     /* add ctrl_vq last */
3063     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3064 }
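
     /*
      * Illustrative sketch, not used by the device: the virtqueue index
      * layout the loops above rely on. Queue pair i owns vq 2*i (rx) and
      * 2*i + 1 (tx); the control queue always sits at the last index, so a
      * device with N pairs exposes 2*N + 1 virtqueues. (Hypothetical
      * helpers.)
      */
     static G_GNUC_UNUSED int example_rx_vq_index(int pair)
     {
         return pair * 2;
     }

     static G_GNUC_UNUSED int example_tx_vq_index(int pair)
     {
         return pair * 2 + 1;
     }

     static G_GNUC_UNUSED int example_ctrl_vq_index(int num_pairs)
     {
         return num_pairs * 2;
     }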
3065 
3066 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3067 {
3068     int max = multiqueue ? n->max_queue_pairs : 1;
3069 
3070     n->multiqueue = multiqueue;
3071     virtio_net_change_num_queue_pairs(n, max);
3072 
3073     virtio_net_set_queue_pairs(n);
3074 }
3075 
3076 static int virtio_net_pre_load_queues(VirtIODevice *vdev)
3077 {
3078     virtio_net_set_multiqueue(VIRTIO_NET(vdev),
3079                               virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) ||
3080                               virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ));
3081 
3082     return 0;
3083 }
3084 
3085 static int virtio_net_post_load_device(void *opaque, int version_id)
3086 {
3087     VirtIONet *n = opaque;
3088     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3089     int i, link_down;
3090 
3091     trace_virtio_net_post_load_device();
3092     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3093                                virtio_vdev_has_feature(vdev,
3094                                                        VIRTIO_F_VERSION_1),
3095                                virtio_vdev_has_feature(vdev,
3096                                                        VIRTIO_NET_F_HASH_REPORT));
3097 
3098     /* MAC_TABLE_ENTRIES may be different from the saved image */
3099     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3100         n->mac_table.in_use = 0;
3101     }
3102 
3103     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3104         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3105     }
3106 
3107     /*
3108      * curr_guest_offloads will later be overwritten by the
3109      * virtio_set_features_nocheck call done from virtio_load.
3110      * Here we make sure it is preserved and restored accordingly
3111      * in the virtio_net_post_load_virtio callback.
3112      */
3113     n->saved_guest_offloads = n->curr_guest_offloads;
3114 
3115     virtio_net_set_queue_pairs(n);
3116 
3117     /* Find the first multicast entry in the saved MAC filter */
3118     for (i = 0; i < n->mac_table.in_use; i++) {
3119         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3120             break;
3121         }
3122     }
3123     n->mac_table.first_multi = i;
3124 
3125     /* nc.link_down can't be migrated, so infer link_down from the
3126      * link status bit in n->status */
3127     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3128     for (i = 0; i < n->max_queue_pairs; i++) {
3129         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3130     }
3131 
3132     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3133         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3134         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3135                                   QEMU_CLOCK_VIRTUAL,
3136                                   virtio_net_announce_timer, n);
3137         if (n->announce_timer.round) {
3138             timer_mod(n->announce_timer.tm,
3139                       qemu_clock_get_ms(n->announce_timer.type));
3140         } else {
3141             qemu_announce_timer_del(&n->announce_timer, false);
3142         }
3143     }
3144 
3145     virtio_net_commit_rss_config(n);
3146     return 0;
3147 }
3148 
3149 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3150 {
3151     VirtIONet *n = VIRTIO_NET(vdev);
3152     /*
3153      * The actual needed state is now in saved_guest_offloads,
3154      * see virtio_net_post_load_device for detail.
3155      * Restore it back and apply the desired offloads.
3156      */
3157     n->curr_guest_offloads = n->saved_guest_offloads;
3158     if (peer_has_vnet_hdr(n)) {
3159         virtio_net_apply_guest_offloads(n);
3160     }
3161 
3162     return 0;
3163 }
3164 
3165 /* tx_waiting field of a VirtIONetQueue */
3166 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3167     .name = "virtio-net-queue-tx_waiting",
3168     .fields = (const VMStateField[]) {
3169         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3170         VMSTATE_END_OF_LIST()
3171     },
3172 };
3173 
3174 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3175 {
3176     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3177 }
3178 
3179 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3180 {
3181     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3182                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3183 }
3184 
3185 static bool mac_table_fits(void *opaque, int version_id)
3186 {
3187     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3188 }
3189 
3190 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3191 {
3192     return !mac_table_fits(opaque, version_id);
3193 }
3194 
3195 /* This temporary type is shared by all the WITH_TMP methods
3196  * although only some fields are used by each.
3197  */
3198 struct VirtIONetMigTmp {
3199     VirtIONet      *parent;
3200     VirtIONetQueue *vqs_1;
3201     uint16_t        curr_queue_pairs_1;
3202     uint8_t         has_ufo;
3203     uint32_t        has_vnet_hdr;
3204 };
3205 
3206 /* The 2nd and subsequent tx_waiting flags are loaded later than the
3207  * 1st entry in the queue_pairs, and only if there is more than one
3208  * entry.  We use the tmp mechanism to compute a temporary pointer and
3209  * count, and also to validate the count.
3210  */
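
     /*
      * For example, with curr_queue_pairs == 3 the main section carries
      * vqs[0].tx_waiting while this tmp section carries vqs[1] and vqs[2]
      * (pre-save sets vqs_1 = &vqs[1] and curr_queue_pairs_1 = 2).
      */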
3211 
3212 static int virtio_net_tx_waiting_pre_save(void *opaque)
3213 {
3214     struct VirtIONetMigTmp *tmp = opaque;
3215 
3216     tmp->vqs_1 = tmp->parent->vqs + 1;
3217     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3218     if (tmp->parent->curr_queue_pairs == 0) {
3219         tmp->curr_queue_pairs_1 = 0;
3220     }
3221 
3222     return 0;
3223 }
3224 
3225 static int virtio_net_tx_waiting_pre_load(void *opaque)
3226 {
3227     struct VirtIONetMigTmp *tmp = opaque;
3228 
3229     /* Reuse the pointer setup from save */
3230     virtio_net_tx_waiting_pre_save(opaque);
3231 
3232     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3233         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3234             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3235 
3236         return -EINVAL;
3237     }
3238 
3239     return 0; /* all good */
3240 }
3241 
3242 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3243     .name      = "virtio-net-tx_waiting",
3244     .pre_load  = virtio_net_tx_waiting_pre_load,
3245     .pre_save  = virtio_net_tx_waiting_pre_save,
3246     .fields    = (const VMStateField[]) {
3247         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3248                                      curr_queue_pairs_1,
3249                                      vmstate_virtio_net_queue_tx_waiting,
3250                                      struct VirtIONetQueue),
3251         VMSTATE_END_OF_LIST()
3252     },
3253 };
3254 
3255 /* the 'has_ufo' flag is only tested; if the incoming stream has the
3256  * flag set, we need to check that we have UFO support ourselves
3257  */
3258 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3259 {
3260     struct VirtIONetMigTmp *tmp = opaque;
3261 
3262     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3263         error_report("virtio-net: saved image requires TUN_F_UFO support");
3264         return -EINVAL;
3265     }
3266 
3267     return 0;
3268 }
3269 
3270 static int virtio_net_ufo_pre_save(void *opaque)
3271 {
3272     struct VirtIONetMigTmp *tmp = opaque;
3273 
3274     tmp->has_ufo = tmp->parent->has_ufo;
3275 
3276     return 0;
3277 }
3278 
3279 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3280     .name      = "virtio-net-ufo",
3281     .post_load = virtio_net_ufo_post_load,
3282     .pre_save  = virtio_net_ufo_pre_save,
3283     .fields    = (const VMStateField[]) {
3284         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3285         VMSTATE_END_OF_LIST()
3286     },
3287 };
3288 
3289 /* the 'has_vnet_hdr' flag is only tested; if the incoming stream has the
3290  * flag set, we need to check that we have vnet_hdr support ourselves
3291  */
3292 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3293 {
3294     struct VirtIONetMigTmp *tmp = opaque;
3295 
3296     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3297         error_report("virtio-net: saved image requires vnet_hdr=on");
3298         return -EINVAL;
3299     }
3300 
3301     return 0;
3302 }
3303 
3304 static int virtio_net_vnet_pre_save(void *opaque)
3305 {
3306     struct VirtIONetMigTmp *tmp = opaque;
3307 
3308     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3309 
3310     return 0;
3311 }
3312 
3313 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3314     .name      = "virtio-net-vnet",
3315     .post_load = virtio_net_vnet_post_load,
3316     .pre_save  = virtio_net_vnet_pre_save,
3317     .fields    = (const VMStateField[]) {
3318         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3319         VMSTATE_END_OF_LIST()
3320     },
3321 };
3322 
3323 static bool virtio_net_rss_needed(void *opaque)
3324 {
3325     return VIRTIO_NET(opaque)->rss_data.enabled;
3326 }
3327 
3328 static const VMStateDescription vmstate_virtio_net_rss = {
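     /*
      * Only sent when RSS is enabled, see virtio_net_rss_needed() above.
      * VMSTATE_VARRAY_UINT16_ALLOC allocates rss_data.indirections_table
      * on the destination; it is freed in virtio_net_device_unrealize().
      */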
3329     .name      = "virtio-net-device/rss",
3330     .version_id = 1,
3331     .minimum_version_id = 1,
3332     .needed = virtio_net_rss_needed,
3333     .fields = (const VMStateField[]) {
3334         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3335         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3336         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3337         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3338         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3339         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3340         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3341                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3342         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3343                                     rss_data.indirections_len, 0,
3344                                     vmstate_info_uint16, uint16_t),
3345         VMSTATE_END_OF_LIST()
3346     },
3347 };
3348 
3349 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3350 {
3351     VirtIONet *n = VIRTIO_NET(vdev);
3352     NetClientState *nc;
3353     struct vhost_net *net;
3354 
3355     if (!n->nic) {
3356         return NULL;
3357     }
3358 
3359     nc = qemu_get_queue(n->nic);
3360     if (!nc) {
3361         return NULL;
3362     }
3363 
3364     net = get_vhost_net(nc->peer);
3365     if (!net) {
3366         return NULL;
3367     }
3368 
3369     return &net->dev;
3370 }
3371 
3372 static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size,
3373                                      const VMStateField *field,
3374                                      JSONWriter *vmdesc)
3375 {
3376     VirtIONet *n = pv;
3377     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3378     struct vhost_dev *vhdev;
3379     Error *local_error = NULL;
3380     int ret;
3381 
3382     vhdev = virtio_net_get_vhost(vdev);
3383     if (vhdev == NULL) {
3384         error_report("Error getting vhost back-end of %s device %s",
3385                      vdev->name,
3386                      vdev->parent_obj.canonical_path);
3387         return -1;
3388     }
3389 
3390     ret = vhost_save_backend_state(vhdev, f, &local_error);
3391     if (ret < 0) {
3392         error_reportf_err(local_error,
3393                           "Error saving back-end state of %s device %s: ",
3394                           vdev->name, vdev->parent_obj.canonical_path);
3395         return ret;
3396     }
3397 
3398     return 0;
3399 }
3400 
3401 static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size,
3402                                      const VMStateField *field)
3403 {
3404     VirtIONet *n = pv;
3405     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3406     struct vhost_dev *vhdev;
3407     Error *local_error = NULL;
3408     int ret;
3409 
3410     vhdev = virtio_net_get_vhost(vdev);
3411     if (vhdev == NULL) {
3412         error_report("Error getting vhost back-end of %s device %s",
3413                      vdev->name,
3414                      vdev->parent_obj.canonical_path);
3415         return -1;
3416     }
3417 
3418     ret = vhost_load_backend_state(vhdev, f, &local_error);
3419     if (ret < 0) {
3420         error_reportf_err(local_error,
3421                           "Error loading  back-end state of %s device %s: ",
3422                           vdev->name, vdev->parent_obj.canonical_path);
3423         return ret;
3424     }
3425 
3426     return 0;
3427 }
3428 
3429 static bool vhost_user_net_is_internal_migration(void *opaque)
3430 {
3431     VirtIONet *n = opaque;
3432     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3433     struct vhost_dev *vhdev;
3434 
3435     vhdev = virtio_net_get_vhost(vdev);
3436     if (vhdev == NULL) {
3437         return false;
3438     }
3439 
3440     return vhost_supports_device_state(vhdev);
3441 }
3442 
3443 static const VMStateDescription vhost_user_net_backend_state = {
3444     .name = "virtio-net-device/backend",
3445     .version_id = 0,
3446     .needed = vhost_user_net_is_internal_migration,
3447     .fields = (const VMStateField[]) {
3448         {
3449             .name = "backend",
3450             .info = &(const VMStateInfo) {
3451                 .name = "virtio-net vhost-user backend state",
3452                 .get = vhost_user_net_load_state,
3453                 .put = vhost_user_net_save_state,
3454             },
3455         },
3456         VMSTATE_END_OF_LIST()
3457     }
3458 };
3459 
3460 static const VMStateDescription vmstate_virtio_net_device = {
3461     .name = "virtio-net-device",
3462     .version_id = VIRTIO_NET_VM_VERSION,
3463     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3464     .post_load = virtio_net_post_load_device,
3465     .fields = (const VMStateField[]) {
3466         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3467         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3468                                vmstate_virtio_net_queue_tx_waiting,
3469                                VirtIONetQueue),
3470         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3471         VMSTATE_UINT16(status, VirtIONet),
3472         VMSTATE_UINT8(promisc, VirtIONet),
3473         VMSTATE_UINT8(allmulti, VirtIONet),
3474         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3475 
3476         /* Guarded pair: if it fits we load it, else we throw it away
3477          * - this can happen if the source has a larger MAC table; post-load
3478          * sets flags in this case.
3479          */
3480         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3481                                  0, mac_table_fits, mac_table.in_use,
3482                                  ETH_ALEN),
3483         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3484                                      mac_table.in_use, ETH_ALEN),
3485 
3486         /* Note: this is an array of uint32_t that has always been saved as
3487          * a raw buffer, so mind the endianness; it is actually used as a
3488          * bitmap built on those uint32_t words.
3489          */
3490         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3491         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3492                          vmstate_virtio_net_has_vnet),
3493         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3494         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3495         VMSTATE_UINT8(alluni, VirtIONet),
3496         VMSTATE_UINT8(nomulti, VirtIONet),
3497         VMSTATE_UINT8(nouni, VirtIONet),
3498         VMSTATE_UINT8(nobcast, VirtIONet),
3499         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3500                          vmstate_virtio_net_has_ufo),
3501         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3502                             vmstate_info_uint16_equal, uint16_t),
3503         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3504         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3505                          vmstate_virtio_net_tx_waiting),
3506         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3507                             has_ctrl_guest_offloads),
3508         VMSTATE_END_OF_LIST()
3509     },
3510     .subsections = (const VMStateDescription * const []) {
3511         &vmstate_virtio_net_rss,
3512         &vhost_user_net_backend_state,
3513         NULL
3514     }
3515 };
3516 
3517 static NetClientInfo net_virtio_info = {
3518     .type = NET_CLIENT_DRIVER_NIC,
3519     .size = sizeof(NICState),
3520     .can_receive = virtio_net_can_receive,
3521     .receive = virtio_net_receive,
3522     .link_status_changed = virtio_net_set_link_status,
3523     .query_rx_filter = virtio_net_query_rxfilter,
3524     .announce = virtio_net_announce,
3525 };
3526 
3527 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3528 {
3529     VirtIONet *n = VIRTIO_NET(vdev);
3530     NetClientState *nc;
3531     assert(n->vhost_started);
3532     if (!n->multiqueue && idx == 2) {
3533         /* Must guard against an invalid features set or a bogus queue
3534          * index supplied by a malicious guest, or one that slipped in
3535          * through a buggy migration stream.
3536          */
3537         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3538             qemu_log_mask(LOG_GUEST_ERROR,
3539                           "%s: bogus vq index ignored\n", __func__);
3540             return false;
3541         }
3542         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3543     } else {
3544         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3545     }
3546     /*
3547      * Check for the config interrupt: the macro VIRTIO_CONFIG_IRQ_IDX (-1)
3548      * is used as the config interrupt's index.  If the backend does not
3549      * support it, the function returns false.
3550      */
3551 
3552     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3553         return vhost_net_config_pending(get_vhost_net(nc->peer));
3554     }
3555     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3556 }
3557 
3558 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3559                                            bool mask)
3560 {
3561     VirtIONet *n = VIRTIO_NET(vdev);
3562     NetClientState *nc;
3563     assert(n->vhost_started);
3564     if (!n->multiqueue && idx == 2) {
3565         /* Must guard against an invalid features set or a bogus queue
3566          * index supplied by a malicious guest, or one that slipped in
3567          * through a buggy migration stream.
3568          */
3569         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3570             qemu_log_mask(LOG_GUEST_ERROR,
3571                           "%s: bogus vq index ignored\n", __func__);
3572             return;
3573         }
3574         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3575     } else {
3576         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3577     }
3578     /*
3579      * Check for the config interrupt: the macro VIRTIO_CONFIG_IRQ_IDX (-1)
3580      * is used as the config interrupt's index.  If the backend does not
3581      * support it, the function returns without masking.
3582      */
3583 
3584     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3585         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3586         return;
3587     }
3588     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3589 }
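
     /*
      * Virtqueue index layout assumed by the two functions above: without
      * multiqueue the virtqueues are rx = 0, tx = 1, ctrl = 2, hence the
      * special-casing of idx == 2; with multiqueue the ctrl vq follows the
      * max_queue_pairs * 2 datapath queues and vq2q() maps a vq index to
      * its queue pair.
      */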
3590 
3591 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3592 {
3593     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3594 
3595     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3596 }
3597 
3598 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3599                                    const char *type)
3600 {
3601     /*
3602      * The name may be NULL; the netclient name will then be type.x.
3603      */
3604     assert(type != NULL);
3605 
3606     g_free(n->netclient_name);
3607     g_free(n->netclient_type);
3608     n->netclient_name = g_strdup(name);
3609     n->netclient_type = g_strdup(type);
3610 }
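
     /*
      * Minimal usage sketch (caller and names are illustrative): a
      * transport wanting a specific netclient identity would call,
      * before realize,
      *
      *     virtio_net_set_netclient_name(n, "net0", "virtio-net-pci");
      *
      * virtio_net_device_realize() then creates the NIC with this
      * type/name pair instead of the default object_get_typename() and
      * dev->id pair.
      */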
3611 
3612 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3613 {
3614     HotplugHandler *hotplug_ctrl;
3615     PCIDevice *pci_dev;
3616     Error *err = NULL;
3617 
3618     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3619     if (hotplug_ctrl) {
3620         pci_dev = PCI_DEVICE(dev);
3621         pci_dev->partially_hotplugged = true;
3622         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3623         if (err) {
3624             error_report_err(err);
3625             return false;
3626         }
3627     } else {
3628         return false;
3629     }
3630     return true;
3631 }
3632 
3633 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3634                                     Error **errp)
3635 {
3636     Error *err = NULL;
3637     HotplugHandler *hotplug_ctrl;
3638     PCIDevice *pdev = PCI_DEVICE(dev);
3639     BusState *primary_bus;
3640 
3641     if (!pdev->partially_hotplugged) {
3642         return true;
3643     }
3644     primary_bus = dev->parent_bus;
3645     if (!primary_bus) {
3646         error_setg(errp, "virtio_net: couldn't find primary bus");
3647         return false;
3648     }
3649     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3650     qatomic_set(&n->failover_primary_hidden, false);
3651     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3652     if (hotplug_ctrl) {
3653         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3654         if (err) {
3655             goto out;
3656         }
3657         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3658     }
3659     pdev->partially_hotplugged = false;
3660 
3661 out:
3662     error_propagate(errp, err);
3663     return !err;
3664 }
3665 
3666 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3667 {
3668     bool should_be_hidden;
3669     Error *err = NULL;
3670     DeviceState *dev = failover_find_primary_device(n);
3671 
3672     if (!dev) {
3673         return;
3674     }
3675 
3676     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3677 
3678     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3679         if (failover_unplug_primary(n, dev)) {
3680             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3681             qapi_event_send_unplug_primary(dev->id);
3682             qatomic_set(&n->failover_primary_hidden, true);
3683         } else {
3684             warn_report("couldn't unplug primary device");
3685         }
3686     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3687         /* We already unplugged the device; let's plug it back */
3688         if (!failover_replug_primary(n, dev, &err)) {
3689             if (err) {
3690                 error_report_err(err);
3691             }
3692         }
3693     }
3694 }
3695 
3696 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3697                                                MigrationEvent *e, Error **errp)
3698 {
3699     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3700     virtio_net_handle_migration_primary(n, e);
3701     return 0;
3702 }
3703 
3704 static bool failover_hide_primary_device(DeviceListener *listener,
3705                                          const QDict *device_opts,
3706                                          bool from_json,
3707                                          Error **errp)
3708 {
3709     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3710     const char *standby_id;
3711 
3712     if (!device_opts) {
3713         return false;
3714     }
3715 
3716     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3717         return false;
3718     }
3719 
3720     if (!qdict_haskey(device_opts, "id")) {
3721         error_setg(errp, "Device with failover_pair_id needs to have id");
3722         return false;
3723     }
3724 
3725     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3726     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3727         return false;
3728     }
3729 
3730     /*
3731      * The hide helper can be called several times for a given device.
3732      * Check that there is only one primary for a virtio-net device, but
3733      * don't duplicate the qdict if it is called again for the same
3734      * device.
3735      */
3736     if (n->primary_opts) {
3737         const char *old, *new;
3738         /* devices with failover_pair_id always have an id */
3739         old = qdict_get_str(n->primary_opts, "id");
3740         new = qdict_get_str(device_opts, "id");
3741         if (strcmp(old, new) != 0) {
3742             error_setg(errp, "Cannot attach more than one primary device to "
3743                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3744             return false;
3745         }
3746     } else {
3747         n->primary_opts = qdict_clone_shallow(device_opts);
3748         n->primary_opts_from_json = from_json;
3749     }
3750 
3751     /* failover_primary_hidden is set during feature negotiation */
3752     return qatomic_read(&n->failover_primary_hidden);
3753 }
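
     /*
      * Illustrative command line (IDs are examples): the standby NIC
      *
      *     -device virtio-net-pci,id=net0,failover=on,...
      *
      * is paired with a primary device such as
      *
      *     -device vfio-pci,host=...,id=hostdev0,failover_pair_id=net0
      *
      * failover_pair_id must match the virtio-net netclient name; the
      * primary stays hidden until the standby feature has been negotiated.
      */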
3754 
3755 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3756 {
3757     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3758     VirtIONet *n = VIRTIO_NET(dev);
3759     NetClientState *nc;
3760     int i;
3761 
3762     if (n->net_conf.mtu) {
3763         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3764     }
3765 
3766     if (n->net_conf.duplex_str) {
3767         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3768             n->net_conf.duplex = DUPLEX_HALF;
3769         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3770             n->net_conf.duplex = DUPLEX_FULL;
3771         } else {
3772             error_setg(errp, "'duplex' must be 'half' or 'full'");
3773             return;
3774         }
3775         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3776     } else {
3777         n->net_conf.duplex = DUPLEX_UNKNOWN;
3778     }
3779 
3780     if (n->net_conf.speed < SPEED_UNKNOWN) {
3781         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3782         return;
3783     }
3784     if (n->net_conf.speed >= 0) {
3785         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3786     }
3787 
3788     if (n->failover) {
3789         n->primary_listener.hide_device = failover_hide_primary_device;
3790         qatomic_set(&n->failover_primary_hidden, true);
3791         device_listener_register(&n->primary_listener);
3792         migration_add_notifier(&n->migration_state,
3793                                virtio_net_migration_state_notifier);
3794         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3795     }
3796 
3797     virtio_net_set_config_size(n, n->host_features);
3798     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3799 
3800     /*
3801      * We set a lower limit on RX queue size to what it always was.
3802      * Guests that want a smaller ring can always resize it without
3803      * help from us (using virtio 1 and up).
3804      */
3805     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3806         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3807         !is_power_of_2(n->net_conf.rx_queue_size)) {
3808         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3809                    "must be a power of 2 between %d and %d.",
3810                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3811                    VIRTQUEUE_MAX_SIZE);
3812         virtio_cleanup(vdev);
3813         return;
3814     }
3815 
3816     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3817         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3818         !is_power_of_2(n->net_conf.tx_queue_size)) {
3819         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3820                    "must be a power of 2 between %d and %d",
3821                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3822                    virtio_net_max_tx_queue_size(n));
3823         virtio_cleanup(vdev);
3824         return;
3825     }
3826 
3827     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3828 
3829     /*
3830      * Figure out the datapath queue pairs since the backend could
3831      * provide a control queue via peers as well.
3832      */
3833     if (n->nic_conf.peers.queues) {
3834         for (i = 0; i < n->max_ncs; i++) {
3835             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3836                 ++n->max_queue_pairs;
3837             }
3838         }
3839     }
3840     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3841 
3842     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3843         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3844                    "must be a positive integer less than %d.",
3845                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3846         virtio_cleanup(vdev);
3847         return;
3848     }
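         /* With VIRTIO_QUEUE_MAX == 1024 this allows up to 511 queue pairs:
          * 511 * 2 datapath vqs + 1 ctrl vq == 1023 <= 1024. */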
3849     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3850     n->curr_queue_pairs = 1;
3851     n->tx_timeout = n->net_conf.txtimer;
3852 
3853     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3854                        && strcmp(n->net_conf.tx, "bh")) {
3855         warn_report("virtio-net: "
3856                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3857                     n->net_conf.tx);
3858         error_printf("Defaulting to \"bh\"\n");
3859     }
3860 
3861     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3862                                     n->net_conf.tx_queue_size);
3863 
3864     virtio_net_add_queue(n, 0);
3865 
3866     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3867     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3868     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3869     n->status = VIRTIO_NET_S_LINK_UP;
3870     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3871                               QEMU_CLOCK_VIRTUAL,
3872                               virtio_net_announce_timer, n);
3873     n->announce_timer.round = 0;
3874 
3875     if (n->netclient_type) {
3876         /*
3877          * This happens when virtio_net_set_netclient_name() has been called.
3878          */
3879         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3880                               n->netclient_type, n->netclient_name,
3881                               &dev->mem_reentrancy_guard, n);
3882     } else {
3883         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3884                               object_get_typename(OBJECT(dev)), dev->id,
3885                               &dev->mem_reentrancy_guard, n);
3886     }
3887 
3888     for (i = 0; i < n->max_queue_pairs; i++) {
3889         n->nic->ncs[i].do_not_pad = true;
3890     }
3891 
3892     peer_test_vnet_hdr(n);
3893     if (peer_has_vnet_hdr(n)) {
3894         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3895     } else {
3896         n->host_hdr_len = 0;
3897     }
3898 
3899     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3900 
3901     n->vqs[0].tx_waiting = 0;
3902     n->tx_burst = n->net_conf.txburst;
3903     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3904     n->promisc = 1; /* for compatibility */
3905 
3906     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3907 
3908     n->vlans = g_malloc0(MAX_VLAN >> 3);
3909 
3910     nc = qemu_get_queue(n->nic);
3911     nc->rxfilter_notify_enabled = 1;
3912 
3913     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3914         struct virtio_net_config netcfg = {};
3915         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3916         vhost_net_set_config(get_vhost_net(nc->peer),
3917             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3918     }
3919     QTAILQ_INIT(&n->rsc_chains);
3920     n->qdev = dev;
3921 
3922     net_rx_pkt_init(&n->rx_pkt);
3923 
3924     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3925         virtio_net_load_ebpf(n, errp);
3926     }
3927 }
3928 
3929 static void virtio_net_device_unrealize(DeviceState *dev)
3930 {
3931     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3932     VirtIONet *n = VIRTIO_NET(dev);
3933     int i, max_queue_pairs;
3934 
3935     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3936         virtio_net_unload_ebpf(n);
3937     }
3938 
3939     /* This will stop vhost backend if appropriate. */
3940     virtio_net_set_status(vdev, 0);
3941 
3942     g_free(n->netclient_name);
3943     n->netclient_name = NULL;
3944     g_free(n->netclient_type);
3945     n->netclient_type = NULL;
3946 
3947     g_free(n->mac_table.macs);
3948     g_free(n->vlans);
3949 
3950     if (n->failover) {
3951         qobject_unref(n->primary_opts);
3952         device_listener_unregister(&n->primary_listener);
3953         migration_remove_notifier(&n->migration_state);
3954     } else {
3955         assert(n->primary_opts == NULL);
3956     }
3957 
3958     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3959     for (i = 0; i < max_queue_pairs; i++) {
3960         virtio_net_del_queue(n, i);
3961     }
3962     /* also delete the control vq */
3963     virtio_del_queue(vdev, max_queue_pairs * 2);
3964     qemu_announce_timer_del(&n->announce_timer, false);
3965     g_free(n->vqs);
3966     qemu_del_nic(n->nic);
3967     virtio_net_rsc_cleanup(n);
3968     g_free(n->rss_data.indirections_table);
3969     net_rx_pkt_uninit(n->rx_pkt);
3970     virtio_cleanup(vdev);
3971 }
3972 
3973 static void virtio_net_reset(VirtIODevice *vdev)
3974 {
3975     VirtIONet *n = VIRTIO_NET(vdev);
3976     int i;
3977 
3978     /* Reset back to compatibility mode */
3979     n->promisc = 1;
3980     n->allmulti = 0;
3981     n->alluni = 0;
3982     n->nomulti = 0;
3983     n->nouni = 0;
3984     n->nobcast = 0;
3985     /* multiqueue is disabled by default */
3986     n->curr_queue_pairs = 1;
3987     timer_del(n->announce_timer.tm);
3988     n->announce_timer.round = 0;
3989     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3990 
3991     /* Flush any MAC and VLAN filter table state */
3992     n->mac_table.in_use = 0;
3993     n->mac_table.first_multi = 0;
3994     n->mac_table.multi_overflow = 0;
3995     n->mac_table.uni_overflow = 0;
3996     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3997     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3998     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3999     memset(n->vlans, 0, MAX_VLAN >> 3);
4000 
4001     /* Flush any async TX */
4002     for (i = 0;  i < n->max_queue_pairs; i++) {
4003         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
4004     }
4005 
4006     virtio_net_disable_rss(n);
4007 }
4008 
4009 static void virtio_net_instance_init(Object *obj)
4010 {
4011     VirtIONet *n = VIRTIO_NET(obj);
4012 
4013     /*
4014      * The default config_size is sizeof(struct virtio_net_config).
4015      * Can be overridden with virtio_net_set_config_size.
4016      */
4017     n->config_size = sizeof(struct virtio_net_config);
4018     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
4019                                   "bootindex", "/ethernet-phy@0",
4020                                   DEVICE(n));
4021 
4022     ebpf_rss_init(&n->ebpf_rss);
4023 }
4024 
4025 static int virtio_net_pre_save(void *opaque)
4026 {
4027     VirtIONet *n = opaque;
4028 
4029     /* At this point, backend must be stopped, otherwise
4030      * it might keep writing to memory. */
4031     assert(!n->vhost_started);
4032 
4033     return 0;
4034 }
4035 
4036 static bool primary_unplug_pending(void *opaque)
4037 {
4038     DeviceState *dev = opaque;
4039     DeviceState *primary;
4040     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4041     VirtIONet *n = VIRTIO_NET(vdev);
4042 
4043     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
4044         return false;
4045     }
4046     primary = failover_find_primary_device(n);
4047     return primary ? primary->pending_deleted_event : false;
4048 }
4049 
4050 static bool dev_unplug_pending(void *opaque)
4051 {
4052     DeviceState *dev = opaque;
4053     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4054 
4055     return vdc->primary_unplug_pending(dev);
4056 }
4057 
4058 static const VMStateDescription vmstate_virtio_net = {
4059     .name = "virtio-net",
4060     .minimum_version_id = VIRTIO_NET_VM_VERSION,
4061     .version_id = VIRTIO_NET_VM_VERSION,
4062     .fields = (const VMStateField[]) {
4063         VMSTATE_VIRTIO_DEVICE,
4064         VMSTATE_END_OF_LIST()
4065     },
4066     .pre_save = virtio_net_pre_save,
4067     .dev_unplug_pending = dev_unplug_pending,
4068 };
4069 
4070 static const Property virtio_net_properties[] = {
4071     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
4072                     VIRTIO_NET_F_CSUM, true),
4073     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
4074                     VIRTIO_NET_F_GUEST_CSUM, true),
4075     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
4076     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
4077                     VIRTIO_NET_F_GUEST_TSO4, true),
4078     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
4079                     VIRTIO_NET_F_GUEST_TSO6, true),
4080     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
4081                     VIRTIO_NET_F_GUEST_ECN, true),
4082     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
4083                     VIRTIO_NET_F_GUEST_UFO, true),
4084     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
4085                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
4086     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
4087                     VIRTIO_NET_F_HOST_TSO4, true),
4088     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
4089                     VIRTIO_NET_F_HOST_TSO6, true),
4090     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
4091                     VIRTIO_NET_F_HOST_ECN, true),
4092     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
4093                     VIRTIO_NET_F_HOST_UFO, true),
4094     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
4095                     VIRTIO_NET_F_MRG_RXBUF, true),
4096     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
4097                     VIRTIO_NET_F_STATUS, true),
4098     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
4099                     VIRTIO_NET_F_CTRL_VQ, true),
4100     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4101                     VIRTIO_NET_F_CTRL_RX, true),
4102     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4103                     VIRTIO_NET_F_CTRL_VLAN, true),
4104     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4105                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4106     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4107                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4108     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4109                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4110     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4111     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4112                     VIRTIO_NET_F_RSS, false),
4113     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4114                     VIRTIO_NET_F_HASH_REPORT, false),
4115     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4116                       ebpf_rss_fds, qdev_prop_string, char*),
4117     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4118                     VIRTIO_NET_F_RSC_EXT, false),
4119     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4120                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4121     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4122     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4123                        TX_TIMER_INTERVAL),
4124     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4125     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4126     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4127                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4128     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4129                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4130     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4131     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4132                      true),
4133     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4134     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4135     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4136     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4137                       VIRTIO_NET_F_GUEST_USO4, true),
4138     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4139                       VIRTIO_NET_F_GUEST_USO6, true),
4140     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4141                       VIRTIO_NET_F_HOST_USO, true),
4142 };
4143 
4144 static void virtio_net_class_init(ObjectClass *klass, void *data)
4145 {
4146     DeviceClass *dc = DEVICE_CLASS(klass);
4147     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4148 
4149     device_class_set_props(dc, virtio_net_properties);
4150     dc->vmsd = &vmstate_virtio_net;
4151     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4152     vdc->realize = virtio_net_device_realize;
4153     vdc->unrealize = virtio_net_device_unrealize;
4154     vdc->get_config = virtio_net_get_config;
4155     vdc->set_config = virtio_net_set_config;
4156     vdc->get_features = virtio_net_get_features;
4157     vdc->set_features = virtio_net_set_features;
4158     vdc->bad_features = virtio_net_bad_features;
4159     vdc->reset = virtio_net_reset;
4160     vdc->queue_reset = virtio_net_queue_reset;
4161     vdc->queue_enable = virtio_net_queue_enable;
4162     vdc->set_status = virtio_net_set_status;
4163     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4164     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4165     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4166     vdc->pre_load_queues = virtio_net_pre_load_queues;
4167     vdc->post_load = virtio_net_post_load_virtio;
4168     vdc->vmsd = &vmstate_virtio_net_device;
4169     vdc->primary_unplug_pending = primary_unplug_pending;
4170     vdc->get_vhost = virtio_net_get_vhost;
4171     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4172 }
4173 
4174 static const TypeInfo virtio_net_info = {
4175     .name = TYPE_VIRTIO_NET,
4176     .parent = TYPE_VIRTIO_DEVICE,
4177     .instance_size = sizeof(VirtIONet),
4178     .instance_init = virtio_net_instance_init,
4179     .class_init = virtio_net_class_init,
4180 };
4181 
4182 static void virtio_register_types(void)
4183 {
4184     type_register_static(&virtio_net_info);
4185 }
4186 
4187 type_init(virtio_register_types)
4188