xref: /cloud-hypervisor/virtio-devices/src/net.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the THIRD-PARTY file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
11     RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
12     EPOLL_HELPER_EVENT_LAST,
13 };
14 use crate::seccomp_filters::Thread;
15 use crate::thread_helper::spawn_virtio_thread;
16 use crate::GuestMemoryMmap;
17 use crate::VirtioInterrupt;
18 use anyhow::anyhow;
19 use net_util::CtrlQueue;
20 use net_util::{
21     build_net_config_space, build_net_config_space_with_mq, open_tap,
22     virtio_features_to_tap_offload, MacAddr, NetCounters, NetQueuePair, OpenTapError, RxVirtio,
23     Tap, TapError, TxVirtio, VirtioNetConfig,
24 };
25 use seccompiler::SeccompAction;
26 use std::net::Ipv4Addr;
27 use std::num::Wrapping;
28 use std::ops::Deref;
29 use std::os::unix::io::{AsRawFd, RawFd};
30 use std::result;
31 use std::sync::atomic::{AtomicBool, Ordering};
32 use std::sync::{Arc, Barrier};
33 use std::thread;
34 use std::vec::Vec;
35 use std::{collections::HashMap, convert::TryInto};
36 use thiserror::Error;
37 use versionize::{VersionMap, Versionize, VersionizeResult};
38 use versionize_derive::Versionize;
39 use virtio_bindings::bindings::virtio_net::*;
40 use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
41 use virtio_queue::{Queue, QueueT};
42 use vm_memory::{ByteValued, GuestAddressSpace, GuestMemoryAtomic};
43 use vm_migration::VersionMapped;
44 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
45 use vm_virtio::AccessPlatform;
46 use vmm_sys_util::eventfd::EventFd;
47 
/// Control queue
// Event available on the control queue.
//
// The identifier is the first one after `EPOLL_HELPER_EVENT_LAST`. It has the
// same numeric value as `RX_QUEUE_EVENT`; the two are registered on separate
// `EpollHelper` instances (`NetCtrlEpollHandler::run_ctrl` and
// `NetEpollHandler::run`).
const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Following the VIRTIO specification, the MTU should be at least 1280.
pub const MIN_MTU: u16 = 1280;
54 
/// Epoll handler servicing the virtio-net control queue.
///
/// It waits on `queue_evt`, hands the queue to `ctrl_q` for processing and
/// raises the queue interrupt when the queue reports that a notification is
/// needed.
pub struct NetCtrlEpollHandler {
    /// Guest memory handle; a memory reference is taken from it for every
    /// control queue event.
    pub mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Kill event handed to the `EpollHelper` in `run_ctrl`.
    pub kill_evt: EventFd,
    /// Pause event handed to the `EpollHelper` in `run_ctrl`.
    pub pause_evt: EventFd,
    /// Control queue processor (built from the device taps by the caller).
    pub ctrl_q: CtrlQueue,
    /// Eventfd registered as `CTRL_QUEUE_EVENT`; read once per event.
    pub queue_evt: EventFd,
    /// The control virtqueue itself.
    pub queue: Queue,
    /// Optional access platform forwarded to `CtrlQueue::process`.
    pub access_platform: Option<Arc<dyn AccessPlatform>>,
    /// Callback used to raise the queue interrupt.
    pub interrupt_cb: Arc<dyn VirtioInterrupt>,
    /// Queue index reported in `VirtioInterruptType::Queue` when signalling.
    pub queue_index: u16,
}
66 
67 impl NetCtrlEpollHandler {
68     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
69         self.interrupt_cb
70             .trigger(VirtioInterruptType::Queue(queue_index))
71             .map_err(|e| {
72                 error!("Failed to signal used queue: {:?}", e);
73                 DeviceError::FailedSignalingUsedQueue(e)
74             })
75     }
76 
77     pub fn run_ctrl(
78         &mut self,
79         paused: Arc<AtomicBool>,
80         paused_sync: Arc<Barrier>,
81     ) -> std::result::Result<(), EpollHelperError> {
82         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
83         helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?;
84         helper.run(paused, paused_sync, self)?;
85 
86         Ok(())
87     }
88 }
89 
90 impl EpollHelperHandler for NetCtrlEpollHandler {
91     fn handle_event(
92         &mut self,
93         _helper: &mut EpollHelper,
94         event: &epoll::Event,
95     ) -> result::Result<(), EpollHelperError> {
96         let ev_type = event.data as u16;
97         match ev_type {
98             CTRL_QUEUE_EVENT => {
99                 let mem = self.mem.memory();
100                 self.queue_evt.read().map_err(|e| {
101                     EpollHelperError::HandleEvent(anyhow!(
102                         "Failed to get control queue event: {:?}",
103                         e
104                     ))
105                 })?;
106                 self.ctrl_q
107                     .process(mem.deref(), &mut self.queue, self.access_platform.as_ref())
108                     .map_err(|e| {
109                         EpollHelperError::HandleEvent(anyhow!(
110                             "Failed to process control queue: {:?}",
111                             e
112                         ))
113                     })?;
114                 match self.queue.needs_notification(mem.deref()) {
115                     Ok(true) => {
116                         self.signal_used_queue(self.queue_index).map_err(|e| {
117                             EpollHelperError::HandleEvent(anyhow!(
118                                 "Error signalling that control queue was used: {:?}",
119                                 e
120                             ))
121                         })?;
122                     }
123                     Ok(false) => {}
124                     Err(e) => {
125                         return Err(EpollHelperError::HandleEvent(anyhow!(
126                             "Error getting notification state of control queue: {}",
127                             e
128                         )));
129                     }
130                 };
131             }
132             _ => {
133                 return Err(EpollHelperError::HandleEvent(anyhow!(
134                     "Unknown event for virtio-net control queue"
135                 )));
136             }
137         }
138 
139         Ok(())
140     }
141 }
142 
/// Rx/Tx queue pair
// Epoll event identifiers used by `NetEpollHandler`, numbered consecutively
// after `EPOLL_HELPER_EVENT_LAST`.
//
// The guest has made a buffer available to receive a frame into.
pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// The transmit queue has a frame that is ready to send from the guest.
pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
// A frame is available for reading from the tap device to receive in the guest.
pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
// The TAP can be written to. Used after an EAGAIN error to retry TX.
pub const TX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
// New 'wake up' event from the rx rate limiter
pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
// New 'wake up' event from the tx rate limiter
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6;
156 
/// Errors returned while constructing a [`Net`] device.
#[derive(Error, Debug)]
pub enum Error {
    /// Opening the tap interfaces failed (see `Net::new`).
    #[error("Failed to open taps: {0}")]
    OpenTap(OpenTapError),
    /// An operation on an existing tap failed (wrapping an fd, reading or
    /// setting the MTU).
    #[error("Using existing tap: {0}")]
    TapError(TapError),
    /// `dup()` on a caller-provided tap fd failed (see `Net::from_tap_fds`).
    #[error("Error calling dup() on tap fd: {0}")]
    DuplicateTapFd(std::io::Error),
}
166 
/// Result type of this module, with [`Error`] as the error type.
pub type Result<T> = result::Result<T, Error>;
168 
/// Epoll handler servicing one RX/TX virtqueue pair and its tap.
struct NetEpollHandler {
    /// Per-pair networking state: tap, rate limiters, RX/TX helpers and the
    /// epoll registration flags.
    net: NetQueuePair,
    /// Guest memory handle; a memory reference is taken per operation.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Callback used to raise queue interrupts.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    /// Kill event handed to the `EpollHelper` in `run`.
    kill_evt: EventFd,
    /// Pause event handed to the `EpollHelper` in `run`.
    pause_evt: EventFd,
    /// Interrupt index of the RX queue; the TX queue uses
    /// `queue_index_base + 1`.
    queue_index_base: u16,
    /// `(RX queue, TX queue)`.
    queue_pair: (Queue, Queue),
    /// `(RX queue eventfd, TX queue eventfd)`.
    queue_evt_pair: (EventFd, EventFd),
    // Always generate interrupts until the driver has signalled to the device.
    // This mitigates a problem with interrupts from tap events being "lost" upon
    // a restore as the vCPU thread isn't ready to handle the interrupt. This causes
    // issues when combined with VIRTIO_RING_F_EVENT_IDX interrupt suppression.
    driver_awake: bool,
}
184 
185 impl NetEpollHandler {
186     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
187         self.interrupt_cb
188             .trigger(VirtioInterruptType::Queue(queue_index))
189             .map_err(|e| {
190                 error!("Failed to signal used queue: {:?}", e);
191                 DeviceError::FailedSignalingUsedQueue(e)
192             })
193     }
194 
195     fn handle_rx_event(&mut self) -> result::Result<(), DeviceError> {
196         let queue_evt = &self.queue_evt_pair.0;
197         if let Err(e) = queue_evt.read() {
198             error!("Failed to get rx queue event: {:?}", e);
199         }
200 
201         self.net.rx_desc_avail = true;
202 
203         let rate_limit_reached = self
204             .net
205             .rx_rate_limiter
206             .as_ref()
207             .map_or(false, |r| r.is_blocked());
208 
209         // Start to listen on RX_TAP_EVENT only when the rate limit is not reached
210         if !self.net.rx_tap_listening && !rate_limit_reached {
211             net_util::register_listener(
212                 self.net.epoll_fd.unwrap(),
213                 self.net.tap.as_raw_fd(),
214                 epoll::Events::EPOLLIN,
215                 u64::from(self.net.tap_rx_event_id),
216             )
217             .map_err(DeviceError::IoError)?;
218             self.net.rx_tap_listening = true;
219         }
220 
221         Ok(())
222     }
223 
224     fn process_tx(&mut self) -> result::Result<(), DeviceError> {
225         if self
226             .net
227             .process_tx(&self.mem.memory(), &mut self.queue_pair.1)
228             .map_err(DeviceError::NetQueuePair)?
229             || !self.driver_awake
230         {
231             self.signal_used_queue(self.queue_index_base + 1)?;
232             debug!("Signalling TX queue");
233         } else {
234             debug!("Not signalling TX queue");
235         }
236         Ok(())
237     }
238 
239     fn handle_tx_event(&mut self) -> result::Result<(), DeviceError> {
240         let rate_limit_reached = self
241             .net
242             .tx_rate_limiter
243             .as_ref()
244             .map_or(false, |r| r.is_blocked());
245 
246         if !rate_limit_reached {
247             self.process_tx()?;
248         }
249 
250         Ok(())
251     }
252 
253     fn handle_rx_tap_event(&mut self) -> result::Result<(), DeviceError> {
254         if self
255             .net
256             .process_rx(&self.mem.memory(), &mut self.queue_pair.0)
257             .map_err(DeviceError::NetQueuePair)?
258             || !self.driver_awake
259         {
260             self.signal_used_queue(self.queue_index_base)?;
261             debug!("Signalling RX queue");
262         } else {
263             debug!("Not signalling RX queue");
264         }
265         Ok(())
266     }
267 
268     fn run(
269         &mut self,
270         paused: Arc<AtomicBool>,
271         paused_sync: Arc<Barrier>,
272     ) -> result::Result<(), EpollHelperError> {
273         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
274         helper.add_event(self.queue_evt_pair.0.as_raw_fd(), RX_QUEUE_EVENT)?;
275         helper.add_event(self.queue_evt_pair.1.as_raw_fd(), TX_QUEUE_EVENT)?;
276         if let Some(rate_limiter) = &self.net.rx_rate_limiter {
277             helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
278         }
279         if let Some(rate_limiter) = &self.net.tx_rate_limiter {
280             helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
281         }
282 
283         let mem = self.mem.memory();
284         // If there are some already available descriptors on the RX queue,
285         // then we can start the thread while listening onto the TAP.
286         if self
287             .queue_pair
288             .0
289             .used_idx(mem.deref(), Ordering::Acquire)
290             .map_err(EpollHelperError::QueueRingIndex)?
291             < self
292                 .queue_pair
293                 .0
294                 .avail_idx(mem.deref(), Ordering::Acquire)
295                 .map_err(EpollHelperError::QueueRingIndex)?
296         {
297             helper.add_event(self.net.tap.as_raw_fd(), RX_TAP_EVENT)?;
298             self.net.rx_tap_listening = true;
299             info!("Listener registered at start");
300         }
301 
302         // The NetQueuePair needs the epoll fd.
303         self.net.epoll_fd = Some(helper.as_raw_fd());
304 
305         helper.run(paused, paused_sync, self)?;
306 
307         Ok(())
308     }
309 }
310 
311 impl EpollHelperHandler for NetEpollHandler {
312     fn handle_event(
313         &mut self,
314         _helper: &mut EpollHelper,
315         event: &epoll::Event,
316     ) -> result::Result<(), EpollHelperError> {
317         let ev_type = event.data as u16;
318         match ev_type {
319             RX_QUEUE_EVENT => {
320                 self.driver_awake = true;
321                 self.handle_rx_event().map_err(|e| {
322                     EpollHelperError::HandleEvent(anyhow!("Error processing RX queue: {:?}", e))
323                 })?;
324             }
325             TX_QUEUE_EVENT => {
326                 let queue_evt = &self.queue_evt_pair.1;
327                 if let Err(e) = queue_evt.read() {
328                     error!("Failed to get tx queue event: {:?}", e);
329                 }
330                 self.driver_awake = true;
331                 self.handle_tx_event().map_err(|e| {
332                     EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
333                 })?;
334             }
335             TX_TAP_EVENT => {
336                 self.handle_tx_event().map_err(|e| {
337                     EpollHelperError::HandleEvent(anyhow!(
338                         "Error processing TX queue (TAP event): {:?}",
339                         e
340                     ))
341                 })?;
342             }
343             RX_TAP_EVENT => {
344                 self.handle_rx_tap_event().map_err(|e| {
345                     EpollHelperError::HandleEvent(anyhow!("Error processing tap queue: {:?}", e))
346                 })?;
347             }
348             RX_RATE_LIMITER_EVENT => {
349                 if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
350                     // Upon rate limiter event, call the rate limiter handler and register the
351                     // TAP fd for further processing if some RX buffers are available
352                     rate_limiter.event_handler().map_err(|e| {
353                         EpollHelperError::HandleEvent(anyhow!(
354                             "Error from 'rate_limiter.event_handler()': {:?}",
355                             e
356                         ))
357                     })?;
358 
359                     if !self.net.rx_tap_listening && self.net.rx_desc_avail {
360                         net_util::register_listener(
361                             self.net.epoll_fd.unwrap(),
362                             self.net.tap.as_raw_fd(),
363                             epoll::Events::EPOLLIN,
364                             u64::from(self.net.tap_rx_event_id),
365                         )
366                         .map_err(|e| {
367                             EpollHelperError::HandleEvent(anyhow!(
368                                 "Error register_listener with `RX_RATE_LIMITER_EVENT`: {:?}",
369                                 e
370                             ))
371                         })?;
372 
373                         self.net.rx_tap_listening = true;
374                     }
375                 } else {
376                     return Err(EpollHelperError::HandleEvent(anyhow!(
377                         "Unexpected RX_RATE_LIMITER_EVENT"
378                     )));
379                 }
380             }
381             TX_RATE_LIMITER_EVENT => {
382                 if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
383                     // Upon rate limiter event, call the rate limiter handler
384                     // and restart processing the queue.
385                     rate_limiter.event_handler().map_err(|e| {
386                         EpollHelperError::HandleEvent(anyhow!(
387                             "Error from 'rate_limiter.event_handler()': {:?}",
388                             e
389                         ))
390                     })?;
391 
392                     self.driver_awake = true;
393                     self.process_tx().map_err(|e| {
394                         EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
395                     })?;
396                 } else {
397                     return Err(EpollHelperError::HandleEvent(anyhow!(
398                         "Unexpected TX_RATE_LIMITER_EVENT"
399                     )));
400                 }
401             }
402             _ => {
403                 return Err(EpollHelperError::HandleEvent(anyhow!(
404                     "Unexpected event: {}",
405                     ev_type
406                 )));
407             }
408         }
409         Ok(())
410     }
411 }
412 
/// virtio-net device backed by one tap per RX/TX queue pair.
pub struct Net {
    /// State shared by all virtio devices (features, queue sizes, pause and
    /// kill plumbing, epoll threads).
    common: VirtioCommon,
    /// Device identifier, used for thread names, events and snapshots.
    id: String,
    /// Tap handles; `activate` hands one to each queue pair and a copy of
    /// the whole list to the control queue.
    taps: Vec<Tap>,
    /// Device configuration space served by `read_config`.
    config: VirtioNetConfig,
    /// Join handle of the control queue thread, when one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// RX/TX byte and frame counters, shared with every queue pair.
    counters: NetCounters,
    /// Seccomp action passed to the spawned virtio threads.
    seccomp_action: SeccompAction,
    /// Optional configuration from which an RX and a TX rate limiter are
    /// built for every queue pair.
    rate_limiter_config: Option<RateLimiterConfig>,
    /// Exit event passed to the spawned virtio threads.
    exit_evt: EventFd,
}
424 
/// Snapshot state of a [`Net`] device, as produced by `Net::state` and
/// consumed by `Net::new_with_tap` on restore.
#[derive(Versionize)]
pub struct NetState {
    /// Feature bits offered by the device.
    pub avail_features: u64,
    /// Feature bits acknowledged by the driver.
    pub acked_features: u64,
    /// Device configuration space.
    pub config: VirtioNetConfig,
    /// Size of every queue.
    pub queue_size: Vec<u16>,
}

impl VersionMapped for NetState {}
434 
435 impl Net {
436     /// Create a new virtio network device with the given TAP interface.
437     #[allow(clippy::too_many_arguments)]
438     fn new_with_tap(
439         id: String,
440         taps: Vec<Tap>,
441         guest_mac: Option<MacAddr>,
442         iommu: bool,
443         num_queues: usize,
444         queue_size: u16,
445         seccomp_action: SeccompAction,
446         rate_limiter_config: Option<RateLimiterConfig>,
447         exit_evt: EventFd,
448         state: Option<NetState>,
449     ) -> Result<Self> {
450         assert!(!taps.is_empty());
451 
452         let mtu = taps[0].mtu().map_err(Error::TapError)? as u16;
453 
454         let (avail_features, acked_features, config, queue_sizes) = if let Some(state) = state {
455             info!("Restoring virtio-net {}", id);
456             (
457                 state.avail_features,
458                 state.acked_features,
459                 state.config,
460                 state.queue_size,
461             )
462         } else {
463             let mut avail_features = 1 << VIRTIO_NET_F_CSUM
464                 | 1 << VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
465                 | 1 << VIRTIO_NET_F_GUEST_CSUM
466                 | 1 << VIRTIO_NET_F_GUEST_ECN
467                 | 1 << VIRTIO_NET_F_GUEST_TSO4
468                 | 1 << VIRTIO_NET_F_GUEST_TSO6
469                 | 1 << VIRTIO_NET_F_GUEST_UFO
470                 | 1 << VIRTIO_NET_F_HOST_ECN
471                 | 1 << VIRTIO_NET_F_HOST_TSO4
472                 | 1 << VIRTIO_NET_F_HOST_TSO6
473                 | 1 << VIRTIO_NET_F_HOST_UFO
474                 | 1 << VIRTIO_NET_F_MTU
475                 | 1 << VIRTIO_RING_F_EVENT_IDX
476                 | 1 << VIRTIO_F_VERSION_1;
477 
478             if iommu {
479                 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
480             }
481 
482             avail_features |= 1 << VIRTIO_NET_F_CTRL_VQ;
483             let queue_num = num_queues + 1;
484 
485             let mut config = VirtioNetConfig::default();
486             if let Some(mac) = guest_mac {
487                 build_net_config_space(
488                     &mut config,
489                     mac,
490                     num_queues,
491                     Some(mtu),
492                     &mut avail_features,
493                 );
494             } else {
495                 build_net_config_space_with_mq(
496                     &mut config,
497                     num_queues,
498                     Some(mtu),
499                     &mut avail_features,
500                 );
501             }
502 
503             (avail_features, 0, config, vec![queue_size; queue_num])
504         };
505 
506         Ok(Net {
507             common: VirtioCommon {
508                 device_type: VirtioDeviceType::Net as u32,
509                 avail_features,
510                 acked_features,
511                 queue_sizes,
512                 paused_sync: Some(Arc::new(Barrier::new((num_queues / 2) + 1))),
513                 min_queues: 2,
514                 ..Default::default()
515             },
516             id,
517             taps,
518             config,
519             ctrl_queue_epoll_thread: None,
520             counters: NetCounters::default(),
521             seccomp_action,
522             rate_limiter_config,
523             exit_evt,
524         })
525     }
526 
527     /// Create a new virtio network device with the given IP address and
528     /// netmask.
529     #[allow(clippy::too_many_arguments)]
530     pub fn new(
531         id: String,
532         if_name: Option<&str>,
533         ip_addr: Option<Ipv4Addr>,
534         netmask: Option<Ipv4Addr>,
535         guest_mac: Option<MacAddr>,
536         host_mac: &mut Option<MacAddr>,
537         mtu: Option<u16>,
538         iommu: bool,
539         num_queues: usize,
540         queue_size: u16,
541         seccomp_action: SeccompAction,
542         rate_limiter_config: Option<RateLimiterConfig>,
543         exit_evt: EventFd,
544         state: Option<NetState>,
545     ) -> Result<Self> {
546         let taps = open_tap(
547             if_name,
548             ip_addr,
549             netmask,
550             host_mac,
551             mtu,
552             num_queues / 2,
553             None,
554         )
555         .map_err(Error::OpenTap)?;
556 
557         Self::new_with_tap(
558             id,
559             taps,
560             guest_mac,
561             iommu,
562             num_queues,
563             queue_size,
564             seccomp_action,
565             rate_limiter_config,
566             exit_evt,
567             state,
568         )
569     }
570 
571     #[allow(clippy::too_many_arguments)]
572     pub fn from_tap_fds(
573         id: String,
574         fds: &[RawFd],
575         guest_mac: Option<MacAddr>,
576         mtu: Option<u16>,
577         iommu: bool,
578         queue_size: u16,
579         seccomp_action: SeccompAction,
580         rate_limiter_config: Option<RateLimiterConfig>,
581         exit_evt: EventFd,
582         state: Option<NetState>,
583     ) -> Result<Self> {
584         let mut taps: Vec<Tap> = Vec::new();
585         let num_queue_pairs = fds.len();
586 
587         for fd in fds.iter() {
588             // Duplicate so that it can survive reboots
589             // SAFETY: FFI call to dup. Trivially safe.
590             let fd = unsafe { libc::dup(*fd) };
591             if fd < 0 {
592                 return Err(Error::DuplicateTapFd(std::io::Error::last_os_error()));
593             }
594             let tap = Tap::from_tap_fd(fd, num_queue_pairs).map_err(Error::TapError)?;
595             taps.push(tap);
596         }
597 
598         assert!(!taps.is_empty());
599 
600         if let Some(mtu) = mtu {
601             taps[0].set_mtu(mtu as i32).map_err(Error::TapError)?;
602         }
603 
604         Self::new_with_tap(
605             id,
606             taps,
607             guest_mac,
608             iommu,
609             num_queue_pairs * 2,
610             queue_size,
611             seccomp_action,
612             rate_limiter_config,
613             exit_evt,
614             state,
615         )
616     }
617 
618     fn state(&self) -> NetState {
619         NetState {
620             avail_features: self.common.avail_features,
621             acked_features: self.common.acked_features,
622             config: self.config,
623             queue_size: self.common.queue_sizes.clone(),
624         }
625     }
626 }
627 
628 impl Drop for Net {
629     fn drop(&mut self) {
630         if let Some(kill_evt) = self.common.kill_evt.take() {
631             // Ignore the result because there is nothing we can do about it.
632             let _ = kill_evt.write(1);
633         }
634         // Needed to ensure all references to tap FDs are dropped (#4868)
635         self.common.wait_for_epoll_threads();
636     }
637 }
638 
639 impl VirtioDevice for Net {
640     fn device_type(&self) -> u32 {
641         self.common.device_type
642     }
643 
644     fn queue_max_sizes(&self) -> &[u16] {
645         &self.common.queue_sizes
646     }
647 
648     fn features(&self) -> u64 {
649         self.common.avail_features
650     }
651 
652     fn ack_features(&mut self, value: u64) {
653         self.common.ack_features(value)
654     }
655 
656     fn read_config(&self, offset: u64, data: &mut [u8]) {
657         self.read_config_from_slice(self.config.as_slice(), offset, data);
658     }
659 
660     fn activate(
661         &mut self,
662         mem: GuestMemoryAtomic<GuestMemoryMmap>,
663         interrupt_cb: Arc<dyn VirtioInterrupt>,
664         mut queues: Vec<(usize, Queue, EventFd)>,
665     ) -> ActivateResult {
666         self.common.activate(&queues, &interrupt_cb)?;
667 
668         let num_queues = queues.len();
669         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
670         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
671             let ctrl_queue_index = num_queues - 1;
672             let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);
673 
674             ctrl_queue.set_event_idx(event_idx);
675 
676             let (kill_evt, pause_evt) = self.common.dup_eventfds();
677             let mut ctrl_handler = NetCtrlEpollHandler {
678                 mem: mem.clone(),
679                 kill_evt,
680                 pause_evt,
681                 ctrl_q: CtrlQueue::new(self.taps.clone()),
682                 queue: ctrl_queue,
683                 queue_evt: ctrl_queue_evt,
684                 access_platform: self.common.access_platform.clone(),
685                 queue_index: ctrl_queue_index as u16,
686                 interrupt_cb: interrupt_cb.clone(),
687             };
688 
689             let paused = self.common.paused.clone();
690             // Let's update the barrier as we need 1 for each RX/TX pair +
691             // 1 for the control queue + 1 for the main thread signalling
692             // the pause.
693             self.common.paused_sync = Some(Arc::new(Barrier::new(self.taps.len() + 2)));
694             let paused_sync = self.common.paused_sync.clone();
695 
696             let mut epoll_threads = Vec::new();
697             spawn_virtio_thread(
698                 &format!("{}_ctrl", &self.id),
699                 &self.seccomp_action,
700                 Thread::VirtioNetCtl,
701                 &mut epoll_threads,
702                 &self.exit_evt,
703                 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
704             )?;
705             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
706         }
707 
708         let mut epoll_threads = Vec::new();
709         let mut taps = self.taps.clone();
710         for i in 0..queues.len() / 2 {
711             let rx = RxVirtio::new();
712             let tx = TxVirtio::new();
713             let rx_tap_listening = false;
714 
715             let (_, queue_0, queue_evt_0) = queues.remove(0);
716             let (_, queue_1, queue_evt_1) = queues.remove(0);
717             let mut queue_pair = (queue_0, queue_1);
718             queue_pair.0.set_event_idx(event_idx);
719             queue_pair.1.set_event_idx(event_idx);
720 
721             let queue_evt_pair = (queue_evt_0, queue_evt_1);
722 
723             let (kill_evt, pause_evt) = self.common.dup_eventfds();
724 
725             let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
726                 .rate_limiter_config
727                 .map(RateLimiterConfig::try_into)
728                 .transpose()
729                 .map_err(ActivateError::CreateRateLimiter)?;
730 
731             let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
732                 .rate_limiter_config
733                 .map(RateLimiterConfig::try_into)
734                 .transpose()
735                 .map_err(ActivateError::CreateRateLimiter)?;
736 
737             let tap = taps.remove(0);
738             tap.set_offload(virtio_features_to_tap_offload(self.common.acked_features))
739                 .map_err(|e| {
740                     error!("Error programming tap offload: {:?}", e);
741                     ActivateError::BadActivate
742                 })?;
743 
744             let mut handler = NetEpollHandler {
745                 net: NetQueuePair {
746                     tap_for_write_epoll: tap.clone(),
747                     tap,
748                     rx,
749                     tx,
750                     epoll_fd: None,
751                     rx_tap_listening,
752                     tx_tap_listening: false,
753                     counters: self.counters.clone(),
754                     tap_rx_event_id: RX_TAP_EVENT,
755                     tap_tx_event_id: TX_TAP_EVENT,
756                     rx_desc_avail: false,
757                     rx_rate_limiter,
758                     tx_rate_limiter,
759                     access_platform: self.common.access_platform.clone(),
760                 },
761                 mem: mem.clone(),
762                 queue_index_base: (i * 2) as u16,
763                 queue_pair,
764                 queue_evt_pair,
765                 interrupt_cb: interrupt_cb.clone(),
766                 kill_evt,
767                 pause_evt,
768                 driver_awake: false,
769             };
770 
771             let paused = self.common.paused.clone();
772             let paused_sync = self.common.paused_sync.clone();
773 
774             spawn_virtio_thread(
775                 &format!("{}_qp{}", self.id.clone(), i),
776                 &self.seccomp_action,
777                 Thread::VirtioNet,
778                 &mut epoll_threads,
779                 &self.exit_evt,
780                 move || handler.run(paused, paused_sync.unwrap()),
781             )?;
782         }
783 
784         self.common.epoll_threads = Some(epoll_threads);
785 
786         event!("virtio-device", "activated", "id", &self.id);
787         Ok(())
788     }
789 
790     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
791         let result = self.common.reset();
792         event!("virtio-device", "reset", "id", &self.id);
793         result
794     }
795 
796     fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
797         let mut counters = HashMap::new();
798 
799         counters.insert(
800             "rx_bytes",
801             Wrapping(self.counters.rx_bytes.load(Ordering::Acquire)),
802         );
803         counters.insert(
804             "rx_frames",
805             Wrapping(self.counters.rx_frames.load(Ordering::Acquire)),
806         );
807         counters.insert(
808             "tx_bytes",
809             Wrapping(self.counters.tx_bytes.load(Ordering::Acquire)),
810         );
811         counters.insert(
812             "tx_frames",
813             Wrapping(self.counters.tx_frames.load(Ordering::Acquire)),
814         );
815 
816         Some(counters)
817     }
818 
819     fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
820         self.common.set_access_platform(access_platform)
821     }
822 }
823 
824 impl Pausable for Net {
825     fn pause(&mut self) -> result::Result<(), MigratableError> {
826         self.common.pause()
827     }
828 
829     fn resume(&mut self) -> result::Result<(), MigratableError> {
830         self.common.resume()?;
831 
832         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
833             ctrl_queue_epoll_thread.thread().unpark();
834         }
835         Ok(())
836     }
837 }
838 
839 impl Snapshottable for Net {
840     fn id(&self) -> String {
841         self.id.clone()
842     }
843 
844     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
845         Snapshot::new_from_versioned_state(&self.id, &self.state())
846     }
847 }
// Empty impls: no trait items are overridden for `Net`.
impl Transportable for Net {}
impl Migratable for Net {}
850