xref: /cloud-hypervisor/virtio-devices/src/net.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the THIRD-PARTY file.
7 
8 use std::collections::HashMap;
9 use std::net::Ipv4Addr;
10 use std::num::Wrapping;
11 use std::ops::Deref;
12 use std::os::unix::io::{AsRawFd, RawFd};
13 use std::result;
14 use std::sync::atomic::{AtomicBool, Ordering};
15 use std::sync::{Arc, Barrier};
16 use std::thread;
17 
18 use anyhow::anyhow;
19 #[cfg(not(fuzzing))]
20 use net_util::virtio_features_to_tap_offload;
21 use net_util::CtrlQueue;
22 use net_util::{
23     build_net_config_space, build_net_config_space_with_mq, open_tap, MacAddr, NetCounters,
24     NetQueuePair, OpenTapError, RxVirtio, Tap, TapError, TxVirtio, VirtioNetConfig,
25 };
26 use seccompiler::SeccompAction;
27 use serde::{Deserialize, Serialize};
28 use thiserror::Error;
29 use virtio_bindings::virtio_config::*;
30 use virtio_bindings::virtio_net::*;
31 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
32 use virtio_queue::{Queue, QueueT};
33 use vm_memory::{ByteValued, GuestAddressSpace, GuestMemoryAtomic};
34 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
35 use vm_virtio::AccessPlatform;
36 use vmm_sys_util::eventfd::EventFd;
37 
38 use super::Error as DeviceError;
39 use super::{
40     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
41     RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
42     EPOLL_HELPER_EVENT_LAST,
43 };
44 use crate::seccomp_filters::Thread;
45 use crate::thread_helper::spawn_virtio_thread;
46 use crate::GuestMemoryMmap;
47 use crate::VirtioInterrupt;
48 
// Control queue
/// Epoll event id under which the control queue's eventfd is registered
/// (an event is available on the control queue).
const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

/// Following the VIRTIO specification, the MTU should be at least 1280.
pub const MIN_MTU: u16 = 1280;
55 
/// State owned by the epoll loop that services the virtio-net control queue.
pub struct NetCtrlEpollHandler {
    /// Guest memory handle; a view of it is taken for every control queue event.
    pub mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Eventfd handed to the `EpollHelper` as the kill event.
    pub kill_evt: EventFd,
    /// Eventfd handed to the `EpollHelper` as the pause event.
    pub pause_evt: EventFd,
    /// Processor for the commands found on the control queue.
    pub ctrl_q: CtrlQueue,
    /// Eventfd registered under `CTRL_QUEUE_EVENT`; read on every event.
    pub queue_evt: EventFd,
    /// The control virtqueue itself.
    pub queue: Queue,
    /// Optional platform hook forwarded to `CtrlQueue::process`.
    pub access_platform: Option<Arc<dyn AccessPlatform>>,
    /// Callback used to raise the queue interrupt towards the guest.
    pub interrupt_cb: Arc<dyn VirtioInterrupt>,
    /// Index passed to the interrupt callback when signalling the control queue.
    pub queue_index: u16,
}
67 
68 impl NetCtrlEpollHandler {
69     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
70         self.interrupt_cb
71             .trigger(VirtioInterruptType::Queue(queue_index))
72             .map_err(|e| {
73                 error!("Failed to signal used queue: {:?}", e);
74                 DeviceError::FailedSignalingUsedQueue(e)
75             })
76     }
77 
78     pub fn run_ctrl(
79         &mut self,
80         paused: Arc<AtomicBool>,
81         paused_sync: Arc<Barrier>,
82     ) -> std::result::Result<(), EpollHelperError> {
83         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
84         helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?;
85         helper.run(paused, paused_sync, self)?;
86 
87         Ok(())
88     }
89 }
90 
91 impl EpollHelperHandler for NetCtrlEpollHandler {
92     fn handle_event(
93         &mut self,
94         _helper: &mut EpollHelper,
95         event: &epoll::Event,
96     ) -> result::Result<(), EpollHelperError> {
97         let ev_type = event.data as u16;
98         match ev_type {
99             CTRL_QUEUE_EVENT => {
100                 let mem = self.mem.memory();
101                 self.queue_evt.read().map_err(|e| {
102                     EpollHelperError::HandleEvent(anyhow!(
103                         "Failed to get control queue event: {:?}",
104                         e
105                     ))
106                 })?;
107                 self.ctrl_q
108                     .process(mem.deref(), &mut self.queue, self.access_platform.as_ref())
109                     .map_err(|e| {
110                         EpollHelperError::HandleEvent(anyhow!(
111                             "Failed to process control queue: {:?}",
112                             e
113                         ))
114                     })?;
115                 match self.queue.needs_notification(mem.deref()) {
116                     Ok(true) => {
117                         self.signal_used_queue(self.queue_index).map_err(|e| {
118                             EpollHelperError::HandleEvent(anyhow!(
119                                 "Error signalling that control queue was used: {:?}",
120                                 e
121                             ))
122                         })?;
123                     }
124                     Ok(false) => {}
125                     Err(e) => {
126                         return Err(EpollHelperError::HandleEvent(anyhow!(
127                             "Error getting notification state of control queue: {}",
128                             e
129                         )));
130                     }
131                 };
132             }
133             _ => {
134                 return Err(EpollHelperError::HandleEvent(anyhow!(
135                     "Unknown event for virtio-net control queue"
136                 )));
137             }
138         }
139 
140         Ok(())
141     }
142 }
143 
// Rx/Tx queue pair
/// The guest has made a buffer available to receive a frame into.
pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
/// The transmit queue has a frame that is ready to send from the guest.
pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
/// A frame is available for reading from the tap device to receive in the guest.
pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
/// The TAP can be written to. Used after an EAGAIN error to retry TX.
pub const TX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
/// New 'wake up' event from the rx rate limiter
pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
/// New 'wake up' event from the tx rate limiter
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6;
157 
/// Errors returned by the `Net` constructors.
#[derive(Error, Debug)]
pub enum Error {
    /// `open_tap` failed while creating the TAP interfaces.
    #[error("Failed to open taps: {0}")]
    OpenTap(OpenTapError),
    /// An operation on an already opened TAP failed (reading or setting the
    /// MTU, or wrapping an existing fd).
    #[error("Using existing tap: {0}")]
    TapError(TapError),
    /// `dup()` on a caller-provided TAP fd failed.
    #[error("Error calling dup() on tap fd: {0}")]
    DuplicateTapFd(std::io::Error),
}
167 
/// Result type of the `Net` constructors, carrying this module's `Error`.
pub type Result<T> = result::Result<T, Error>;
169 
/// State owned by the epoll loop that services one RX/TX queue pair.
struct NetEpollHandler {
    // Tap, rate limiters and RX/TX bookkeeping for this queue pair.
    net: NetQueuePair,
    // Guest memory handle; a view of it is taken whenever queues are processed.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // Callback used to raise queue interrupts towards the guest.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Eventfds handed to the `EpollHelper` as kill and pause events.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Index signalled for the RX queue; the TX queue uses `queue_index_base + 1`.
    queue_index_base: u16,
    // (RX queue, TX queue).
    queue_pair: (Queue, Queue),
    // (RX queue eventfd, TX queue eventfd).
    queue_evt_pair: (EventFd, EventFd),
    // Always generate interrupts until the driver has signalled to the device.
    // This mitigates a problem with interrupts from tap events being "lost" upon
    // a restore as the vCPU thread isn't ready to handle the interrupt. This causes
    // issues when combined with VIRTIO_RING_F_EVENT_IDX interrupt suppression.
    driver_awake: bool,
}
185 
186 impl NetEpollHandler {
187     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
188         self.interrupt_cb
189             .trigger(VirtioInterruptType::Queue(queue_index))
190             .map_err(|e| {
191                 error!("Failed to signal used queue: {:?}", e);
192                 DeviceError::FailedSignalingUsedQueue(e)
193             })
194     }
195 
196     fn handle_rx_event(&mut self) -> result::Result<(), DeviceError> {
197         let queue_evt = &self.queue_evt_pair.0;
198         if let Err(e) = queue_evt.read() {
199             error!("Failed to get rx queue event: {:?}", e);
200         }
201 
202         self.net.rx_desc_avail = true;
203 
204         let rate_limit_reached = self
205             .net
206             .rx_rate_limiter
207             .as_ref()
208             .map_or(false, |r| r.is_blocked());
209 
210         // Start to listen on RX_TAP_EVENT only when the rate limit is not reached
211         if !self.net.rx_tap_listening && !rate_limit_reached {
212             net_util::register_listener(
213                 self.net.epoll_fd.unwrap(),
214                 self.net.tap.as_raw_fd(),
215                 epoll::Events::EPOLLIN,
216                 u64::from(self.net.tap_rx_event_id),
217             )
218             .map_err(DeviceError::IoError)?;
219             self.net.rx_tap_listening = true;
220         }
221 
222         Ok(())
223     }
224 
225     fn process_tx(&mut self) -> result::Result<(), DeviceError> {
226         if self
227             .net
228             .process_tx(&self.mem.memory(), &mut self.queue_pair.1)
229             .map_err(DeviceError::NetQueuePair)?
230             || !self.driver_awake
231         {
232             self.signal_used_queue(self.queue_index_base + 1)?;
233             debug!("Signalling TX queue");
234         } else {
235             debug!("Not signalling TX queue");
236         }
237         Ok(())
238     }
239 
240     fn handle_tx_event(&mut self) -> result::Result<(), DeviceError> {
241         let rate_limit_reached = self
242             .net
243             .tx_rate_limiter
244             .as_ref()
245             .map_or(false, |r| r.is_blocked());
246 
247         if !rate_limit_reached {
248             self.process_tx()?;
249         }
250 
251         Ok(())
252     }
253 
254     fn handle_rx_tap_event(&mut self) -> result::Result<(), DeviceError> {
255         if self
256             .net
257             .process_rx(&self.mem.memory(), &mut self.queue_pair.0)
258             .map_err(DeviceError::NetQueuePair)?
259             || !self.driver_awake
260         {
261             self.signal_used_queue(self.queue_index_base)?;
262             debug!("Signalling RX queue");
263         } else {
264             debug!("Not signalling RX queue");
265         }
266         Ok(())
267     }
268 
269     fn run(
270         &mut self,
271         paused: Arc<AtomicBool>,
272         paused_sync: Arc<Barrier>,
273     ) -> result::Result<(), EpollHelperError> {
274         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
275         helper.add_event(self.queue_evt_pair.0.as_raw_fd(), RX_QUEUE_EVENT)?;
276         helper.add_event(self.queue_evt_pair.1.as_raw_fd(), TX_QUEUE_EVENT)?;
277         if let Some(rate_limiter) = &self.net.rx_rate_limiter {
278             helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
279         }
280         if let Some(rate_limiter) = &self.net.tx_rate_limiter {
281             helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
282         }
283 
284         let mem = self.mem.memory();
285         // If there are some already available descriptors on the RX queue,
286         // then we can start the thread while listening onto the TAP.
287         if self
288             .queue_pair
289             .0
290             .used_idx(mem.deref(), Ordering::Acquire)
291             .map_err(EpollHelperError::QueueRingIndex)?
292             < self
293                 .queue_pair
294                 .0
295                 .avail_idx(mem.deref(), Ordering::Acquire)
296                 .map_err(EpollHelperError::QueueRingIndex)?
297         {
298             helper.add_event(self.net.tap.as_raw_fd(), RX_TAP_EVENT)?;
299             self.net.rx_tap_listening = true;
300             info!("Listener registered at start");
301         }
302 
303         // The NetQueuePair needs the epoll fd.
304         self.net.epoll_fd = Some(helper.as_raw_fd());
305 
306         helper.run(paused, paused_sync, self)?;
307 
308         Ok(())
309     }
310 }
311 
312 impl EpollHelperHandler for NetEpollHandler {
313     fn handle_event(
314         &mut self,
315         _helper: &mut EpollHelper,
316         event: &epoll::Event,
317     ) -> result::Result<(), EpollHelperError> {
318         let ev_type = event.data as u16;
319         match ev_type {
320             RX_QUEUE_EVENT => {
321                 self.driver_awake = true;
322                 self.handle_rx_event().map_err(|e| {
323                     EpollHelperError::HandleEvent(anyhow!("Error processing RX queue: {:?}", e))
324                 })?;
325             }
326             TX_QUEUE_EVENT => {
327                 let queue_evt = &self.queue_evt_pair.1;
328                 if let Err(e) = queue_evt.read() {
329                     error!("Failed to get tx queue event: {:?}", e);
330                 }
331                 self.driver_awake = true;
332                 self.handle_tx_event().map_err(|e| {
333                     EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
334                 })?;
335             }
336             TX_TAP_EVENT => {
337                 self.handle_tx_event().map_err(|e| {
338                     EpollHelperError::HandleEvent(anyhow!(
339                         "Error processing TX queue (TAP event): {:?}",
340                         e
341                     ))
342                 })?;
343             }
344             RX_TAP_EVENT => {
345                 self.handle_rx_tap_event().map_err(|e| {
346                     EpollHelperError::HandleEvent(anyhow!("Error processing tap queue: {:?}", e))
347                 })?;
348             }
349             RX_RATE_LIMITER_EVENT => {
350                 if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
351                     // Upon rate limiter event, call the rate limiter handler and register the
352                     // TAP fd for further processing if some RX buffers are available
353                     rate_limiter.event_handler().map_err(|e| {
354                         EpollHelperError::HandleEvent(anyhow!(
355                             "Error from 'rate_limiter.event_handler()': {:?}",
356                             e
357                         ))
358                     })?;
359 
360                     if !self.net.rx_tap_listening && self.net.rx_desc_avail {
361                         net_util::register_listener(
362                             self.net.epoll_fd.unwrap(),
363                             self.net.tap.as_raw_fd(),
364                             epoll::Events::EPOLLIN,
365                             u64::from(self.net.tap_rx_event_id),
366                         )
367                         .map_err(|e| {
368                             EpollHelperError::HandleEvent(anyhow!(
369                                 "Error register_listener with `RX_RATE_LIMITER_EVENT`: {:?}",
370                                 e
371                             ))
372                         })?;
373 
374                         self.net.rx_tap_listening = true;
375                     }
376                 } else {
377                     return Err(EpollHelperError::HandleEvent(anyhow!(
378                         "Unexpected RX_RATE_LIMITER_EVENT"
379                     )));
380                 }
381             }
382             TX_RATE_LIMITER_EVENT => {
383                 if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
384                     // Upon rate limiter event, call the rate limiter handler
385                     // and restart processing the queue.
386                     rate_limiter.event_handler().map_err(|e| {
387                         EpollHelperError::HandleEvent(anyhow!(
388                             "Error from 'rate_limiter.event_handler()': {:?}",
389                             e
390                         ))
391                     })?;
392 
393                     self.driver_awake = true;
394                     self.process_tx().map_err(|e| {
395                         EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
396                     })?;
397                 } else {
398                     return Err(EpollHelperError::HandleEvent(anyhow!(
399                         "Unexpected TX_RATE_LIMITER_EVENT"
400                     )));
401                 }
402             }
403             _ => {
404                 return Err(EpollHelperError::HandleEvent(anyhow!(
405                     "Unexpected event: {}",
406                     ev_type
407                 )));
408             }
409         }
410         Ok(())
411     }
412 }
413 
/// Virtio network device backed by one or more TAP interfaces.
pub struct Net {
    // Shared virtio device state (features, queue sizes, pause/kill plumbing).
    common: VirtioCommon,
    // Device identifier, used in thread names, logs and events.
    id: String,
    // TAP handles; activation hands one to each RX/TX queue pair.
    taps: Vec<Tap>,
    // Device configuration space exposed through `read_config`.
    config: VirtioNetConfig,
    // Join handle of the control queue thread, if one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    // RX/TX byte and frame counters, cloned into every queue pair handler.
    counters: NetCounters,
    // Seccomp action passed to every spawned virtio thread.
    seccomp_action: SeccompAction,
    // Optional rate limiter configuration, applied to both RX and TX.
    rate_limiter_config: Option<RateLimiterConfig>,
    // Eventfd passed to every spawned virtio thread.
    exit_evt: EventFd,
}
425 
/// Serializable device state captured by `snapshot` and consumed by the
/// constructors when restoring.
#[derive(Serialize, Deserialize)]
pub struct NetState {
    /// Feature bits offered by the device.
    pub avail_features: u64,
    /// Feature bits acknowledged by the driver.
    pub acked_features: u64,
    /// Device configuration space.
    pub config: VirtioNetConfig,
    /// Size of every queue of the device.
    pub queue_size: Vec<u16>,
}
433 
434 impl Net {
435     /// Create a new virtio network device with the given TAP interface.
436     #[allow(clippy::too_many_arguments)]
437     pub fn new_with_tap(
438         id: String,
439         taps: Vec<Tap>,
440         guest_mac: Option<MacAddr>,
441         iommu: bool,
442         num_queues: usize,
443         queue_size: u16,
444         seccomp_action: SeccompAction,
445         rate_limiter_config: Option<RateLimiterConfig>,
446         exit_evt: EventFd,
447         state: Option<NetState>,
448         offload_tso: bool,
449         offload_ufo: bool,
450         offload_csum: bool,
451     ) -> Result<Self> {
452         assert!(!taps.is_empty());
453 
454         let mtu = taps[0].mtu().map_err(Error::TapError)? as u16;
455 
456         let (avail_features, acked_features, config, queue_sizes, paused) =
457             if let Some(state) = state {
458                 info!("Restoring virtio-net {}", id);
459                 (
460                     state.avail_features,
461                     state.acked_features,
462                     state.config,
463                     state.queue_size,
464                     true,
465                 )
466             } else {
467                 let mut avail_features =
468                     1 << VIRTIO_NET_F_MTU | 1 << VIRTIO_RING_F_EVENT_IDX | 1 << VIRTIO_F_VERSION_1;
469 
470                 if iommu {
471                     avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
472                 }
473 
474                 // Configure TSO/UFO features when hardware checksum offload is enabled.
475                 if offload_csum {
476                     avail_features |= 1 << VIRTIO_NET_F_CSUM
477                         | 1 << VIRTIO_NET_F_GUEST_CSUM
478                         | 1 << VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;
479 
480                     if offload_tso {
481                         avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
482                             | 1 << VIRTIO_NET_F_HOST_TSO4
483                             | 1 << VIRTIO_NET_F_HOST_TSO6
484                             | 1 << VIRTIO_NET_F_GUEST_ECN
485                             | 1 << VIRTIO_NET_F_GUEST_TSO4
486                             | 1 << VIRTIO_NET_F_GUEST_TSO6;
487                     }
488 
489                     if offload_ufo {
490                         avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
491                     }
492                 }
493 
494                 avail_features |= 1 << VIRTIO_NET_F_CTRL_VQ;
495                 let queue_num = num_queues + 1;
496 
497                 let mut config = VirtioNetConfig::default();
498                 if let Some(mac) = guest_mac {
499                     build_net_config_space(
500                         &mut config,
501                         mac,
502                         num_queues,
503                         Some(mtu),
504                         &mut avail_features,
505                     );
506                 } else {
507                     build_net_config_space_with_mq(
508                         &mut config,
509                         num_queues,
510                         Some(mtu),
511                         &mut avail_features,
512                     );
513                 }
514 
515                 (
516                     avail_features,
517                     0,
518                     config,
519                     vec![queue_size; queue_num],
520                     false,
521                 )
522             };
523 
524         Ok(Net {
525             common: VirtioCommon {
526                 device_type: VirtioDeviceType::Net as u32,
527                 avail_features,
528                 acked_features,
529                 queue_sizes,
530                 paused_sync: Some(Arc::new(Barrier::new((num_queues / 2) + 1))),
531                 min_queues: 2,
532                 paused: Arc::new(AtomicBool::new(paused)),
533                 ..Default::default()
534             },
535             id,
536             taps,
537             config,
538             ctrl_queue_epoll_thread: None,
539             counters: NetCounters::default(),
540             seccomp_action,
541             rate_limiter_config,
542             exit_evt,
543         })
544     }
545 
546     /// Create a new virtio network device with the given IP address and
547     /// netmask.
548     #[allow(clippy::too_many_arguments)]
549     pub fn new(
550         id: String,
551         if_name: Option<&str>,
552         ip_addr: Option<Ipv4Addr>,
553         netmask: Option<Ipv4Addr>,
554         guest_mac: Option<MacAddr>,
555         host_mac: &mut Option<MacAddr>,
556         mtu: Option<u16>,
557         iommu: bool,
558         num_queues: usize,
559         queue_size: u16,
560         seccomp_action: SeccompAction,
561         rate_limiter_config: Option<RateLimiterConfig>,
562         exit_evt: EventFd,
563         state: Option<NetState>,
564         offload_tso: bool,
565         offload_ufo: bool,
566         offload_csum: bool,
567     ) -> Result<Self> {
568         let taps = open_tap(
569             if_name,
570             ip_addr,
571             netmask,
572             host_mac,
573             mtu,
574             num_queues / 2,
575             None,
576         )
577         .map_err(Error::OpenTap)?;
578 
579         Self::new_with_tap(
580             id,
581             taps,
582             guest_mac,
583             iommu,
584             num_queues,
585             queue_size,
586             seccomp_action,
587             rate_limiter_config,
588             exit_evt,
589             state,
590             offload_tso,
591             offload_ufo,
592             offload_csum,
593         )
594     }
595 
596     #[allow(clippy::too_many_arguments)]
597     pub fn from_tap_fds(
598         id: String,
599         fds: &[RawFd],
600         guest_mac: Option<MacAddr>,
601         mtu: Option<u16>,
602         iommu: bool,
603         queue_size: u16,
604         seccomp_action: SeccompAction,
605         rate_limiter_config: Option<RateLimiterConfig>,
606         exit_evt: EventFd,
607         state: Option<NetState>,
608         offload_tso: bool,
609         offload_ufo: bool,
610         offload_csum: bool,
611     ) -> Result<Self> {
612         let mut taps: Vec<Tap> = Vec::new();
613         let num_queue_pairs = fds.len();
614 
615         for fd in fds.iter() {
616             // Duplicate so that it can survive reboots
617             // SAFETY: FFI call to dup. Trivially safe.
618             let fd = unsafe { libc::dup(*fd) };
619             if fd < 0 {
620                 return Err(Error::DuplicateTapFd(std::io::Error::last_os_error()));
621             }
622             let tap = Tap::from_tap_fd(fd, num_queue_pairs).map_err(Error::TapError)?;
623             taps.push(tap);
624         }
625 
626         assert!(!taps.is_empty());
627 
628         if let Some(mtu) = mtu {
629             taps[0].set_mtu(mtu as i32).map_err(Error::TapError)?;
630         }
631 
632         Self::new_with_tap(
633             id,
634             taps,
635             guest_mac,
636             iommu,
637             num_queue_pairs * 2,
638             queue_size,
639             seccomp_action,
640             rate_limiter_config,
641             exit_evt,
642             state,
643             offload_tso,
644             offload_ufo,
645             offload_csum,
646         )
647     }
648 
649     fn state(&self) -> NetState {
650         NetState {
651             avail_features: self.common.avail_features,
652             acked_features: self.common.acked_features,
653             config: self.config,
654             queue_size: self.common.queue_sizes.clone(),
655         }
656     }
657 
    /// Fuzzing-only helper that forwards to
    /// `VirtioCommon::wait_for_epoll_threads`.
    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
662 }
663 
664 impl Drop for Net {
665     fn drop(&mut self) {
666         if let Some(kill_evt) = self.common.kill_evt.take() {
667             // Ignore the result because there is nothing we can do about it.
668             let _ = kill_evt.write(1);
669         }
670         // Needed to ensure all references to tap FDs are dropped (#4868)
671         self.common.wait_for_epoll_threads();
672         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
673             if let Err(e) = thread.join() {
674                 error!("Error joining thread: {:?}", e);
675             }
676         }
677     }
678 }
679 
680 impl VirtioDevice for Net {
    /// Returns the virtio device type stored in the common state.
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    /// Returns the size of every queue of the device.
    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    /// Returns the feature bits offered by the device.
    fn features(&self) -> u64 {
        self.common.avail_features
    }

    /// Forwards the driver's feature acknowledgement to the common state.
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    /// Reads from the device configuration space at `offset` into `data`.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }
700 
    /// Activates the device and spawns its worker threads.
    ///
    /// When `VIRTIO_NET_F_CTRL_VQ` was acknowledged and an odd number of
    /// queues is provided, the last queue is taken as the control queue and
    /// served by its own thread. The remaining queues are consumed two at a
    /// time as (RX, TX) pairs; each pair gets one tap, its own RX and TX rate
    /// limiters (when configured) and its own thread.
    ///
    /// Returns an error if the common activation, rate limiter creation, tap
    /// offload programming or thread spawning fails.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;

        let num_queues = queues.len();
        let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
        if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
            // The control queue is the last one; removing it leaves only
            // RX/TX pairs in `queues`.
            let ctrl_queue_index = num_queues - 1;
            let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);

            ctrl_queue.set_event_idx(event_idx);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();
            let mut ctrl_handler = NetCtrlEpollHandler {
                mem: mem.clone(),
                kill_evt,
                pause_evt,
                ctrl_q: CtrlQueue::new(self.taps.clone()),
                queue: ctrl_queue,
                queue_evt: ctrl_queue_evt,
                access_platform: self.common.access_platform.clone(),
                queue_index: ctrl_queue_index as u16,
                interrupt_cb: interrupt_cb.clone(),
            };

            let paused = self.common.paused.clone();
            // Let's update the barrier as we need 1 for each RX/TX pair +
            // 1 for the control queue + 1 for the main thread signalling
            // the pause.
            self.common.paused_sync = Some(Arc::new(Barrier::new(self.taps.len() + 2)));
            let paused_sync = self.common.paused_sync.clone();

            // The control thread handle is kept apart from the queue pair
            // threads, in `ctrl_queue_epoll_thread`.
            let mut epoll_threads = Vec::new();
            spawn_virtio_thread(
                &format!("{}_ctrl", &self.id),
                &self.seccomp_action,
                Thread::VirtioNetCtl,
                &mut epoll_threads,
                &self.exit_evt,
                move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
            )?;
            self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
        }

        let mut epoll_threads = Vec::new();
        let mut taps = self.taps.clone();
        for i in 0..queues.len() / 2 {
            let rx = RxVirtio::new();
            let tx = TxVirtio::new();
            let rx_tap_listening = false;

            // Queues come in (RX, TX) order.
            let (_, queue_0, queue_evt_0) = queues.remove(0);
            let (_, queue_1, queue_evt_1) = queues.remove(0);
            let mut queue_pair = (queue_0, queue_1);
            queue_pair.0.set_event_idx(event_idx);
            queue_pair.1.set_event_idx(event_idx);

            let queue_evt_pair = (queue_evt_0, queue_evt_1);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();

            // RX and TX each get their own limiter built from the same
            // configuration.
            let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
                .rate_limiter_config
                .map(RateLimiterConfig::try_into)
                .transpose()
                .map_err(ActivateError::CreateRateLimiter)?;

            let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
                .rate_limiter_config
                .map(RateLimiterConfig::try_into)
                .transpose()
                .map_err(ActivateError::CreateRateLimiter)?;

            // One tap per queue pair; panics if there are fewer taps than pairs.
            let tap = taps.remove(0);
            #[cfg(not(fuzzing))]
            tap.set_offload(virtio_features_to_tap_offload(self.common.acked_features))
                .map_err(|e| {
                    error!("Error programming tap offload: {:?}", e);
                    ActivateError::BadActivate
                })?;

            let mut handler = NetEpollHandler {
                net: NetQueuePair {
                    tap_for_write_epoll: tap.clone(),
                    tap,
                    rx,
                    tx,
                    // Filled in by `NetEpollHandler::run` once the epoll
                    // helper exists.
                    epoll_fd: None,
                    rx_tap_listening,
                    tx_tap_listening: false,
                    counters: self.counters.clone(),
                    tap_rx_event_id: RX_TAP_EVENT,
                    tap_tx_event_id: TX_TAP_EVENT,
                    rx_desc_avail: false,
                    rx_rate_limiter,
                    tx_rate_limiter,
                    access_platform: self.common.access_platform.clone(),
                },
                mem: mem.clone(),
                queue_index_base: (i * 2) as u16,
                queue_pair,
                queue_evt_pair,
                interrupt_cb: interrupt_cb.clone(),
                kill_evt,
                pause_evt,
                driver_awake: false,
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();

            spawn_virtio_thread(
                &format!("{}_qp{}", self.id.clone(), i),
                &self.seccomp_action,
                Thread::VirtioNet,
                &mut epoll_threads,
                &self.exit_evt,
                move || handler.run(paused, paused_sync.unwrap()),
            )?;
        }

        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }
831 
    /// Resets the common device state, emits a "reset" event and returns
    /// whatever the common reset returned.
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
837 
838     fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
839         let mut counters = HashMap::new();
840 
841         counters.insert(
842             "rx_bytes",
843             Wrapping(self.counters.rx_bytes.load(Ordering::Acquire)),
844         );
845         counters.insert(
846             "rx_frames",
847             Wrapping(self.counters.rx_frames.load(Ordering::Acquire)),
848         );
849         counters.insert(
850             "tx_bytes",
851             Wrapping(self.counters.tx_bytes.load(Ordering::Acquire)),
852         );
853         counters.insert(
854             "tx_frames",
855             Wrapping(self.counters.tx_frames.load(Ordering::Acquire)),
856         );
857 
858         Some(counters)
859     }
860 
    /// Forwards the access platform to the common device state.
    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
864 }
865 
866 impl Pausable for Net {
867     fn pause(&mut self) -> result::Result<(), MigratableError> {
868         self.common.pause()
869     }
870 
871     fn resume(&mut self) -> result::Result<(), MigratableError> {
872         self.common.resume()?;
873 
874         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
875             ctrl_queue_epoll_thread.thread().unpark();
876         }
877         Ok(())
878     }
879 }
880 
impl Snapshottable for Net {
    /// Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }

    /// Builds a snapshot from the current `NetState` of the device.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}
// No methods are overridden for these traits; the empty impls rely on
// whatever defaults the traits provide.
impl Transportable for Net {}
impl Migratable for Net {}
892