xref: /cloud-hypervisor/virtio-devices/src/net.rs (revision adb318f4cd0079246b3cb07e01c4e978330445d2)
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
    RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
    EPOLL_HELPER_EVENT_LAST,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::GuestMemoryMmap;
use crate::VirtioInterrupt;
use anyhow::anyhow;
#[cfg(not(fuzzing))]
use net_util::virtio_features_to_tap_offload;
use net_util::CtrlQueue;
use net_util::{
    build_net_config_space, build_net_config_space_with_mq, open_tap, MacAddr, NetCounters,
    NetQueuePair, OpenTapError, RxVirtio, Tap, TapError, TxVirtio, VirtioNetConfig,
};
use seccompiler::SeccompAction;
use std::collections::HashMap;
use std::net::Ipv4Addr;
use std::num::Wrapping;
use std::ops::Deref;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier};
use std::thread;
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_bindings::virtio_config::*;
use virtio_bindings::virtio_net::*;
use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use virtio_queue::{Queue, QueueT};
use vm_memory::{ByteValued, GuestAddressSpace, GuestMemoryAtomic};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::AccessPlatform;
use vmm_sys_util::eventfd::EventFd;

/// Control queue
// Event available on the control queue.
const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Following the VIRTIO specification, the MTU should be at least 1280.
pub const MIN_MTU: u16 = 1280;

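/// Handles events for the virtio-net control queue. Control requests coming
/// from the guest are processed by the inner `CtrlQueue`, and the used ring is
/// signalled back to the driver through `interrupt_cb`.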
pub struct NetCtrlEpollHandler {
    pub mem: GuestMemoryAtomic<GuestMemoryMmap>,
    pub kill_evt: EventFd,
    pub pause_evt: EventFd,
    pub ctrl_q: CtrlQueue,
    pub queue_evt: EventFd,
    pub queue: Queue,
    pub access_platform: Option<Arc<dyn AccessPlatform>>,
    pub interrupt_cb: Arc<dyn VirtioInterrupt>,
    pub queue_index: u16,
}

impl NetCtrlEpollHandler {
    fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(VirtioInterruptType::Queue(queue_index))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    pub fn run_ctrl(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> std::result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for NetCtrlEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            CTRL_QUEUE_EVENT => {
                let mem = self.mem.memory();
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!(
                        "Failed to get control queue event: {:?}",
                        e
                    ))
                })?;
                self.ctrl_q
                    .process(mem.deref(), &mut self.queue, self.access_platform.as_ref())
                    .map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to process control queue: {:?}",
                            e
                        ))
                    })?;
                match self.queue.needs_notification(mem.deref()) {
                    Ok(true) => {
                        self.signal_used_queue(self.queue_index).map_err(|e| {
                            EpollHelperError::HandleEvent(anyhow!(
                                "Error signalling that control queue was used: {:?}",
                                e
                            ))
                        })?;
                    }
                    Ok(false) => {}
                    Err(e) => {
                        return Err(EpollHelperError::HandleEvent(anyhow!(
                            "Error getting notification state of control queue: {}",
                            e
                        )));
                    }
                };
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unknown event for virtio-net control queue"
                )));
            }
        }

        Ok(())
    }
}

/// Rx/Tx queue pair
// The guest has made a buffer available to receive a frame into.
pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// The transmit queue has a frame that is ready to send from the guest.
pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
// A frame is available for reading from the tap device to receive in the guest.
pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
// The TAP can be written to. Used after an EAGAIN error to retry TX.
pub const TX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
// New 'wake up' event from the rx rate limiter
pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
// New 'wake up' event from the tx rate limiter
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to open taps: {0}")]
    OpenTap(OpenTapError),
162     #[error("Using existing tap: {0}")]
163     TapError(TapError),
164     #[error("Error calling dup() on tap fd: {0}")]
165     DuplicateTapFd(std::io::Error),
166 }
167 
168 pub type Result<T> = result::Result<T, Error>;
169 
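/// Handles the RX and TX queues of a single queue pair, moving frames between
/// the guest and the TAP interface and enforcing the optional rate limiters.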
struct NetEpollHandler {
    net: NetQueuePair,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    kill_evt: EventFd,
    pause_evt: EventFd,
    queue_index_base: u16,
    queue_pair: (Queue, Queue),
    queue_evt_pair: (EventFd, EventFd),
    // Always generate interrupts until the driver has signalled the device.
    // This mitigates the problem of tap event interrupts being "lost" after a
    // restore, when the vCPU thread is not yet ready to handle them. This causes
    // issues when combined with VIRTIO_RING_F_EVENT_IDX interrupt suppression.
    driver_awake: bool,
}

impl NetEpollHandler {
    fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(VirtioInterruptType::Queue(queue_index))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

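    // The guest made new RX buffers available: resume listening on the TAP
    // unless the RX rate limiter is currently blocking us.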
    fn handle_rx_event(&mut self) -> result::Result<(), DeviceError> {
        let queue_evt = &self.queue_evt_pair.0;
        if let Err(e) = queue_evt.read() {
            error!("Failed to get rx queue event: {:?}", e);
        }

        self.net.rx_desc_avail = true;

        let rate_limit_reached = self
            .net
            .rx_rate_limiter
            .as_ref()
            .map_or(false, |r| r.is_blocked());

        // Start to listen on RX_TAP_EVENT only when the rate limit is not reached
        if !self.net.rx_tap_listening && !rate_limit_reached {
            net_util::register_listener(
                self.net.epoll_fd.unwrap(),
                self.net.tap.as_raw_fd(),
                epoll::Events::EPOLLIN,
                u64::from(self.net.tap_rx_event_id),
            )
            .map_err(DeviceError::IoError)?;
            self.net.rx_tap_listening = true;
        }

        Ok(())
    }

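    // Send frames from the TX queue out to the TAP, signalling the used ring
    // when required (and always while the driver hasn't woken up yet).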
    fn process_tx(&mut self) -> result::Result<(), DeviceError> {
        if self
            .net
            .process_tx(&self.mem.memory(), &mut self.queue_pair.1)
            .map_err(DeviceError::NetQueuePair)?
            || !self.driver_awake
        {
            self.signal_used_queue(self.queue_index_base + 1)?;
            debug!("Signalling TX queue");
        } else {
            debug!("Not signalling TX queue");
        }
        Ok(())
    }

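    // The guest notified us about new TX buffers. Processing is skipped while
    // the TX rate limiter is blocked; TX_RATE_LIMITER_EVENT will resume it.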
    fn handle_tx_event(&mut self) -> result::Result<(), DeviceError> {
        let rate_limit_reached = self
            .net
            .tx_rate_limiter
            .as_ref()
            .map_or(false, |r| r.is_blocked());

        if !rate_limit_reached {
            self.process_tx()?;
        }

        Ok(())
    }

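    // Frames are available on the TAP: move them into the guest RX queue and
    // signal the used ring when required.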
    fn handle_rx_tap_event(&mut self) -> result::Result<(), DeviceError> {
        if self
            .net
            .process_rx(&self.mem.memory(), &mut self.queue_pair.0)
            .map_err(DeviceError::NetQueuePair)?
            || !self.driver_awake
        {
            self.signal_used_queue(self.queue_index_base)?;
            debug!("Signalling RX queue");
        } else {
            debug!("Not signalling RX queue");
        }
        Ok(())
    }

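    // Register the queue, TAP and rate limiter events, then run the epoll loop
    // until the device is paused or shut down.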
    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt_pair.0.as_raw_fd(), RX_QUEUE_EVENT)?;
        helper.add_event(self.queue_evt_pair.1.as_raw_fd(), TX_QUEUE_EVENT)?;
        if let Some(rate_limiter) = &self.net.rx_rate_limiter {
            helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
        }
        if let Some(rate_limiter) = &self.net.tx_rate_limiter {
            helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
        }

        let mem = self.mem.memory();
        // If some descriptors are already available on the RX queue, then we
        // can start the thread while listening on the TAP.
        if self
            .queue_pair
            .0
            .used_idx(mem.deref(), Ordering::Acquire)
            .map_err(EpollHelperError::QueueRingIndex)?
            < self
                .queue_pair
                .0
                .avail_idx(mem.deref(), Ordering::Acquire)
                .map_err(EpollHelperError::QueueRingIndex)?
        {
            helper.add_event(self.net.tap.as_raw_fd(), RX_TAP_EVENT)?;
            self.net.rx_tap_listening = true;
            info!("Listener registered at start");
        }

        // The NetQueuePair needs the epoll fd.
        self.net.epoll_fd = Some(helper.as_raw_fd());

        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for NetEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            RX_QUEUE_EVENT => {
                self.driver_awake = true;
                self.handle_rx_event().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Error processing RX queue: {:?}", e))
                })?;
            }
            TX_QUEUE_EVENT => {
                let queue_evt = &self.queue_evt_pair.1;
                if let Err(e) = queue_evt.read() {
                    error!("Failed to get tx queue event: {:?}", e);
                }
                self.driver_awake = true;
                self.handle_tx_event().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
                })?;
            }
            TX_TAP_EVENT => {
                self.handle_tx_event().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!(
                        "Error processing TX queue (TAP event): {:?}",
                        e
                    ))
                })?;
            }
            RX_TAP_EVENT => {
                self.handle_rx_tap_event().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Error processing tap queue: {:?}", e))
                })?;
            }
            RX_RATE_LIMITER_EVENT => {
                if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
                    // Upon rate limiter event, call the rate limiter handler and register the
                    // TAP fd for further processing if some RX buffers are available
                    rate_limiter.event_handler().map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Error from 'rate_limiter.event_handler()': {:?}",
                            e
                        ))
                    })?;

                    if !self.net.rx_tap_listening && self.net.rx_desc_avail {
                        net_util::register_listener(
                            self.net.epoll_fd.unwrap(),
                            self.net.tap.as_raw_fd(),
                            epoll::Events::EPOLLIN,
                            u64::from(self.net.tap_rx_event_id),
                        )
                        .map_err(|e| {
                            EpollHelperError::HandleEvent(anyhow!(
                                "Error registering TAP listener upon `RX_RATE_LIMITER_EVENT`: {:?}",
                                e
                            ))
                        })?;

                        self.net.rx_tap_listening = true;
                    }
                } else {
                    return Err(EpollHelperError::HandleEvent(anyhow!(
                        "Unexpected RX_RATE_LIMITER_EVENT"
                    )));
                }
            }
            TX_RATE_LIMITER_EVENT => {
                if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
                    // Upon rate limiter event, call the rate limiter handler
                    // and restart processing the queue.
                    rate_limiter.event_handler().map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Error from 'rate_limiter.event_handler()': {:?}",
                            e
                        ))
                    })?;

                    self.driver_awake = true;
                    self.process_tx().map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
                    })?;
                } else {
                    return Err(EpollHelperError::HandleEvent(anyhow!(
                        "Unexpected TX_RATE_LIMITER_EVENT"
                    )));
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

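/// Virtio network device backed by one or more TAP interfaces.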
pub struct Net {
    common: VirtioCommon,
    id: String,
    taps: Vec<Tap>,
    config: VirtioNetConfig,
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    counters: NetCounters,
    seccomp_action: SeccompAction,
    rate_limiter_config: Option<RateLimiterConfig>,
    exit_evt: EventFd,
}

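/// Migratable state of a virtio-net device, used for snapshot/restore.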
#[derive(Versionize)]
pub struct NetState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioNetConfig,
    pub queue_size: Vec<u16>,
}

impl VersionMapped for NetState {}

impl Net {
    /// Create a new virtio network device with the given TAP interface.
    #[allow(clippy::too_many_arguments)]
    pub fn new_with_tap(
        id: String,
        taps: Vec<Tap>,
        guest_mac: Option<MacAddr>,
        iommu: bool,
        num_queues: usize,
        queue_size: u16,
        seccomp_action: SeccompAction,
        rate_limiter_config: Option<RateLimiterConfig>,
        exit_evt: EventFd,
        state: Option<NetState>,
        offload_tso: bool,
        offload_ufo: bool,
        offload_csum: bool,
    ) -> Result<Self> {
        assert!(!taps.is_empty());

        let mtu = taps[0].mtu().map_err(Error::TapError)? as u16;

        let (avail_features, acked_features, config, queue_sizes, paused) =
            if let Some(state) = state {
                info!("Restoring virtio-net {}", id);
                (
                    state.avail_features,
                    state.acked_features,
                    state.config,
                    state.queue_size,
                    true,
                )
            } else {
                let mut avail_features =
                    1 << VIRTIO_NET_F_MTU | 1 << VIRTIO_RING_F_EVENT_IDX | 1 << VIRTIO_F_VERSION_1;

                if iommu {
                    avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
                }

                // Configure TSO/UFO features when hardware checksum offload is enabled.
                if offload_csum {
                    avail_features |= 1 << VIRTIO_NET_F_CSUM
                        | 1 << VIRTIO_NET_F_GUEST_CSUM
                        | 1 << VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;

                    if offload_tso {
                        avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
                            | 1 << VIRTIO_NET_F_HOST_TSO4
                            | 1 << VIRTIO_NET_F_HOST_TSO6
                            | 1 << VIRTIO_NET_F_GUEST_ECN
                            | 1 << VIRTIO_NET_F_GUEST_TSO4
                            | 1 << VIRTIO_NET_F_GUEST_TSO6;
                    }

                    if offload_ufo {
                        avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
                    }
                }

                avail_features |= 1 << VIRTIO_NET_F_CTRL_VQ;
                let queue_num = num_queues + 1;

                let mut config = VirtioNetConfig::default();
                if let Some(mac) = guest_mac {
                    build_net_config_space(
                        &mut config,
                        mac,
                        num_queues,
                        Some(mtu),
                        &mut avail_features,
                    );
                } else {
                    build_net_config_space_with_mq(
                        &mut config,
                        num_queues,
                        Some(mtu),
                        &mut avail_features,
                    );
                }

                (
                    avail_features,
                    0,
                    config,
                    vec![queue_size; queue_num],
                    false,
                )
            };

        Ok(Net {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Net as u32,
                avail_features,
                acked_features,
                queue_sizes,
                paused_sync: Some(Arc::new(Barrier::new((num_queues / 2) + 1))),
                min_queues: 2,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            taps,
            config,
            ctrl_queue_epoll_thread: None,
            counters: NetCounters::default(),
            seccomp_action,
            rate_limiter_config,
            exit_evt,
        })
    }

    /// Create a new virtio network device with the given IP address and
    /// netmask.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        if_name: Option<&str>,
        ip_addr: Option<Ipv4Addr>,
        netmask: Option<Ipv4Addr>,
        guest_mac: Option<MacAddr>,
        host_mac: &mut Option<MacAddr>,
        mtu: Option<u16>,
        iommu: bool,
        num_queues: usize,
        queue_size: u16,
        seccomp_action: SeccompAction,
        rate_limiter_config: Option<RateLimiterConfig>,
        exit_evt: EventFd,
        state: Option<NetState>,
        offload_tso: bool,
        offload_ufo: bool,
        offload_csum: bool,
    ) -> Result<Self> {
        let taps = open_tap(
            if_name,
            ip_addr,
            netmask,
            host_mac,
            mtu,
            num_queues / 2,
            None,
        )
        .map_err(Error::OpenTap)?;

        Self::new_with_tap(
            id,
            taps,
            guest_mac,
            iommu,
            num_queues,
            queue_size,
            seccomp_action,
            rate_limiter_config,
            exit_evt,
            state,
            offload_tso,
            offload_ufo,
            offload_csum,
        )
    }

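    /// Create a new virtio network device from existing TAP file descriptors.
    /// The descriptors are duplicated so that the device can survive a reboot.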
    #[allow(clippy::too_many_arguments)]
    pub fn from_tap_fds(
        id: String,
        fds: &[RawFd],
        guest_mac: Option<MacAddr>,
        mtu: Option<u16>,
        iommu: bool,
        queue_size: u16,
        seccomp_action: SeccompAction,
        rate_limiter_config: Option<RateLimiterConfig>,
        exit_evt: EventFd,
        state: Option<NetState>,
        offload_tso: bool,
        offload_ufo: bool,
        offload_csum: bool,
    ) -> Result<Self> {
        let mut taps: Vec<Tap> = Vec::new();
        let num_queue_pairs = fds.len();

        for fd in fds.iter() {
            // Duplicate so that it can survive reboots
            // SAFETY: FFI call to dup. Trivially safe.
            let fd = unsafe { libc::dup(*fd) };
            if fd < 0 {
                return Err(Error::DuplicateTapFd(std::io::Error::last_os_error()));
            }
            let tap = Tap::from_tap_fd(fd, num_queue_pairs).map_err(Error::TapError)?;
            taps.push(tap);
        }

        assert!(!taps.is_empty());

        if let Some(mtu) = mtu {
            taps[0].set_mtu(mtu as i32).map_err(Error::TapError)?;
        }

        Self::new_with_tap(
            id,
            taps,
            guest_mac,
            iommu,
            num_queue_pairs * 2,
            queue_size,
            seccomp_action,
            rate_limiter_config,
            exit_evt,
            state,
            offload_tso,
            offload_ufo,
            offload_csum,
        )
    }

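    // Capture the state that gets saved into a snapshot and restored through
    // `new_with_tap()`.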
    fn state(&self) -> NetState {
        NetState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
            queue_size: self.common.queue_sizes.clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Net {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        // Needed to ensure all references to tap FDs are dropped (#4868)
        self.common.wait_for_epoll_threads();
        if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
            if let Err(e) = thread.join() {
                error!("Error joining thread: {:?}", e);
            }
        }
    }
}

impl VirtioDevice for Net {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

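    // Device activation: spawn one epoll thread per RX/TX queue pair, plus a
    // dedicated thread for the control queue when VIRTIO_NET_F_CTRL_VQ has
    // been negotiated.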
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;

        let num_queues = queues.len();
        let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
        if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
            let ctrl_queue_index = num_queues - 1;
            let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);

            ctrl_queue.set_event_idx(event_idx);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();
            let mut ctrl_handler = NetCtrlEpollHandler {
                mem: mem.clone(),
                kill_evt,
                pause_evt,
                ctrl_q: CtrlQueue::new(self.taps.clone()),
                queue: ctrl_queue,
                queue_evt: ctrl_queue_evt,
                access_platform: self.common.access_platform.clone(),
                queue_index: ctrl_queue_index as u16,
                interrupt_cb: interrupt_cb.clone(),
            };

            let paused = self.common.paused.clone();
            // Update the barrier: we need one count for each RX/TX pair, one
            // for the control queue, and one for the main thread signalling
            // the pause.
            self.common.paused_sync = Some(Arc::new(Barrier::new(self.taps.len() + 2)));
            let paused_sync = self.common.paused_sync.clone();

            let mut epoll_threads = Vec::new();
            spawn_virtio_thread(
                &format!("{}_ctrl", &self.id),
                &self.seccomp_action,
                Thread::VirtioNetCtl,
                &mut epoll_threads,
                &self.exit_evt,
                move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
            )?;
            self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
        }

        let mut epoll_threads = Vec::new();
        let mut taps = self.taps.clone();
        for i in 0..queues.len() / 2 {
            let rx = RxVirtio::new();
            let tx = TxVirtio::new();
            let rx_tap_listening = false;

            let (_, queue_0, queue_evt_0) = queues.remove(0);
            let (_, queue_1, queue_evt_1) = queues.remove(0);
            let mut queue_pair = (queue_0, queue_1);
            queue_pair.0.set_event_idx(event_idx);
            queue_pair.1.set_event_idx(event_idx);

            let queue_evt_pair = (queue_evt_0, queue_evt_1);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();

            let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
                .rate_limiter_config
                .map(RateLimiterConfig::try_into)
                .transpose()
                .map_err(ActivateError::CreateRateLimiter)?;

            let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
                .rate_limiter_config
                .map(RateLimiterConfig::try_into)
                .transpose()
                .map_err(ActivateError::CreateRateLimiter)?;

            let tap = taps.remove(0);
            #[cfg(not(fuzzing))]
            tap.set_offload(virtio_features_to_tap_offload(self.common.acked_features))
                .map_err(|e| {
                    error!("Error programming tap offload: {:?}", e);
                    ActivateError::BadActivate
                })?;

            let mut handler = NetEpollHandler {
                net: NetQueuePair {
                    tap_for_write_epoll: tap.clone(),
                    tap,
                    rx,
                    tx,
                    epoll_fd: None,
                    rx_tap_listening,
                    tx_tap_listening: false,
                    counters: self.counters.clone(),
                    tap_rx_event_id: RX_TAP_EVENT,
                    tap_tx_event_id: TX_TAP_EVENT,
                    rx_desc_avail: false,
                    rx_rate_limiter,
                    tx_rate_limiter,
                    access_platform: self.common.access_platform.clone(),
                },
                mem: mem.clone(),
                queue_index_base: (i * 2) as u16,
                queue_pair,
                queue_evt_pair,
                interrupt_cb: interrupt_cb.clone(),
                kill_evt,
                pause_evt,
                driver_awake: false,
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();

            spawn_virtio_thread(
                &format!("{}_qp{}", self.id.clone(), i),
                &self.seccomp_action,
                Thread::VirtioNet,
                &mut epoll_threads,
                &self.exit_evt,
                move || handler.run(paused, paused_sync.unwrap()),
            )?;
        }

        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }

    fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        counters.insert(
            "rx_bytes",
            Wrapping(self.counters.rx_bytes.load(Ordering::Acquire)),
        );
        counters.insert(
            "rx_frames",
            Wrapping(self.counters.rx_frames.load(Ordering::Acquire)),
        );
        counters.insert(
            "tx_bytes",
            Wrapping(self.counters.tx_bytes.load(Ordering::Acquire)),
        );
        counters.insert(
            "tx_frames",
            Wrapping(self.counters.tx_frames.load(Ordering::Acquire)),
        );

        Some(counters)
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Net {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()?;

        if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
            ctrl_queue_epoll_thread.thread().unpark();
        }
        Ok(())
    }
}

impl Snapshottable for Net {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.state())
    }
}
impl Transportable for Net {}
impl Migratable for Net {}