xref: /cloud-hypervisor/virtio-devices/src/net.rs (revision 3ce0fef7fd546467398c914dbc74d8542e45cf6f)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the THIRD-PARTY file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
11     RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType,
12     EPOLL_HELPER_EVENT_LAST,
13 };
14 use crate::seccomp_filters::Thread;
15 use crate::thread_helper::spawn_virtio_thread;
16 use crate::GuestMemoryMmap;
17 use crate::VirtioInterrupt;
18 use anyhow::anyhow;
19 #[cfg(not(fuzzing))]
20 use net_util::virtio_features_to_tap_offload;
21 use net_util::CtrlQueue;
22 use net_util::{
23     build_net_config_space, build_net_config_space_with_mq, open_tap, MacAddr, NetCounters,
24     NetQueuePair, OpenTapError, RxVirtio, Tap, TapError, TxVirtio, VirtioNetConfig,
25 };
26 use seccompiler::SeccompAction;
27 use std::net::Ipv4Addr;
28 use std::num::Wrapping;
29 use std::ops::Deref;
30 use std::os::unix::io::{AsRawFd, RawFd};
31 use std::result;
32 use std::sync::atomic::{AtomicBool, Ordering};
33 use std::sync::{Arc, Barrier};
34 use std::thread;
35 use std::vec::Vec;
36 use std::{collections::HashMap, convert::TryInto};
37 use thiserror::Error;
38 use versionize::{VersionMap, Versionize, VersionizeResult};
39 use versionize_derive::Versionize;
40 use virtio_bindings::virtio_config::*;
41 use virtio_bindings::virtio_net::*;
42 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
43 use virtio_queue::{Queue, QueueT};
44 use vm_memory::{ByteValued, GuestAddressSpace, GuestMemoryAtomic};
45 use vm_migration::VersionMapped;
46 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
47 use vm_virtio::AccessPlatform;
48 use vmm_sys_util::eventfd::EventFd;
49 
// Control queue
//
/// Event available on the control queue.
///
/// Epoll event identifier under which `NetCtrlEpollHandler::run_ctrl` registers
/// the control queue eventfd and on which its `handle_event` dispatches.
const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
53 
/// Lower bound for the MTU of a virtio-net device.
///
/// Following the VIRTIO specification, the MTU should be at least 1280.
pub const MIN_MTU: u16 = 1280;
56 
/// Epoll handler servicing the virtio-net control queue.
///
/// `run_ctrl` registers `queue_evt` with an `EpollHelper`; every control queue
/// event is then processed through `ctrl_q` and, when the queue asks for it,
/// followed by a used-queue interrupt.
pub struct NetCtrlEpollHandler {
    /// Guest memory handle; `memory()` is called on it for every handled event.
    pub mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Kill eventfd handed to the `EpollHelper`.
    pub kill_evt: EventFd,
    /// Pause eventfd handed to the `EpollHelper`.
    pub pause_evt: EventFd,
    /// Control queue processor; its `process()` method is invoked on each event.
    pub ctrl_q: CtrlQueue,
    /// Eventfd registered as `CTRL_QUEUE_EVENT` and read on every such event.
    pub queue_evt: EventFd,
    /// The control virtqueue.
    pub queue: Queue,
    /// Optional access platform forwarded to `ctrl_q.process()`.
    pub access_platform: Option<Arc<dyn AccessPlatform>>,
    /// Callback used to trigger the queue interrupt.
    pub interrupt_cb: Arc<dyn VirtioInterrupt>,
    /// Queue index reported in the interrupt raised for the control queue.
    pub queue_index: u16,
}
68 
69 impl NetCtrlEpollHandler {
70     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
71         self.interrupt_cb
72             .trigger(VirtioInterruptType::Queue(queue_index))
73             .map_err(|e| {
74                 error!("Failed to signal used queue: {:?}", e);
75                 DeviceError::FailedSignalingUsedQueue(e)
76             })
77     }
78 
79     pub fn run_ctrl(
80         &mut self,
81         paused: Arc<AtomicBool>,
82         paused_sync: Arc<Barrier>,
83     ) -> std::result::Result<(), EpollHelperError> {
84         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
85         helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?;
86         helper.run(paused, paused_sync, self)?;
87 
88         Ok(())
89     }
90 }
91 
92 impl EpollHelperHandler for NetCtrlEpollHandler {
93     fn handle_event(
94         &mut self,
95         _helper: &mut EpollHelper,
96         event: &epoll::Event,
97     ) -> result::Result<(), EpollHelperError> {
98         let ev_type = event.data as u16;
99         match ev_type {
100             CTRL_QUEUE_EVENT => {
101                 let mem = self.mem.memory();
102                 self.queue_evt.read().map_err(|e| {
103                     EpollHelperError::HandleEvent(anyhow!(
104                         "Failed to get control queue event: {:?}",
105                         e
106                     ))
107                 })?;
108                 self.ctrl_q
109                     .process(mem.deref(), &mut self.queue, self.access_platform.as_ref())
110                     .map_err(|e| {
111                         EpollHelperError::HandleEvent(anyhow!(
112                             "Failed to process control queue: {:?}",
113                             e
114                         ))
115                     })?;
116                 match self.queue.needs_notification(mem.deref()) {
117                     Ok(true) => {
118                         self.signal_used_queue(self.queue_index).map_err(|e| {
119                             EpollHelperError::HandleEvent(anyhow!(
120                                 "Error signalling that control queue was used: {:?}",
121                                 e
122                             ))
123                         })?;
124                     }
125                     Ok(false) => {}
126                     Err(e) => {
127                         return Err(EpollHelperError::HandleEvent(anyhow!(
128                             "Error getting notification state of control queue: {}",
129                             e
130                         )));
131                     }
132                 };
133             }
134             _ => {
135                 return Err(EpollHelperError::HandleEvent(anyhow!(
136                     "Unknown event for virtio-net control queue"
137                 )));
138             }
139         }
140 
141         Ok(())
142     }
143 }
144 
// Rx/Tx queue pair
//
/// The guest has made a buffer available to receive a frame into.
pub const RX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
/// The transmit queue has a frame that is ready to send from the guest.
pub const TX_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
/// A frame is available for reading from the tap device to receive in the guest.
pub const RX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;
/// The TAP can be written to. Used after an EAGAIN error to retry TX.
pub const TX_TAP_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4;
/// New 'wake up' event from the rx rate limiter
pub const RX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5;
/// New 'wake up' event from the tx rate limiter
pub const TX_RATE_LIMITER_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 6;
158 
/// Errors returned by the `Net` constructors.
#[derive(Error, Debug)]
pub enum Error {
    /// `open_tap` failed while creating the TAP interfaces.
    #[error("Failed to open taps: {0}")]
    OpenTap(OpenTapError),
    /// An operation on an already opened TAP (wrapping an fd, reading or
    /// setting the MTU) failed.
    #[error("Using existing tap: {0}")]
    TapError(TapError),
    /// `dup()` on a caller-provided TAP file descriptor failed.
    #[error("Error calling dup() on tap fd: {0}")]
    DuplicateTapFd(std::io::Error),
}

/// Result type whose error is this module's `Error`.
pub type Result<T> = result::Result<T, Error>;
170 
/// Epoll handler servicing one RX/TX queue pair of the device.
struct NetEpollHandler {
    /// Queue pair state: TAP, rate limiters, listening flags and counters.
    net: NetQueuePair,
    /// Guest memory handle; `memory()` is called on it whenever a queue is processed.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Callback used to trigger queue interrupts.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    /// Kill eventfd handed to the `EpollHelper`.
    kill_evt: EventFd,
    /// Pause eventfd handed to the `EpollHelper`.
    pause_evt: EventFd,
    /// Index signalled for the RX queue; the TX queue is signalled as `queue_index_base + 1`.
    queue_index_base: u16,
    /// The (RX, TX) virtqueues.
    queue_pair: (Queue, Queue),
    /// The (RX, TX) queue eventfds, registered as `RX_QUEUE_EVENT` / `TX_QUEUE_EVENT`.
    queue_evt_pair: (EventFd, EventFd),
    // Always generate interrupts until the driver has signalled to the device.
    // This mitigates a problem with interrupts from tap events being "lost" upon
    // a restore as the vCPU thread isn't ready to handle the interrupt. This causes
    // issues when combined with VIRTIO_RING_F_EVENT_IDX interrupt suppression.
    driver_awake: bool,
}
186 
187 impl NetEpollHandler {
188     fn signal_used_queue(&self, queue_index: u16) -> result::Result<(), DeviceError> {
189         self.interrupt_cb
190             .trigger(VirtioInterruptType::Queue(queue_index))
191             .map_err(|e| {
192                 error!("Failed to signal used queue: {:?}", e);
193                 DeviceError::FailedSignalingUsedQueue(e)
194             })
195     }
196 
197     fn handle_rx_event(&mut self) -> result::Result<(), DeviceError> {
198         let queue_evt = &self.queue_evt_pair.0;
199         if let Err(e) = queue_evt.read() {
200             error!("Failed to get rx queue event: {:?}", e);
201         }
202 
203         self.net.rx_desc_avail = true;
204 
205         let rate_limit_reached = self
206             .net
207             .rx_rate_limiter
208             .as_ref()
209             .map_or(false, |r| r.is_blocked());
210 
211         // Start to listen on RX_TAP_EVENT only when the rate limit is not reached
212         if !self.net.rx_tap_listening && !rate_limit_reached {
213             net_util::register_listener(
214                 self.net.epoll_fd.unwrap(),
215                 self.net.tap.as_raw_fd(),
216                 epoll::Events::EPOLLIN,
217                 u64::from(self.net.tap_rx_event_id),
218             )
219             .map_err(DeviceError::IoError)?;
220             self.net.rx_tap_listening = true;
221         }
222 
223         Ok(())
224     }
225 
226     fn process_tx(&mut self) -> result::Result<(), DeviceError> {
227         if self
228             .net
229             .process_tx(&self.mem.memory(), &mut self.queue_pair.1)
230             .map_err(DeviceError::NetQueuePair)?
231             || !self.driver_awake
232         {
233             self.signal_used_queue(self.queue_index_base + 1)?;
234             debug!("Signalling TX queue");
235         } else {
236             debug!("Not signalling TX queue");
237         }
238         Ok(())
239     }
240 
241     fn handle_tx_event(&mut self) -> result::Result<(), DeviceError> {
242         let rate_limit_reached = self
243             .net
244             .tx_rate_limiter
245             .as_ref()
246             .map_or(false, |r| r.is_blocked());
247 
248         if !rate_limit_reached {
249             self.process_tx()?;
250         }
251 
252         Ok(())
253     }
254 
255     fn handle_rx_tap_event(&mut self) -> result::Result<(), DeviceError> {
256         if self
257             .net
258             .process_rx(&self.mem.memory(), &mut self.queue_pair.0)
259             .map_err(DeviceError::NetQueuePair)?
260             || !self.driver_awake
261         {
262             self.signal_used_queue(self.queue_index_base)?;
263             debug!("Signalling RX queue");
264         } else {
265             debug!("Not signalling RX queue");
266         }
267         Ok(())
268     }
269 
270     fn run(
271         &mut self,
272         paused: Arc<AtomicBool>,
273         paused_sync: Arc<Barrier>,
274     ) -> result::Result<(), EpollHelperError> {
275         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
276         helper.add_event(self.queue_evt_pair.0.as_raw_fd(), RX_QUEUE_EVENT)?;
277         helper.add_event(self.queue_evt_pair.1.as_raw_fd(), TX_QUEUE_EVENT)?;
278         if let Some(rate_limiter) = &self.net.rx_rate_limiter {
279             helper.add_event(rate_limiter.as_raw_fd(), RX_RATE_LIMITER_EVENT)?;
280         }
281         if let Some(rate_limiter) = &self.net.tx_rate_limiter {
282             helper.add_event(rate_limiter.as_raw_fd(), TX_RATE_LIMITER_EVENT)?;
283         }
284 
285         let mem = self.mem.memory();
286         // If there are some already available descriptors on the RX queue,
287         // then we can start the thread while listening onto the TAP.
288         if self
289             .queue_pair
290             .0
291             .used_idx(mem.deref(), Ordering::Acquire)
292             .map_err(EpollHelperError::QueueRingIndex)?
293             < self
294                 .queue_pair
295                 .0
296                 .avail_idx(mem.deref(), Ordering::Acquire)
297                 .map_err(EpollHelperError::QueueRingIndex)?
298         {
299             helper.add_event(self.net.tap.as_raw_fd(), RX_TAP_EVENT)?;
300             self.net.rx_tap_listening = true;
301             info!("Listener registered at start");
302         }
303 
304         // The NetQueuePair needs the epoll fd.
305         self.net.epoll_fd = Some(helper.as_raw_fd());
306 
307         helper.run(paused, paused_sync, self)?;
308 
309         Ok(())
310     }
311 }
312 
313 impl EpollHelperHandler for NetEpollHandler {
314     fn handle_event(
315         &mut self,
316         _helper: &mut EpollHelper,
317         event: &epoll::Event,
318     ) -> result::Result<(), EpollHelperError> {
319         let ev_type = event.data as u16;
320         match ev_type {
321             RX_QUEUE_EVENT => {
322                 self.driver_awake = true;
323                 self.handle_rx_event().map_err(|e| {
324                     EpollHelperError::HandleEvent(anyhow!("Error processing RX queue: {:?}", e))
325                 })?;
326             }
327             TX_QUEUE_EVENT => {
328                 let queue_evt = &self.queue_evt_pair.1;
329                 if let Err(e) = queue_evt.read() {
330                     error!("Failed to get tx queue event: {:?}", e);
331                 }
332                 self.driver_awake = true;
333                 self.handle_tx_event().map_err(|e| {
334                     EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
335                 })?;
336             }
337             TX_TAP_EVENT => {
338                 self.handle_tx_event().map_err(|e| {
339                     EpollHelperError::HandleEvent(anyhow!(
340                         "Error processing TX queue (TAP event): {:?}",
341                         e
342                     ))
343                 })?;
344             }
345             RX_TAP_EVENT => {
346                 self.handle_rx_tap_event().map_err(|e| {
347                     EpollHelperError::HandleEvent(anyhow!("Error processing tap queue: {:?}", e))
348                 })?;
349             }
350             RX_RATE_LIMITER_EVENT => {
351                 if let Some(rate_limiter) = &mut self.net.rx_rate_limiter {
352                     // Upon rate limiter event, call the rate limiter handler and register the
353                     // TAP fd for further processing if some RX buffers are available
354                     rate_limiter.event_handler().map_err(|e| {
355                         EpollHelperError::HandleEvent(anyhow!(
356                             "Error from 'rate_limiter.event_handler()': {:?}",
357                             e
358                         ))
359                     })?;
360 
361                     if !self.net.rx_tap_listening && self.net.rx_desc_avail {
362                         net_util::register_listener(
363                             self.net.epoll_fd.unwrap(),
364                             self.net.tap.as_raw_fd(),
365                             epoll::Events::EPOLLIN,
366                             u64::from(self.net.tap_rx_event_id),
367                         )
368                         .map_err(|e| {
369                             EpollHelperError::HandleEvent(anyhow!(
370                                 "Error register_listener with `RX_RATE_LIMITER_EVENT`: {:?}",
371                                 e
372                             ))
373                         })?;
374 
375                         self.net.rx_tap_listening = true;
376                     }
377                 } else {
378                     return Err(EpollHelperError::HandleEvent(anyhow!(
379                         "Unexpected RX_RATE_LIMITER_EVENT"
380                     )));
381                 }
382             }
383             TX_RATE_LIMITER_EVENT => {
384                 if let Some(rate_limiter) = &mut self.net.tx_rate_limiter {
385                     // Upon rate limiter event, call the rate limiter handler
386                     // and restart processing the queue.
387                     rate_limiter.event_handler().map_err(|e| {
388                         EpollHelperError::HandleEvent(anyhow!(
389                             "Error from 'rate_limiter.event_handler()': {:?}",
390                             e
391                         ))
392                     })?;
393 
394                     self.driver_awake = true;
395                     self.process_tx().map_err(|e| {
396                         EpollHelperError::HandleEvent(anyhow!("Error processing TX queue: {:?}", e))
397                     })?;
398                 } else {
399                     return Err(EpollHelperError::HandleEvent(anyhow!(
400                         "Unexpected TX_RATE_LIMITER_EVENT"
401                     )));
402                 }
403             }
404             _ => {
405                 return Err(EpollHelperError::HandleEvent(anyhow!(
406                     "Unexpected event: {}",
407                     ev_type
408                 )));
409             }
410         }
411         Ok(())
412     }
413 }
414 
/// Virtio network device backed by one TAP per RX/TX queue pair.
pub struct Net {
    /// State shared by all virtio devices (features, queue sizes, threads, pause handling).
    common: VirtioCommon,
    /// Device identifier, used for thread names, events and snapshots.
    id: String,
    /// TAP interfaces; `activate` hands one to each queue pair thread.
    taps: Vec<Tap>,
    /// Device configuration space exposed through `read_config`.
    config: VirtioNetConfig,
    /// Join handle of the control queue thread, when one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// RX/TX byte and frame counters shared with the queue pair threads.
    counters: NetCounters,
    /// Seccomp action passed to every spawned virtio thread.
    seccomp_action: SeccompAction,
    /// Optional configuration from which the RX and TX rate limiters are built.
    rate_limiter_config: Option<RateLimiterConfig>,
    /// Exit eventfd passed to every spawned virtio thread.
    exit_evt: EventFd,
}
426 
/// Versioned state of a `Net` device, produced by `Net::state()` for snapshots
/// and consumed by `Net::new_with_tap()` when restoring.
#[derive(Versionize)]
pub struct NetState {
    /// Features offered by the device.
    pub avail_features: u64,
    /// Features acknowledged by the driver.
    pub acked_features: u64,
    /// Device configuration space.
    pub config: VirtioNetConfig,
    /// Queue sizes, one entry per queue.
    pub queue_size: Vec<u16>,
}

impl VersionMapped for NetState {}
436 
437 impl Net {
438     /// Create a new virtio network device with the given TAP interface.
439     #[allow(clippy::too_many_arguments)]
440     pub fn new_with_tap(
441         id: String,
442         taps: Vec<Tap>,
443         guest_mac: Option<MacAddr>,
444         iommu: bool,
445         num_queues: usize,
446         queue_size: u16,
447         seccomp_action: SeccompAction,
448         rate_limiter_config: Option<RateLimiterConfig>,
449         exit_evt: EventFd,
450         state: Option<NetState>,
451         offload_tso: bool,
452         offload_ufo: bool,
453         offload_csum: bool,
454     ) -> Result<Self> {
455         assert!(!taps.is_empty());
456 
457         let mtu = taps[0].mtu().map_err(Error::TapError)? as u16;
458 
459         let (avail_features, acked_features, config, queue_sizes, paused) =
460             if let Some(state) = state {
461                 info!("Restoring virtio-net {}", id);
462                 (
463                     state.avail_features,
464                     state.acked_features,
465                     state.config,
466                     state.queue_size,
467                     true,
468                 )
469             } else {
470                 let mut avail_features =
471                     1 << VIRTIO_NET_F_MTU | 1 << VIRTIO_RING_F_EVENT_IDX | 1 << VIRTIO_F_VERSION_1;
472 
473                 if iommu {
474                     avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
475                 }
476 
477                 // Configure TSO/UFO features when hardware checksum offload is enabled.
478                 if offload_csum {
479                     avail_features |= 1 << VIRTIO_NET_F_CSUM
480                         | 1 << VIRTIO_NET_F_GUEST_CSUM
481                         | 1 << VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;
482 
483                     if offload_tso {
484                         avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
485                             | 1 << VIRTIO_NET_F_HOST_TSO4
486                             | 1 << VIRTIO_NET_F_HOST_TSO6
487                             | 1 << VIRTIO_NET_F_GUEST_ECN
488                             | 1 << VIRTIO_NET_F_GUEST_TSO4
489                             | 1 << VIRTIO_NET_F_GUEST_TSO6;
490                     }
491 
492                     if offload_ufo {
493                         avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
494                     }
495                 }
496 
497                 avail_features |= 1 << VIRTIO_NET_F_CTRL_VQ;
498                 let queue_num = num_queues + 1;
499 
500                 let mut config = VirtioNetConfig::default();
501                 if let Some(mac) = guest_mac {
502                     build_net_config_space(
503                         &mut config,
504                         mac,
505                         num_queues,
506                         Some(mtu),
507                         &mut avail_features,
508                     );
509                 } else {
510                     build_net_config_space_with_mq(
511                         &mut config,
512                         num_queues,
513                         Some(mtu),
514                         &mut avail_features,
515                     );
516                 }
517 
518                 (
519                     avail_features,
520                     0,
521                     config,
522                     vec![queue_size; queue_num],
523                     false,
524                 )
525             };
526 
527         Ok(Net {
528             common: VirtioCommon {
529                 device_type: VirtioDeviceType::Net as u32,
530                 avail_features,
531                 acked_features,
532                 queue_sizes,
533                 paused_sync: Some(Arc::new(Barrier::new((num_queues / 2) + 1))),
534                 min_queues: 2,
535                 paused: Arc::new(AtomicBool::new(paused)),
536                 ..Default::default()
537             },
538             id,
539             taps,
540             config,
541             ctrl_queue_epoll_thread: None,
542             counters: NetCounters::default(),
543             seccomp_action,
544             rate_limiter_config,
545             exit_evt,
546         })
547     }
548 
549     /// Create a new virtio network device with the given IP address and
550     /// netmask.
551     #[allow(clippy::too_many_arguments)]
552     pub fn new(
553         id: String,
554         if_name: Option<&str>,
555         ip_addr: Option<Ipv4Addr>,
556         netmask: Option<Ipv4Addr>,
557         guest_mac: Option<MacAddr>,
558         host_mac: &mut Option<MacAddr>,
559         mtu: Option<u16>,
560         iommu: bool,
561         num_queues: usize,
562         queue_size: u16,
563         seccomp_action: SeccompAction,
564         rate_limiter_config: Option<RateLimiterConfig>,
565         exit_evt: EventFd,
566         state: Option<NetState>,
567         offload_tso: bool,
568         offload_ufo: bool,
569         offload_csum: bool,
570     ) -> Result<Self> {
571         let taps = open_tap(
572             if_name,
573             ip_addr,
574             netmask,
575             host_mac,
576             mtu,
577             num_queues / 2,
578             None,
579         )
580         .map_err(Error::OpenTap)?;
581 
582         Self::new_with_tap(
583             id,
584             taps,
585             guest_mac,
586             iommu,
587             num_queues,
588             queue_size,
589             seccomp_action,
590             rate_limiter_config,
591             exit_evt,
592             state,
593             offload_tso,
594             offload_ufo,
595             offload_csum,
596         )
597     }
598 
599     #[allow(clippy::too_many_arguments)]
600     pub fn from_tap_fds(
601         id: String,
602         fds: &[RawFd],
603         guest_mac: Option<MacAddr>,
604         mtu: Option<u16>,
605         iommu: bool,
606         queue_size: u16,
607         seccomp_action: SeccompAction,
608         rate_limiter_config: Option<RateLimiterConfig>,
609         exit_evt: EventFd,
610         state: Option<NetState>,
611         offload_tso: bool,
612         offload_ufo: bool,
613         offload_csum: bool,
614     ) -> Result<Self> {
615         let mut taps: Vec<Tap> = Vec::new();
616         let num_queue_pairs = fds.len();
617 
618         for fd in fds.iter() {
619             // Duplicate so that it can survive reboots
620             // SAFETY: FFI call to dup. Trivially safe.
621             let fd = unsafe { libc::dup(*fd) };
622             if fd < 0 {
623                 return Err(Error::DuplicateTapFd(std::io::Error::last_os_error()));
624             }
625             let tap = Tap::from_tap_fd(fd, num_queue_pairs).map_err(Error::TapError)?;
626             taps.push(tap);
627         }
628 
629         assert!(!taps.is_empty());
630 
631         if let Some(mtu) = mtu {
632             taps[0].set_mtu(mtu as i32).map_err(Error::TapError)?;
633         }
634 
635         Self::new_with_tap(
636             id,
637             taps,
638             guest_mac,
639             iommu,
640             num_queue_pairs * 2,
641             queue_size,
642             seccomp_action,
643             rate_limiter_config,
644             exit_evt,
645             state,
646             offload_tso,
647             offload_ufo,
648             offload_csum,
649         )
650     }
651 
652     fn state(&self) -> NetState {
653         NetState {
654             avail_features: self.common.avail_features,
655             acked_features: self.common.acked_features,
656             config: self.config,
657             queue_size: self.common.queue_sizes.clone(),
658         }
659     }
660 
    /// Fuzzing-only helper forwarding to `VirtioCommon::wait_for_epoll_threads`.
    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
665 }
666 
667 impl Drop for Net {
668     fn drop(&mut self) {
669         if let Some(kill_evt) = self.common.kill_evt.take() {
670             // Ignore the result because there is nothing we can do about it.
671             let _ = kill_evt.write(1);
672         }
673         // Needed to ensure all references to tap FDs are dropped (#4868)
674         self.common.wait_for_epoll_threads();
675         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
676             if let Err(e) = thread.join() {
677                 error!("Error joining thread: {:?}", e);
678             }
679         }
680     }
681 }
682 
683 impl VirtioDevice for Net {
    /// Returns the device type stored in the common virtio state.
    fn device_type(&self) -> u32 {
        self.common.device_type
    }
687 
    /// Returns the queue sizes stored in the common virtio state, one entry per queue.
    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }
691 
    /// Returns the feature bits offered by the device.
    fn features(&self) -> u64 {
        self.common.avail_features
    }
695 
    /// Forwards the feature bits in `value` to `VirtioCommon::ack_features`.
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }
699 
    /// Serves a configuration space read by delegating to
    /// `read_config_from_slice` with the raw bytes of `self.config`.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }
703 
704     fn activate(
705         &mut self,
706         mem: GuestMemoryAtomic<GuestMemoryMmap>,
707         interrupt_cb: Arc<dyn VirtioInterrupt>,
708         mut queues: Vec<(usize, Queue, EventFd)>,
709     ) -> ActivateResult {
710         self.common.activate(&queues, &interrupt_cb)?;
711 
712         let num_queues = queues.len();
713         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
714         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
715             let ctrl_queue_index = num_queues - 1;
716             let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);
717 
718             ctrl_queue.set_event_idx(event_idx);
719 
720             let (kill_evt, pause_evt) = self.common.dup_eventfds();
721             let mut ctrl_handler = NetCtrlEpollHandler {
722                 mem: mem.clone(),
723                 kill_evt,
724                 pause_evt,
725                 ctrl_q: CtrlQueue::new(self.taps.clone()),
726                 queue: ctrl_queue,
727                 queue_evt: ctrl_queue_evt,
728                 access_platform: self.common.access_platform.clone(),
729                 queue_index: ctrl_queue_index as u16,
730                 interrupt_cb: interrupt_cb.clone(),
731             };
732 
733             let paused = self.common.paused.clone();
734             // Let's update the barrier as we need 1 for each RX/TX pair +
735             // 1 for the control queue + 1 for the main thread signalling
736             // the pause.
737             self.common.paused_sync = Some(Arc::new(Barrier::new(self.taps.len() + 2)));
738             let paused_sync = self.common.paused_sync.clone();
739 
740             let mut epoll_threads = Vec::new();
741             spawn_virtio_thread(
742                 &format!("{}_ctrl", &self.id),
743                 &self.seccomp_action,
744                 Thread::VirtioNetCtl,
745                 &mut epoll_threads,
746                 &self.exit_evt,
747                 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
748             )?;
749             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
750         }
751 
752         let mut epoll_threads = Vec::new();
753         let mut taps = self.taps.clone();
754         for i in 0..queues.len() / 2 {
755             let rx = RxVirtio::new();
756             let tx = TxVirtio::new();
757             let rx_tap_listening = false;
758 
759             let (_, queue_0, queue_evt_0) = queues.remove(0);
760             let (_, queue_1, queue_evt_1) = queues.remove(0);
761             let mut queue_pair = (queue_0, queue_1);
762             queue_pair.0.set_event_idx(event_idx);
763             queue_pair.1.set_event_idx(event_idx);
764 
765             let queue_evt_pair = (queue_evt_0, queue_evt_1);
766 
767             let (kill_evt, pause_evt) = self.common.dup_eventfds();
768 
769             let rx_rate_limiter: Option<rate_limiter::RateLimiter> = self
770                 .rate_limiter_config
771                 .map(RateLimiterConfig::try_into)
772                 .transpose()
773                 .map_err(ActivateError::CreateRateLimiter)?;
774 
775             let tx_rate_limiter: Option<rate_limiter::RateLimiter> = self
776                 .rate_limiter_config
777                 .map(RateLimiterConfig::try_into)
778                 .transpose()
779                 .map_err(ActivateError::CreateRateLimiter)?;
780 
781             let tap = taps.remove(0);
782             #[cfg(not(fuzzing))]
783             tap.set_offload(virtio_features_to_tap_offload(self.common.acked_features))
784                 .map_err(|e| {
785                     error!("Error programming tap offload: {:?}", e);
786                     ActivateError::BadActivate
787                 })?;
788 
789             let mut handler = NetEpollHandler {
790                 net: NetQueuePair {
791                     tap_for_write_epoll: tap.clone(),
792                     tap,
793                     rx,
794                     tx,
795                     epoll_fd: None,
796                     rx_tap_listening,
797                     tx_tap_listening: false,
798                     counters: self.counters.clone(),
799                     tap_rx_event_id: RX_TAP_EVENT,
800                     tap_tx_event_id: TX_TAP_EVENT,
801                     rx_desc_avail: false,
802                     rx_rate_limiter,
803                     tx_rate_limiter,
804                     access_platform: self.common.access_platform.clone(),
805                 },
806                 mem: mem.clone(),
807                 queue_index_base: (i * 2) as u16,
808                 queue_pair,
809                 queue_evt_pair,
810                 interrupt_cb: interrupt_cb.clone(),
811                 kill_evt,
812                 pause_evt,
813                 driver_awake: false,
814             };
815 
816             let paused = self.common.paused.clone();
817             let paused_sync = self.common.paused_sync.clone();
818 
819             spawn_virtio_thread(
820                 &format!("{}_qp{}", self.id.clone(), i),
821                 &self.seccomp_action,
822                 Thread::VirtioNet,
823                 &mut epoll_threads,
824                 &self.exit_evt,
825                 move || handler.run(paused, paused_sync.unwrap()),
826             )?;
827         }
828 
829         self.common.epoll_threads = Some(epoll_threads);
830 
831         event!("virtio-device", "activated", "id", &self.id);
832         Ok(())
833     }
834 
    /// Reset the common device state, emit a `reset` event for this device and
    /// return whatever `VirtioCommon::reset` returned.
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
840 
841     fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> {
842         let mut counters = HashMap::new();
843 
844         counters.insert(
845             "rx_bytes",
846             Wrapping(self.counters.rx_bytes.load(Ordering::Acquire)),
847         );
848         counters.insert(
849             "rx_frames",
850             Wrapping(self.counters.rx_frames.load(Ordering::Acquire)),
851         );
852         counters.insert(
853             "tx_bytes",
854             Wrapping(self.counters.tx_bytes.load(Ordering::Acquire)),
855         );
856         counters.insert(
857             "tx_frames",
858             Wrapping(self.counters.tx_frames.load(Ordering::Acquire)),
859         );
860 
861         Some(counters)
862     }
863 
    /// Forwards `access_platform` to `VirtioCommon::set_access_platform`.
    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
867 }
868 
869 impl Pausable for Net {
870     fn pause(&mut self) -> result::Result<(), MigratableError> {
871         self.common.pause()
872     }
873 
874     fn resume(&mut self) -> result::Result<(), MigratableError> {
875         self.common.resume()?;
876 
877         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
878             ctrl_queue_epoll_thread.thread().unpark();
879         }
880         Ok(())
881     }
882 }
883 
884 impl Snapshottable for Net {
885     fn id(&self) -> String {
886         self.id.clone()
887     }
888 
889     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
890         Snapshot::new_from_versioned_state(&self.state())
891     }
892 }
// No method is overridden in either impl: `Net` relies on the traits' provided behaviour.
impl Transportable for Net {}
impl Migratable for Net {}
895