xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision 3ce0fef7fd546467398c914dbc74d8542e45cf6f)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use crate::seccomp_filters::Thread;
5 use crate::thread_helper::spawn_virtio_thread;
6 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
7 use crate::vhost_user::{Error, Result, VhostUserCommon};
8 use crate::{
9     ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType,
10     VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
11 };
12 use crate::{GuestMemoryMmap, GuestRegionMmap};
13 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
14 use seccompiler::SeccompAction;
15 use std::result;
16 use std::sync::atomic::AtomicBool;
17 use std::sync::{Arc, Barrier, Mutex};
18 use std::thread;
19 use std::vec::Vec;
20 use versionize::{VersionMap, Versionize, VersionizeResult};
21 use versionize_derive::Versionize;
22 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
23 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
24 use virtio_bindings::virtio_net::{
25     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
26     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
27     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
28     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU,
29 };
30 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
31 use virtio_queue::{Queue, QueueT};
32 use vm_memory::{ByteValued, GuestMemoryAtomic};
33 use vm_migration::{
34     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
35     Transportable, VersionMapped,
36 };
37 use vmm_sys_util::eventfd::EventFd;
38 
/// Baseline number of virtqueues for the device.
///
/// Used as the backend queue count when the vhost-user `MQ` protocol feature
/// has not been negotiated, and as the minimum number of queues advertised
/// through `VirtioCommon::min_queues`.
const DEFAULT_QUEUE_NUMBER: usize = 2;
40 
/// Snapshot of the vhost-user-net device, produced by `Net::state()` and
/// consumed by `Net::new()` when restoring a device.
#[derive(Versionize)]
pub struct State {
    /// Virtio features offered to the guest.
    pub avail_features: u64,
    /// Virtio features that have been acknowledged.
    pub acked_features: u64,
    /// Contents of the virtio-net configuration space.
    pub config: VirtioNetConfig,
    /// vhost-user protocol features acknowledged with the backend.
    pub acked_protocol_features: u64,
    /// Number of queues recorded in the vhost-user common state.
    pub vu_num_queues: usize,
}

// Marker implementation; no items are overridden here.
impl VersionMapped for State {}
51 
52 struct BackendReqHandler {}
53 impl VhostUserFrontendReqHandler for BackendReqHandler {}
54 
/// vhost-user network device: the virtio-net frontend whose data queues are
/// handed to an external vhost-user backend.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, pause/kill events,
    /// interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, acked protocol features,
    /// socket path, queue count).
    vu_common: VhostUserCommon,
    /// Device identifier; used for log messages, thread names and as the
    /// snapshot id.
    id: String,
    /// Configuration space returned by `read_config`.
    config: VirtioNetConfig,
    /// Guest memory handle, set on activation and reused for memory hotplug
    /// and dirty logging.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Handle of the thread serving the control queue, if one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Handle of the thread running the vhost-user handler.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action passed to the spawned virtio threads.
    seccomp_action: SeccompAction,
    /// Exit event passed to the spawned virtio threads.
    exit_evt: EventFd,
    /// When true, `VIRTIO_F_IOMMU_PLATFORM` is added to the offered features.
    iommu: bool,
}
67 
68 impl Net {
69     /// Create a new vhost-user-net device
70     #[allow(clippy::too_many_arguments)]
71     pub fn new(
72         id: String,
73         mac_addr: MacAddr,
74         mtu: Option<u16>,
75         vu_cfg: VhostUserConfig,
76         server: bool,
77         seccomp_action: SeccompAction,
78         exit_evt: EventFd,
79         iommu: bool,
80         state: Option<State>,
81         offload_tso: bool,
82         offload_ufo: bool,
83         offload_csum: bool,
84     ) -> Result<Net> {
85         let mut num_queues = vu_cfg.num_queues;
86 
87         let mut vu =
88             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
89 
90         let (
91             avail_features,
92             acked_features,
93             acked_protocol_features,
94             vu_num_queues,
95             config,
96             paused,
97         ) = if let Some(state) = state {
98             info!("Restoring vhost-user-net {}", id);
99 
100             // The backend acknowledged features must not contain
101             // VIRTIO_NET_F_MAC since we don't expect the backend
102             // to handle it.
103             let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC);
104 
105             vu.set_protocol_features_vhost_user(
106                 backend_acked_features,
107                 state.acked_protocol_features,
108             )?;
109 
110             // If the control queue feature has been negotiated, let's
111             // increase the number of queues.
112             if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
113                 num_queues += 1;
114             }
115 
116             (
117                 state.avail_features,
118                 state.acked_features,
119                 state.acked_protocol_features,
120                 state.vu_num_queues,
121                 state.config,
122                 true,
123             )
124         } else {
125             // Filling device and vring features VMM supports.
126             let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF
127                 | 1 << VIRTIO_NET_F_CTRL_VQ
128                 | 1 << VIRTIO_F_RING_EVENT_IDX
129                 | 1 << VIRTIO_F_VERSION_1
130                 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
131 
132             if mtu.is_some() {
133                 avail_features |= 1u64 << VIRTIO_NET_F_MTU;
134             }
135 
136             // Configure TSO/UFO features when hardware checksum offload is enabled.
137             if offload_csum {
138                 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM;
139 
140                 if offload_tso {
141                     avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
142                         | 1 << VIRTIO_NET_F_HOST_TSO4
143                         | 1 << VIRTIO_NET_F_HOST_TSO6
144                         | 1 << VIRTIO_NET_F_GUEST_ECN
145                         | 1 << VIRTIO_NET_F_GUEST_TSO4
146                         | 1 << VIRTIO_NET_F_GUEST_TSO6;
147                 }
148 
149                 if offload_ufo {
150                     avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
151                 }
152             }
153 
154             let mut config = VirtioNetConfig::default();
155             build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features);
156 
157             let avail_protocol_features = VhostUserProtocolFeatures::MQ
158                 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
159                 | VhostUserProtocolFeatures::REPLY_ACK
160                 | VhostUserProtocolFeatures::INFLIGHT_SHMFD
161                 | VhostUserProtocolFeatures::LOG_SHMFD;
162 
163             let (mut acked_features, acked_protocol_features) =
164                 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
165 
166             let backend_num_queues =
167                 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
168                     vu.socket_handle()
169                         .get_queue_num()
170                         .map_err(Error::VhostUserGetQueueMaxNum)? as usize
171                 } else {
172                     DEFAULT_QUEUE_NUMBER
173                 };
174 
175             if num_queues > backend_num_queues {
176                 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
177                 num_queues, backend_num_queues);
178                 return Err(Error::BadQueueNum);
179             }
180 
181             // If the control queue feature has been negotiated, let's increase
182             // the number of queues.
183             let vu_num_queues = num_queues;
184             if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
185                 num_queues += 1;
186             }
187 
188             // Make sure the virtio feature to set the MAC address is exposed to
189             // the guest, even if it hasn't been negotiated with the backend.
190             acked_features |= 1 << VIRTIO_NET_F_MAC;
191 
192             (
193                 acked_features,
194                 // If part of the available features that have been acked,
195                 // the PROTOCOL_FEATURES bit must be already set through
196                 // the VIRTIO acked features as we know the guest would
197                 // never ack it, thus the feature would be lost.
198                 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
199                 acked_protocol_features,
200                 vu_num_queues,
201                 config,
202                 false,
203             )
204         };
205 
206         Ok(Net {
207             id,
208             common: VirtioCommon {
209                 device_type: VirtioDeviceType::Net as u32,
210                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
211                 avail_features,
212                 acked_features,
213                 paused_sync: Some(Arc::new(Barrier::new(2))),
214                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
215                 paused: Arc::new(AtomicBool::new(paused)),
216                 ..Default::default()
217             },
218             vu_common: VhostUserCommon {
219                 vu: Some(Arc::new(Mutex::new(vu))),
220                 acked_protocol_features,
221                 socket_path: vu_cfg.socket,
222                 vu_num_queues,
223                 server,
224                 ..Default::default()
225             },
226             config,
227             guest_memory: None,
228             ctrl_queue_epoll_thread: None,
229             epoll_thread: None,
230             seccomp_action,
231             exit_evt,
232             iommu,
233         })
234     }
235 
236     fn state(&self) -> State {
237         State {
238             avail_features: self.common.avail_features,
239             acked_features: self.common.acked_features,
240             config: self.config,
241             acked_protocol_features: self.vu_common.acked_protocol_features,
242             vu_num_queues: self.vu_common.vu_num_queues,
243         }
244     }
245 }
246 
247 impl Drop for Net {
248     fn drop(&mut self) {
249         if let Some(kill_evt) = self.common.kill_evt.take() {
250             if let Err(e) = kill_evt.write(1) {
251                 error!("failed to kill vhost-user-net: {:?}", e);
252             }
253         }
254 
255         self.common.wait_for_epoll_threads();
256 
257         if let Some(thread) = self.epoll_thread.take() {
258             if let Err(e) = thread.join() {
259                 error!("Error joining thread: {:?}", e);
260             }
261         }
262         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
263             if let Err(e) = thread.join() {
264                 error!("Error joining thread: {:?}", e);
265             }
266         }
267     }
268 }
269 
270 impl VirtioDevice for Net {
271     fn device_type(&self) -> u32 {
272         self.common.device_type
273     }
274 
275     fn queue_max_sizes(&self) -> &[u16] {
276         &self.common.queue_sizes
277     }
278 
279     fn features(&self) -> u64 {
280         let mut features = self.common.avail_features;
281         if self.iommu {
282             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
283         }
284         features
285     }
286 
287     fn ack_features(&mut self, value: u64) {
288         self.common.ack_features(value)
289     }
290 
291     fn read_config(&self, offset: u64, data: &mut [u8]) {
292         self.read_config_from_slice(self.config.as_slice(), offset, data);
293     }
294 
    /// Activate the device once the guest driver has set up its queues.
    ///
    /// If the control queue feature was acked and an odd number of queues was
    /// provided, the last queue is removed from `queues` and served by a
    /// dedicated control thread in the VMM. The remaining queues are handed to
    /// the vhost-user common activation, and the handler it returns is run on
    /// a second thread.
    ///
    /// Errors from the common activation, the vhost-user activation or the
    /// thread spawning are propagated to the caller.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        // Keep a handle on guest memory for later memory hotplug and dirty
        // logging requests.
        self.guest_memory = Some(mem.clone());

        let num_queues = queues.len();
        let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
        // An odd queue count together with an acked CTRL_VQ feature means the
        // last queue is the control queue.
        if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
            let ctrl_queue_index = num_queues - 1;
            // Take the control queue out so it is not passed to the backend.
            let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);

            ctrl_queue.set_event_idx(event_idx);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();

            // NOTE(review): no access platform is set for the control queue,
            // even when the device was created with `iommu` — confirm this is
            // intended.
            let mut ctrl_handler = NetCtrlEpollHandler {
                mem: mem.clone(),
                kill_evt,
                pause_evt,
                ctrl_q: CtrlQueue::new(Vec::new()),
                queue: ctrl_queue,
                queue_evt: ctrl_queue_evt,
                access_platform: None,
                interrupt_cb: interrupt_cb.clone(),
                queue_index: ctrl_queue_index as u16,
            };

            let paused = self.common.paused.clone();
            // Let's update the barrier as we need 1 for the control queue
            // thread + 1 for the common vhost-user thread + 1 for the main
            // thread signalling the pause.
            self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
            let paused_sync = self.common.paused_sync.clone();

            let mut epoll_threads = Vec::new();
            spawn_virtio_thread(
                &format!("{}_ctrl", &self.id),
                &self.seccomp_action,
                Thread::VirtioVhostNetCtl,
                &mut epoll_threads,
                &self.exit_evt,
                move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
            )?;
            // Take the first handle from the list passed to
            // `spawn_virtio_thread`.
            self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
        }

        // No handler is installed for requests coming from the backend.
        let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None;

        // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
        // since we don't expect the backend to handle it.
        let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);

        // Run a dedicated thread for handling potential reconnections with
        // the backend.
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let mut handler = self.vu_common.activate(
            mem,
            queues,
            interrupt_cb,
            backend_acked_features,
            backend_req_handler,
            kill_evt,
            pause_evt,
        )?;

        // Cloned after the optional barrier replacement above, so this thread
        // shares the same barrier as the control queue thread.
        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();

        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioVhostNet,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.epoll_thread = Some(epoll_threads.remove(0));

        Ok(())
    }
381 
382     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
383         // We first must resume the virtio thread if it was paused.
384         if self.common.pause_evt.take().is_some() {
385             self.common.resume().ok()?;
386         }
387 
388         if let Some(vu) = &self.vu_common.vu {
389             if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
390                 error!("Failed to reset vhost-user daemon: {:?}", e);
391                 return None;
392             }
393         }
394 
395         if let Some(kill_evt) = self.common.kill_evt.take() {
396             // Ignore the result because there is nothing we can do about it.
397             let _ = kill_evt.write(1);
398         }
399 
400         event!("virtio-device", "reset", "id", &self.id);
401 
402         // Return the interrupt
403         Some(self.common.interrupt_cb.take().unwrap())
404     }
405 
406     fn shutdown(&mut self) {
407         self.vu_common.shutdown();
408     }
409 
410     fn add_memory_region(
411         &mut self,
412         region: &Arc<GuestRegionMmap>,
413     ) -> std::result::Result<(), crate::Error> {
414         self.vu_common.add_memory_region(&self.guest_memory, region)
415     }
416 }
417 
418 impl Pausable for Net {
419     fn pause(&mut self) -> result::Result<(), MigratableError> {
420         self.vu_common.pause()?;
421         self.common.pause()
422     }
423 
424     fn resume(&mut self) -> result::Result<(), MigratableError> {
425         self.common.resume()?;
426 
427         if let Some(epoll_thread) = &self.epoll_thread {
428             epoll_thread.thread().unpark();
429         }
430 
431         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
432             ctrl_queue_epoll_thread.thread().unpark();
433         }
434 
435         self.vu_common.resume()
436     }
437 }
438 
439 impl Snapshottable for Net {
440     fn id(&self) -> String {
441         self.id.clone()
442     }
443 
444     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
445         self.vu_common.snapshot(&self.state())
446     }
447 }
// Marker implementation; no items are overridden here.
impl Transportable for Net {}
449 
450 impl Migratable for Net {
451     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
452         self.vu_common.start_dirty_log(&self.guest_memory)
453     }
454 
455     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
456         self.vu_common.stop_dirty_log()
457     }
458 
459     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
460         self.vu_common.dirty_log(&self.guest_memory)
461     }
462 
463     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
464         self.vu_common.start_migration()
465     }
466 
467     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
468         self.vu_common
469             .complete_migration(self.common.kill_evt.take())
470     }
471 }
472