xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision adb318f4cd0079246b3cb07e01c4e978330445d2)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use crate::seccomp_filters::Thread;
5 use crate::thread_helper::spawn_virtio_thread;
6 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
7 use crate::vhost_user::{Error, Result, VhostUserCommon};
8 use crate::{
9     ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType,
10     VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
11 };
12 use crate::{GuestMemoryMmap, GuestRegionMmap};
13 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
14 use seccompiler::SeccompAction;
15 use std::result;
16 use std::sync::atomic::AtomicBool;
17 use std::sync::{Arc, Barrier, Mutex};
18 use std::thread;
19 use versionize::{VersionMap, Versionize, VersionizeResult};
20 use versionize_derive::Versionize;
21 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
22 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
23 use virtio_bindings::virtio_net::{
24     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
25     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
26     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
27     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU,
28 };
29 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
30 use virtio_queue::{Queue, QueueT};
31 use vm_memory::{ByteValued, GuestMemoryAtomic};
32 use vm_migration::{
33     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
34     Transportable, VersionMapped,
35 };
36 use vmm_sys_util::eventfd::EventFd;
37 
/// Queue count assumed for a backend that did not negotiate the vhost-user
/// `MQ` protocol feature; also used as the device's minimum number of queues.
const DEFAULT_QUEUE_NUMBER: usize = 2;
39 
/// Snapshot of a vhost-user-net device, produced by `Net::state` and consumed
/// by `Net::new` when restoring a device.
#[derive(Versionize)]
pub struct State {
    /// Copy of the device's `VirtioCommon::avail_features`.
    pub avail_features: u64,
    /// Copy of the device's `VirtioCommon::acked_features`.
    pub acked_features: u64,
    /// Virtio-net configuration space of the device.
    pub config: VirtioNetConfig,
    /// vhost-user protocol features acknowledged with the backend.
    pub acked_protocol_features: u64,
    /// Number of queues handled by the backend (the control queue is not
    /// counted here).
    pub vu_num_queues: usize,
}
48 
// Empty impl: `State` relies entirely on the trait's provided items.
impl VersionMapped for State {}
50 
/// Placeholder request handler type. It carries no state and overrides none of
/// the `VhostUserFrontendReqHandler` methods; in this file it is only used to
/// name the type of the `None` handler passed at activation.
struct BackendReqHandler {}
impl VhostUserFrontendReqHandler for BackendReqHandler {}
53 
/// vhost-user-net device: the data queues are handed to an external vhost-user
/// backend while the optional control queue is handled by a local thread.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, pause/kill events).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, protocol features, socket).
    vu_common: VhostUserCommon,
    /// Device identifier; used for thread names, logging and snapshots.
    id: String,
    /// Virtio-net configuration space served by `read_config`.
    config: VirtioNetConfig,
    /// Guest memory, recorded when the device is activated.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Handle of the control queue thread, when one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Handle of the thread running the vhost-user handler.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action passed to every thread spawned by this device.
    seccomp_action: SeccompAction,
    /// Exit event passed to every thread spawned by this device.
    exit_evt: EventFd,
    /// When true, `VIRTIO_F_IOMMU_PLATFORM` is added to the advertised features.
    iommu: bool,
}
66 
67 impl Net {
68     /// Create a new vhost-user-net device
69     #[allow(clippy::too_many_arguments)]
70     pub fn new(
71         id: String,
72         mac_addr: MacAddr,
73         mtu: Option<u16>,
74         vu_cfg: VhostUserConfig,
75         server: bool,
76         seccomp_action: SeccompAction,
77         exit_evt: EventFd,
78         iommu: bool,
79         state: Option<State>,
80         offload_tso: bool,
81         offload_ufo: bool,
82         offload_csum: bool,
83     ) -> Result<Net> {
84         let mut num_queues = vu_cfg.num_queues;
85 
86         let mut vu =
87             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
88 
89         let (
90             avail_features,
91             acked_features,
92             acked_protocol_features,
93             vu_num_queues,
94             config,
95             paused,
96         ) = if let Some(state) = state {
97             info!("Restoring vhost-user-net {}", id);
98 
99             // The backend acknowledged features must not contain
100             // VIRTIO_NET_F_MAC since we don't expect the backend
101             // to handle it.
102             let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC);
103 
104             vu.set_protocol_features_vhost_user(
105                 backend_acked_features,
106                 state.acked_protocol_features,
107             )?;
108 
109             // If the control queue feature has been negotiated, let's
110             // increase the number of queues.
111             if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
112                 num_queues += 1;
113             }
114 
115             (
116                 state.avail_features,
117                 state.acked_features,
118                 state.acked_protocol_features,
119                 state.vu_num_queues,
120                 state.config,
121                 true,
122             )
123         } else {
124             // Filling device and vring features VMM supports.
125             let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF
126                 | 1 << VIRTIO_NET_F_CTRL_VQ
127                 | 1 << VIRTIO_F_RING_EVENT_IDX
128                 | 1 << VIRTIO_F_VERSION_1
129                 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
130 
131             if mtu.is_some() {
132                 avail_features |= 1u64 << VIRTIO_NET_F_MTU;
133             }
134 
135             // Configure TSO/UFO features when hardware checksum offload is enabled.
136             if offload_csum {
137                 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM;
138 
139                 if offload_tso {
140                     avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
141                         | 1 << VIRTIO_NET_F_HOST_TSO4
142                         | 1 << VIRTIO_NET_F_HOST_TSO6
143                         | 1 << VIRTIO_NET_F_GUEST_ECN
144                         | 1 << VIRTIO_NET_F_GUEST_TSO4
145                         | 1 << VIRTIO_NET_F_GUEST_TSO6;
146                 }
147 
148                 if offload_ufo {
149                     avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
150                 }
151             }
152 
153             let mut config = VirtioNetConfig::default();
154             build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features);
155 
156             let avail_protocol_features = VhostUserProtocolFeatures::MQ
157                 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
158                 | VhostUserProtocolFeatures::REPLY_ACK
159                 | VhostUserProtocolFeatures::INFLIGHT_SHMFD
160                 | VhostUserProtocolFeatures::LOG_SHMFD;
161 
162             let (mut acked_features, acked_protocol_features) =
163                 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
164 
165             let backend_num_queues =
166                 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
167                     vu.socket_handle()
168                         .get_queue_num()
169                         .map_err(Error::VhostUserGetQueueMaxNum)? as usize
170                 } else {
171                     DEFAULT_QUEUE_NUMBER
172                 };
173 
174             if num_queues > backend_num_queues {
175                 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
176                 num_queues, backend_num_queues);
177                 return Err(Error::BadQueueNum);
178             }
179 
180             // If the control queue feature has been negotiated, let's increase
181             // the number of queues.
182             let vu_num_queues = num_queues;
183             if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
184                 num_queues += 1;
185             }
186 
187             // Make sure the virtio feature to set the MAC address is exposed to
188             // the guest, even if it hasn't been negotiated with the backend.
189             acked_features |= 1 << VIRTIO_NET_F_MAC;
190 
191             (
192                 acked_features,
193                 // If part of the available features that have been acked,
194                 // the PROTOCOL_FEATURES bit must be already set through
195                 // the VIRTIO acked features as we know the guest would
196                 // never ack it, thus the feature would be lost.
197                 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
198                 acked_protocol_features,
199                 vu_num_queues,
200                 config,
201                 false,
202             )
203         };
204 
205         Ok(Net {
206             id,
207             common: VirtioCommon {
208                 device_type: VirtioDeviceType::Net as u32,
209                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
210                 avail_features,
211                 acked_features,
212                 paused_sync: Some(Arc::new(Barrier::new(2))),
213                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
214                 paused: Arc::new(AtomicBool::new(paused)),
215                 ..Default::default()
216             },
217             vu_common: VhostUserCommon {
218                 vu: Some(Arc::new(Mutex::new(vu))),
219                 acked_protocol_features,
220                 socket_path: vu_cfg.socket,
221                 vu_num_queues,
222                 server,
223                 ..Default::default()
224             },
225             config,
226             guest_memory: None,
227             ctrl_queue_epoll_thread: None,
228             epoll_thread: None,
229             seccomp_action,
230             exit_evt,
231             iommu,
232         })
233     }
234 
235     fn state(&self) -> State {
236         State {
237             avail_features: self.common.avail_features,
238             acked_features: self.common.acked_features,
239             config: self.config,
240             acked_protocol_features: self.vu_common.acked_protocol_features,
241             vu_num_queues: self.vu_common.vu_num_queues,
242         }
243     }
244 }
245 
246 impl Drop for Net {
247     fn drop(&mut self) {
248         if let Some(kill_evt) = self.common.kill_evt.take() {
249             if let Err(e) = kill_evt.write(1) {
250                 error!("failed to kill vhost-user-net: {:?}", e);
251             }
252         }
253 
254         self.common.wait_for_epoll_threads();
255 
256         if let Some(thread) = self.epoll_thread.take() {
257             if let Err(e) = thread.join() {
258                 error!("Error joining thread: {:?}", e);
259             }
260         }
261         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
262             if let Err(e) = thread.join() {
263                 error!("Error joining thread: {:?}", e);
264             }
265         }
266     }
267 }
268 
269 impl VirtioDevice for Net {
    /// Returns the device type stored in the common virtio state.
    fn device_type(&self) -> u32 {
        self.common.device_type
    }
273 
    /// Returns the per-queue sizes stored in the common virtio state, one
    /// entry per queue.
    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }
277 
278     fn features(&self) -> u64 {
279         let mut features = self.common.avail_features;
280         if self.iommu {
281             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
282         }
283         features
284     }
285 
    /// Forwards the acknowledged feature bits in `value` to the common virtio
    /// state.
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }
289 
290     fn read_config(&self, offset: u64, data: &mut [u8]) {
291         self.read_config_from_slice(self.config.as_slice(), offset, data);
292     }
293 
294     fn activate(
295         &mut self,
296         mem: GuestMemoryAtomic<GuestMemoryMmap>,
297         interrupt_cb: Arc<dyn VirtioInterrupt>,
298         mut queues: Vec<(usize, Queue, EventFd)>,
299     ) -> ActivateResult {
300         self.common.activate(&queues, &interrupt_cb)?;
301         self.guest_memory = Some(mem.clone());
302 
303         let num_queues = queues.len();
304         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
305         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
306             let ctrl_queue_index = num_queues - 1;
307             let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);
308 
309             ctrl_queue.set_event_idx(event_idx);
310 
311             let (kill_evt, pause_evt) = self.common.dup_eventfds();
312 
313             let mut ctrl_handler = NetCtrlEpollHandler {
314                 mem: mem.clone(),
315                 kill_evt,
316                 pause_evt,
317                 ctrl_q: CtrlQueue::new(Vec::new()),
318                 queue: ctrl_queue,
319                 queue_evt: ctrl_queue_evt,
320                 access_platform: None,
321                 interrupt_cb: interrupt_cb.clone(),
322                 queue_index: ctrl_queue_index as u16,
323             };
324 
325             let paused = self.common.paused.clone();
326             // Let's update the barrier as we need 1 for the control queue
327             // thread + 1 for the common vhost-user thread + 1 for the main
328             // thread signalling the pause.
329             self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
330             let paused_sync = self.common.paused_sync.clone();
331 
332             let mut epoll_threads = Vec::new();
333             spawn_virtio_thread(
334                 &format!("{}_ctrl", &self.id),
335                 &self.seccomp_action,
336                 Thread::VirtioVhostNetCtl,
337                 &mut epoll_threads,
338                 &self.exit_evt,
339                 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
340             )?;
341             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
342         }
343 
344         let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None;
345 
346         // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
347         // since we don't expect the backend to handle it.
348         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);
349 
350         // Run a dedicated thread for handling potential reconnections with
351         // the backend.
352         let (kill_evt, pause_evt) = self.common.dup_eventfds();
353 
354         let mut handler = self.vu_common.activate(
355             mem,
356             queues,
357             interrupt_cb,
358             backend_acked_features,
359             backend_req_handler,
360             kill_evt,
361             pause_evt,
362         )?;
363 
364         let paused = self.common.paused.clone();
365         let paused_sync = self.common.paused_sync.clone();
366 
367         let mut epoll_threads = Vec::new();
368         spawn_virtio_thread(
369             &self.id,
370             &self.seccomp_action,
371             Thread::VirtioVhostNet,
372             &mut epoll_threads,
373             &self.exit_evt,
374             move || handler.run(paused, paused_sync.unwrap()),
375         )?;
376         self.epoll_thread = Some(epoll_threads.remove(0));
377 
378         Ok(())
379     }
380 
381     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
382         // We first must resume the virtio thread if it was paused.
383         if self.common.pause_evt.take().is_some() {
384             self.common.resume().ok()?;
385         }
386 
387         if let Some(vu) = &self.vu_common.vu {
388             if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
389                 error!("Failed to reset vhost-user daemon: {:?}", e);
390                 return None;
391             }
392         }
393 
394         if let Some(kill_evt) = self.common.kill_evt.take() {
395             // Ignore the result because there is nothing we can do about it.
396             let _ = kill_evt.write(1);
397         }
398 
399         event!("virtio-device", "reset", "id", &self.id);
400 
401         // Return the interrupt
402         Some(self.common.interrupt_cb.take().unwrap())
403     }
404 
    /// Delegates shutdown to the vhost-user common state.
    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }
408 
    /// Forwards a new guest memory `region` to the vhost-user common state,
    /// together with the guest memory recorded by this device (if any).
    fn add_memory_region(
        &mut self,
        region: &Arc<GuestRegionMmap>,
    ) -> std::result::Result<(), crate::Error> {
        self.vu_common.add_memory_region(&self.guest_memory, region)
    }
415 }
416 
417 impl Pausable for Net {
418     fn pause(&mut self) -> result::Result<(), MigratableError> {
419         self.vu_common.pause()?;
420         self.common.pause()
421     }
422 
423     fn resume(&mut self) -> result::Result<(), MigratableError> {
424         self.common.resume()?;
425 
426         if let Some(epoll_thread) = &self.epoll_thread {
427             epoll_thread.thread().unpark();
428         }
429 
430         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
431             ctrl_queue_epoll_thread.thread().unpark();
432         }
433 
434         self.vu_common.resume()
435     }
436 }
437 
438 impl Snapshottable for Net {
439     fn id(&self) -> String {
440         self.id.clone()
441     }
442 
443     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
444         self.vu_common.snapshot(&self.state())
445     }
446 }
// Empty impl: `Net` overrides none of the `Transportable` methods.
impl Transportable for Net {}
448 
// Every migration operation is delegated to the vhost-user common state.
impl Migratable for Net {
    /// Starts dirty logging, passing along the guest memory recorded by this
    /// device.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_dirty_log(&self.guest_memory)
    }

    /// Stops dirty logging.
    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.stop_dirty_log()
    }

    /// Returns the table of dirty memory ranges computed by the vhost-user
    /// common state.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        self.vu_common.dirty_log(&self.guest_memory)
    }

    /// Notifies the vhost-user common state that a migration is starting.
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_migration()
    }

    /// Completes the migration. The kill event is taken out of the common
    /// state and handed over, so this device no longer holds it afterwards.
    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common
            .complete_migration(self.common.kill_evt.take())
    }
}
471