xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use crate::seccomp_filters::Thread;
5 use crate::thread_helper::spawn_virtio_thread;
6 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
7 use crate::vhost_user::{Error, Result, VhostUserCommon};
8 use crate::{
9     ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType,
10     VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
11 };
12 use crate::{GuestMemoryMmap, GuestRegionMmap};
13 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
14 use seccompiler::SeccompAction;
15 use std::result;
16 use std::sync::{Arc, Barrier, Mutex};
17 use std::thread;
18 use std::vec::Vec;
19 use versionize::{VersionMap, Versionize, VersionizeResult};
20 use versionize_derive::Versionize;
21 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
22 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler};
23 use virtio_bindings::bindings::virtio_net::{
24     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
25     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
26     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
27     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
28 };
29 use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
30 use virtio_queue::Queue;
31 use vm_memory::{ByteValued, GuestMemoryAtomic};
32 use vm_migration::{
33     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
34     Transportable, VersionMapped,
35 };
36 use vmm_sys_util::eventfd::EventFd;
37 
/// Default number of queues for a vhost-user-net device.
///
/// Within this file it is used as the queue count assumed for a backend that
/// did not acknowledge the `MQ` protocol feature, and as the value stored in
/// `VirtioCommon::min_queues`.
// NOTE(review): presumably this stands for one RX plus one TX queue — confirm.
const DEFAULT_QUEUE_NUMBER: usize = 2;
39 
/// Versioned snapshot state of a vhost-user-net device.
///
/// Produced by `Net::state()` and applied by `Net::set_state()`.
// NOTE(review): fields of a `Versionize` struct are presumably serialized in
// declaration order — do not reorder them without checking compatibility.
#[derive(Versionize)]
pub struct State {
    /// Copy of `VirtioCommon::avail_features`.
    pub avail_features: u64,
    /// Copy of `VirtioCommon::acked_features`.
    pub acked_features: u64,
    /// Copy of the device's virtio-net configuration space.
    pub config: VirtioNetConfig,
    /// Copy of `VhostUserCommon::acked_protocol_features`.
    pub acked_protocol_features: u64,
    /// Copy of `VhostUserCommon::vu_num_queues`.
    pub vu_num_queues: usize,
}
48 
// Empty impl: `State` relies entirely on the trait's provided defaults.
impl VersionMapped for State {}
50 
/// Placeholder request-handler type for this device.
///
/// In this file it is only used as the type parameter of the
/// `Option<MasterReqHandler<_>>` that `activate()` passes (always as `None`)
/// to `VhostUserCommon::activate`.
struct SlaveReqHandler {}
// Empty impl: no handler method is overridden here.
impl VhostUserMasterReqHandler for SlaveReqHandler {}
53 
/// vhost-user-net virtio device.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, kill/pause events,
    /// interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, protocol features, socket
    /// path, number of backend queues).
    vu_common: VhostUserCommon,
    /// Device identifier; used for worker thread names, snapshots and events.
    id: String,
    /// virtio-net configuration space served by `read_config()`.
    config: VirtioNetConfig,
    /// Guest memory handed to `activate()`; `None` until then.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Join handle of the control queue worker thread, if one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Join handle of the vhost-user worker thread spawned by `activate()`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action passed to `spawn_virtio_thread` for the worker threads.
    seccomp_action: SeccompAction,
    /// Event fd passed to `spawn_virtio_thread` for the worker threads.
    exit_evt: EventFd,
    /// When `true`, `features()` additionally offers `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}
66 
67 impl Net {
68     /// Create a new vhost-user-net device
69     #[allow(clippy::too_many_arguments)]
70     pub fn new(
71         id: String,
72         mac_addr: MacAddr,
73         vu_cfg: VhostUserConfig,
74         server: bool,
75         seccomp_action: SeccompAction,
76         restoring: bool,
77         exit_evt: EventFd,
78         iommu: bool,
79     ) -> Result<Net> {
80         let mut num_queues = vu_cfg.num_queues;
81 
82         if restoring {
83             // We need 'queue_sizes' to report a number of queues that will be
84             // enough to handle all the potential queues. Including the control
85             // queue (with +1) will guarantee that. VirtioPciDevice::new() will
86             // create the actual queues based on this information.
87             return Ok(Net {
88                 common: VirtioCommon {
89                     device_type: VirtioDeviceType::Net as u32,
90                     queue_sizes: vec![vu_cfg.queue_size; num_queues + 1],
91                     paused_sync: Some(Arc::new(Barrier::new(2))),
92                     min_queues: DEFAULT_QUEUE_NUMBER as u16,
93                     ..Default::default()
94                 },
95                 vu_common: VhostUserCommon {
96                     socket_path: vu_cfg.socket,
97                     vu_num_queues: num_queues,
98                     server,
99                     ..Default::default()
100                 },
101                 id,
102                 config: VirtioNetConfig::default(),
103                 guest_memory: None,
104                 ctrl_queue_epoll_thread: None,
105                 epoll_thread: None,
106                 seccomp_action,
107                 exit_evt,
108                 iommu,
109             });
110         }
111 
112         // Filling device and vring features VMM supports.
113         let mut avail_features = 1 << VIRTIO_NET_F_CSUM
114             | 1 << VIRTIO_NET_F_GUEST_CSUM
115             | 1 << VIRTIO_NET_F_GUEST_TSO4
116             | 1 << VIRTIO_NET_F_GUEST_TSO6
117             | 1 << VIRTIO_NET_F_GUEST_ECN
118             | 1 << VIRTIO_NET_F_GUEST_UFO
119             | 1 << VIRTIO_NET_F_HOST_TSO4
120             | 1 << VIRTIO_NET_F_HOST_TSO6
121             | 1 << VIRTIO_NET_F_HOST_ECN
122             | 1 << VIRTIO_NET_F_HOST_UFO
123             | 1 << VIRTIO_NET_F_MRG_RXBUF
124             | 1 << VIRTIO_NET_F_CTRL_VQ
125             | 1 << VIRTIO_F_RING_EVENT_IDX
126             | 1 << VIRTIO_F_VERSION_1
127             | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
128 
129         let mut config = VirtioNetConfig::default();
130         build_net_config_space(&mut config, mac_addr, num_queues, &mut avail_features);
131 
132         let mut vu =
133             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
134 
135         let avail_protocol_features = VhostUserProtocolFeatures::MQ
136             | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
137             | VhostUserProtocolFeatures::REPLY_ACK
138             | VhostUserProtocolFeatures::INFLIGHT_SHMFD
139             | VhostUserProtocolFeatures::LOG_SHMFD;
140 
141         let (mut acked_features, acked_protocol_features) =
142             vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
143 
144         let backend_num_queues =
145             if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
146                 vu.socket_handle()
147                     .get_queue_num()
148                     .map_err(Error::VhostUserGetQueueMaxNum)? as usize
149             } else {
150                 DEFAULT_QUEUE_NUMBER
151             };
152 
153         if num_queues > backend_num_queues {
154             error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
155                 num_queues, backend_num_queues);
156             return Err(Error::BadQueueNum);
157         }
158 
159         // If the control queue feature has been negotiated, let's increase
160         // the number of queues.
161         let vu_num_queues = num_queues;
162         if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
163             num_queues += 1;
164         }
165 
166         // Make sure the virtio feature to set the MAC address is exposed to
167         // the guest, even if it hasn't been negotiated with the backend.
168         acked_features |= 1 << VIRTIO_NET_F_MAC;
169 
170         Ok(Net {
171             id,
172             common: VirtioCommon {
173                 device_type: VirtioDeviceType::Net as u32,
174                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
175                 avail_features: acked_features,
176                 // If part of the available features that have been acked, the
177                 // PROTOCOL_FEATURES bit must be already set through the VIRTIO
178                 // acked features as we know the guest would never ack it, thus
179                 // the feature would be lost.
180                 acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
181                 paused_sync: Some(Arc::new(Barrier::new(2))),
182                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
183                 ..Default::default()
184             },
185             vu_common: VhostUserCommon {
186                 vu: Some(Arc::new(Mutex::new(vu))),
187                 acked_protocol_features,
188                 socket_path: vu_cfg.socket,
189                 vu_num_queues,
190                 server,
191                 ..Default::default()
192             },
193             config,
194             guest_memory: None,
195             ctrl_queue_epoll_thread: None,
196             epoll_thread: None,
197             seccomp_action,
198             exit_evt,
199             iommu,
200         })
201     }
202 
203     fn state(&self) -> State {
204         State {
205             avail_features: self.common.avail_features,
206             acked_features: self.common.acked_features,
207             config: self.config,
208             acked_protocol_features: self.vu_common.acked_protocol_features,
209             vu_num_queues: self.vu_common.vu_num_queues,
210         }
211     }
212 
213     fn set_state(&mut self, state: &State) {
214         self.common.avail_features = state.avail_features;
215         self.common.acked_features = state.acked_features;
216         self.config = state.config;
217         self.vu_common.acked_protocol_features = state.acked_protocol_features;
218         self.vu_common.vu_num_queues = state.vu_num_queues;
219 
220         // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
221         // since we don't expect the backend to handle it.
222         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);
223 
224         if let Err(e) = self
225             .vu_common
226             .restore_backend_connection(backend_acked_features)
227         {
228             error!(
229                 "Failed restoring connection with vhost-user backend: {:?}",
230                 e
231             );
232         }
233     }
234 }
235 
236 impl Drop for Net {
237     fn drop(&mut self) {
238         if let Some(kill_evt) = self.common.kill_evt.take() {
239             if let Err(e) = kill_evt.write(1) {
240                 error!("failed to kill vhost-user-net: {:?}", e);
241             }
242         }
243     }
244 }
245 
246 impl VirtioDevice for Net {
247     fn device_type(&self) -> u32 {
248         self.common.device_type
249     }
250 
251     fn queue_max_sizes(&self) -> &[u16] {
252         &self.common.queue_sizes
253     }
254 
255     fn features(&self) -> u64 {
256         let mut features = self.common.avail_features;
257         if self.iommu {
258             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
259         }
260         features
261     }
262 
263     fn ack_features(&mut self, value: u64) {
264         self.common.ack_features(value)
265     }
266 
267     fn read_config(&self, offset: u64, data: &mut [u8]) {
268         self.read_config_from_slice(self.config.as_slice(), offset, data);
269     }
270 
271     fn activate(
272         &mut self,
273         mem: GuestMemoryAtomic<GuestMemoryMmap>,
274         interrupt_cb: Arc<dyn VirtioInterrupt>,
275         mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
276         mut queue_evts: Vec<EventFd>,
277     ) -> ActivateResult {
278         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
279         self.guest_memory = Some(mem.clone());
280 
281         let num_queues = queues.len();
282         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
283         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
284             let ctrl_queue_index = num_queues - 1;
285             let mut ctrl_queue = queues.remove(ctrl_queue_index);
286             let ctrl_queue_evt = queue_evts.remove(ctrl_queue_index);
287 
288             ctrl_queue.set_event_idx(event_idx);
289 
290             let (kill_evt, pause_evt) = self.common.dup_eventfds();
291 
292             let mut ctrl_handler = NetCtrlEpollHandler {
293                 kill_evt,
294                 pause_evt,
295                 ctrl_q: CtrlQueue::new(Vec::new()),
296                 queue: ctrl_queue,
297                 queue_evt: ctrl_queue_evt,
298                 access_platform: None,
299                 interrupt_cb: interrupt_cb.clone(),
300                 queue_index: ctrl_queue_index as u16,
301             };
302 
303             let paused = self.common.paused.clone();
304             // Let's update the barrier as we need 1 for the control queue
305             // thread + 1 for the common vhost-user thread + 1 for the main
306             // thread signalling the pause.
307             self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
308             let paused_sync = self.common.paused_sync.clone();
309 
310             let mut epoll_threads = Vec::new();
311             spawn_virtio_thread(
312                 &format!("{}_ctrl", &self.id),
313                 &self.seccomp_action,
314                 Thread::VirtioVhostNetCtl,
315                 &mut epoll_threads,
316                 &self.exit_evt,
317                 move || {
318                     if let Err(e) = ctrl_handler.run_ctrl(paused, paused_sync.unwrap()) {
319                         error!("Error running worker: {:?}", e);
320                     }
321                 },
322             )?;
323             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
324         }
325 
326         let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None;
327 
328         // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
329         // since we don't expect the backend to handle it.
330         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);
331 
332         // Run a dedicated thread for handling potential reconnections with
333         // the backend.
334         let (kill_evt, pause_evt) = self.common.dup_eventfds();
335 
336         let mut handler = self.vu_common.activate(
337             mem,
338             queues,
339             queue_evts,
340             interrupt_cb,
341             backend_acked_features,
342             slave_req_handler,
343             kill_evt,
344             pause_evt,
345         )?;
346 
347         let paused = self.common.paused.clone();
348         let paused_sync = self.common.paused_sync.clone();
349 
350         let mut epoll_threads = Vec::new();
351         spawn_virtio_thread(
352             &self.id,
353             &self.seccomp_action,
354             Thread::VirtioVhostNet,
355             &mut epoll_threads,
356             &self.exit_evt,
357             move || {
358                 if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
359                     error!("Error running worker: {:?}", e);
360                 }
361             },
362         )?;
363         self.epoll_thread = Some(epoll_threads.remove(0));
364 
365         Ok(())
366     }
367 
368     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
369         // We first must resume the virtio thread if it was paused.
370         if self.common.pause_evt.take().is_some() {
371             self.common.resume().ok()?;
372         }
373 
374         if let Some(vu) = &self.vu_common.vu {
375             if let Err(e) = vu
376                 .lock()
377                 .unwrap()
378                 .reset_vhost_user(self.common.queue_sizes.len())
379             {
380                 error!("Failed to reset vhost-user daemon: {:?}", e);
381                 return None;
382             }
383         }
384 
385         if let Some(kill_evt) = self.common.kill_evt.take() {
386             // Ignore the result because there is nothing we can do about it.
387             let _ = kill_evt.write(1);
388         }
389 
390         event!("virtio-device", "reset", "id", &self.id);
391 
392         // Return the interrupt
393         Some(self.common.interrupt_cb.take().unwrap())
394     }
395 
396     fn shutdown(&mut self) {
397         self.vu_common.shutdown();
398     }
399 
400     fn add_memory_region(
401         &mut self,
402         region: &Arc<GuestRegionMmap>,
403     ) -> std::result::Result<(), crate::Error> {
404         self.vu_common.add_memory_region(&self.guest_memory, region)
405     }
406 }
407 
408 impl Pausable for Net {
409     fn pause(&mut self) -> result::Result<(), MigratableError> {
410         self.vu_common.pause()?;
411         self.common.pause()
412     }
413 
414     fn resume(&mut self) -> result::Result<(), MigratableError> {
415         self.common.resume()?;
416 
417         if let Some(epoll_thread) = &self.epoll_thread {
418             epoll_thread.thread().unpark();
419         }
420 
421         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
422             ctrl_queue_epoll_thread.thread().unpark();
423         }
424 
425         self.vu_common.resume()
426     }
427 }
428 
429 impl Snapshottable for Net {
430     fn id(&self) -> String {
431         self.id.clone()
432     }
433 
434     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
435         self.vu_common.snapshot(&self.id(), &self.state())
436     }
437 
438     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
439         self.set_state(&snapshot.to_versioned_state(&self.id)?);
440         Ok(())
441     }
442 }
// Empty impl: `Net` relies entirely on the trait's provided defaults.
impl Transportable for Net {}
444 
445 impl Migratable for Net {
446     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
447         self.vu_common.start_dirty_log(&self.guest_memory)
448     }
449 
450     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
451         self.vu_common.stop_dirty_log()
452     }
453 
454     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
455         self.vu_common.dirty_log(&self.guest_memory)
456     }
457 
458     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
459         self.vu_common.start_migration()
460     }
461 
462     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
463         self.vu_common
464             .complete_migration(self.common.kill_evt.take())
465     }
466 }
467