xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision 61e57e1cb149de03ae1e0b799b9e5ba9a4a63ace)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use std::sync::atomic::AtomicBool;
5 use std::sync::{Arc, Barrier, Mutex};
6 use std::{result, thread};
7 
8 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
9 use seccompiler::SeccompAction;
10 use serde::{Deserialize, Serialize};
11 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
12 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
13 use virtio_bindings::virtio_net::{
14     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
15     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
16     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
17     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU,
18 };
19 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
20 use virtio_queue::{Queue, QueueT};
21 use vm_memory::{ByteValued, GuestMemoryAtomic};
22 use vm_migration::protocol::MemoryRangeTable;
23 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
24 use vmm_sys_util::eventfd::EventFd;
25 
26 use crate::seccomp_filters::Thread;
27 use crate::thread_helper::spawn_virtio_thread;
28 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
29 use crate::vhost_user::{Error, Result, VhostUserCommon};
30 use crate::{
31     ActivateResult, GuestMemoryMmap, GuestRegionMmap, NetCtrlEpollHandler, VirtioCommon,
32     VirtioDevice, VirtioDeviceType, VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM,
33     VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
34 };
35 
/// Queue count assumed for the backend when the vhost-user `MQ` protocol
/// feature has not been negotiated. Also used as the device's `min_queues`.
const DEFAULT_QUEUE_NUMBER: usize = 2;
37 
/// Serializable state of a vhost-user-net device.
///
/// Built by `Net::state()` for snapshots and accepted by `Net::new()` to
/// restore a device instead of negotiating from scratch.
#[derive(Serialize, Deserialize)]
pub struct State {
    /// Virtio features offered to the guest (`VirtioCommon::avail_features`).
    pub avail_features: u64,
    /// Virtio features acknowledged so far (`VirtioCommon::acked_features`).
    pub acked_features: u64,
    /// Contents of the device configuration space.
    pub config: VirtioNetConfig,
    /// vhost-user protocol features acknowledged with the backend.
    pub acked_protocol_features: u64,
    /// Number of queues handed to the vhost-user backend. The control queue,
    /// when negotiated, is not counted here.
    pub vu_num_queues: usize,
}
46 
/// Placeholder type implementing `VhostUserFrontendReqHandler` without
/// overriding any of its methods. In this file it is only used to give a type
/// to the `None` request handler passed to `VhostUserCommon::activate()`.
struct BackendReqHandler {}
impl VhostUserFrontendReqHandler for BackendReqHandler {}
49 
/// vhost-user-net device: a virtio-net frontend whose data queues are handled
/// by an external vhost-user backend.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, pause/kill events,
    /// interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, acked protocol features,
    /// socket path, number of backend queues).
    vu_common: VhostUserCommon,
    /// Device identifier; used to name the worker threads and returned by
    /// `Snapshottable::id()`.
    id: String,
    /// Configuration space served by `read_config()`.
    config: VirtioNetConfig,
    /// Guest memory, recorded in `activate()` and later handed to the
    /// vhost-user helpers for memory hotplug and dirty logging.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Handle of the control queue thread, spawned in `activate()` only when
    /// `VIRTIO_NET_F_CTRL_VQ` was acked and an odd number of queues was given.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Handle of the vhost-user handler thread spawned in `activate()`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action forwarded to `spawn_virtio_thread()`.
    seccomp_action: SeccompAction,
    /// Exit event forwarded to `spawn_virtio_thread()`.
    exit_evt: EventFd,
    /// When set, `features()` additionally advertises
    /// `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}
62 
63 impl Net {
64     /// Create a new vhost-user-net device
65     #[allow(clippy::too_many_arguments)]
66     pub fn new(
67         id: String,
68         mac_addr: MacAddr,
69         mtu: Option<u16>,
70         vu_cfg: VhostUserConfig,
71         server: bool,
72         seccomp_action: SeccompAction,
73         exit_evt: EventFd,
74         iommu: bool,
75         state: Option<State>,
76         offload_tso: bool,
77         offload_ufo: bool,
78         offload_csum: bool,
79     ) -> Result<Net> {
80         let mut num_queues = vu_cfg.num_queues;
81 
82         let mut vu =
83             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
84 
85         let (
86             avail_features,
87             acked_features,
88             acked_protocol_features,
89             vu_num_queues,
90             config,
91             paused,
92         ) = if let Some(state) = state {
93             info!("Restoring vhost-user-net {}", id);
94 
95             // The backend acknowledged features must not contain
96             // VIRTIO_NET_F_MAC since we don't expect the backend
97             // to handle it.
98             let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC);
99 
100             vu.set_protocol_features_vhost_user(
101                 backend_acked_features,
102                 state.acked_protocol_features,
103             )?;
104 
105             // If the control queue feature has been negotiated, let's
106             // increase the number of queues.
107             if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
108                 num_queues += 1;
109             }
110 
111             (
112                 state.avail_features,
113                 state.acked_features,
114                 state.acked_protocol_features,
115                 state.vu_num_queues,
116                 state.config,
117                 true,
118             )
119         } else {
120             // Filling device and vring features VMM supports.
121             let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF
122                 | 1 << VIRTIO_NET_F_CTRL_VQ
123                 | 1 << VIRTIO_F_RING_EVENT_IDX
124                 | 1 << VIRTIO_F_VERSION_1
125                 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
126 
127             if mtu.is_some() {
128                 avail_features |= 1u64 << VIRTIO_NET_F_MTU;
129             }
130 
131             // Configure TSO/UFO features when hardware checksum offload is enabled.
132             if offload_csum {
133                 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM;
134 
135                 if offload_tso {
136                     avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
137                         | 1 << VIRTIO_NET_F_HOST_TSO4
138                         | 1 << VIRTIO_NET_F_HOST_TSO6
139                         | 1 << VIRTIO_NET_F_GUEST_ECN
140                         | 1 << VIRTIO_NET_F_GUEST_TSO4
141                         | 1 << VIRTIO_NET_F_GUEST_TSO6;
142                 }
143 
144                 if offload_ufo {
145                     avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
146                 }
147             }
148 
149             let mut config = VirtioNetConfig::default();
150             build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features);
151 
152             let avail_protocol_features = VhostUserProtocolFeatures::MQ
153                 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
154                 | VhostUserProtocolFeatures::REPLY_ACK
155                 | VhostUserProtocolFeatures::INFLIGHT_SHMFD
156                 | VhostUserProtocolFeatures::LOG_SHMFD;
157 
158             let (mut acked_features, acked_protocol_features) =
159                 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
160 
161             let backend_num_queues =
162                 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
163                     vu.socket_handle()
164                         .get_queue_num()
165                         .map_err(Error::VhostUserGetQueueMaxNum)? as usize
166                 } else {
167                     DEFAULT_QUEUE_NUMBER
168                 };
169 
170             if num_queues > backend_num_queues {
171                 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
172                 num_queues, backend_num_queues);
173                 return Err(Error::BadQueueNum);
174             }
175 
176             // If the control queue feature has been negotiated, let's increase
177             // the number of queues.
178             let vu_num_queues = num_queues;
179             if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
180                 num_queues += 1;
181             }
182 
183             // Make sure the virtio feature to set the MAC address is exposed to
184             // the guest, even if it hasn't been negotiated with the backend.
185             acked_features |= 1 << VIRTIO_NET_F_MAC;
186 
187             (
188                 acked_features,
189                 // If part of the available features that have been acked,
190                 // the PROTOCOL_FEATURES bit must be already set through
191                 // the VIRTIO acked features as we know the guest would
192                 // never ack it, thus the feature would be lost.
193                 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
194                 acked_protocol_features,
195                 vu_num_queues,
196                 config,
197                 false,
198             )
199         };
200 
201         Ok(Net {
202             id,
203             common: VirtioCommon {
204                 device_type: VirtioDeviceType::Net as u32,
205                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
206                 avail_features,
207                 acked_features,
208                 paused_sync: Some(Arc::new(Barrier::new(2))),
209                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
210                 paused: Arc::new(AtomicBool::new(paused)),
211                 ..Default::default()
212             },
213             vu_common: VhostUserCommon {
214                 vu: Some(Arc::new(Mutex::new(vu))),
215                 acked_protocol_features,
216                 socket_path: vu_cfg.socket,
217                 vu_num_queues,
218                 server,
219                 ..Default::default()
220             },
221             config,
222             guest_memory: None,
223             ctrl_queue_epoll_thread: None,
224             epoll_thread: None,
225             seccomp_action,
226             exit_evt,
227             iommu,
228         })
229     }
230 
231     fn state(&self) -> State {
232         State {
233             avail_features: self.common.avail_features,
234             acked_features: self.common.acked_features,
235             config: self.config,
236             acked_protocol_features: self.vu_common.acked_protocol_features,
237             vu_num_queues: self.vu_common.vu_num_queues,
238         }
239     }
240 }
241 
242 impl Drop for Net {
243     fn drop(&mut self) {
244         if let Some(kill_evt) = self.common.kill_evt.take() {
245             if let Err(e) = kill_evt.write(1) {
246                 error!("failed to kill vhost-user-net: {:?}", e);
247             }
248         }
249 
250         self.common.wait_for_epoll_threads();
251 
252         if let Some(thread) = self.epoll_thread.take() {
253             if let Err(e) = thread.join() {
254                 error!("Error joining thread: {:?}", e);
255             }
256         }
257         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
258             if let Err(e) = thread.join() {
259                 error!("Error joining thread: {:?}", e);
260             }
261         }
262     }
263 }
264 
265 impl VirtioDevice for Net {
266     fn device_type(&self) -> u32 {
267         self.common.device_type
268     }
269 
270     fn queue_max_sizes(&self) -> &[u16] {
271         &self.common.queue_sizes
272     }
273 
274     fn features(&self) -> u64 {
275         let mut features = self.common.avail_features;
276         if self.iommu {
277             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
278         }
279         features
280     }
281 
282     fn ack_features(&mut self, value: u64) {
283         self.common.ack_features(value)
284     }
285 
286     fn read_config(&self, offset: u64, data: &mut [u8]) {
287         self.read_config_from_slice(self.config.as_slice(), offset, data);
288     }
289 
290     fn activate(
291         &mut self,
292         mem: GuestMemoryAtomic<GuestMemoryMmap>,
293         interrupt_cb: Arc<dyn VirtioInterrupt>,
294         mut queues: Vec<(usize, Queue, EventFd)>,
295     ) -> ActivateResult {
296         self.common.activate(&queues, &interrupt_cb)?;
297         self.guest_memory = Some(mem.clone());
298 
299         let num_queues = queues.len();
300         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
301         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
302             let ctrl_queue_index = num_queues - 1;
303             let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);
304 
305             ctrl_queue.set_event_idx(event_idx);
306 
307             let (kill_evt, pause_evt) = self.common.dup_eventfds();
308 
309             let mut ctrl_handler = NetCtrlEpollHandler {
310                 mem: mem.clone(),
311                 kill_evt,
312                 pause_evt,
313                 ctrl_q: CtrlQueue::new(Vec::new()),
314                 queue: ctrl_queue,
315                 queue_evt: ctrl_queue_evt,
316                 access_platform: None,
317                 interrupt_cb: interrupt_cb.clone(),
318                 queue_index: ctrl_queue_index as u16,
319             };
320 
321             let paused = self.common.paused.clone();
322             // Let's update the barrier as we need 1 for the control queue
323             // thread + 1 for the common vhost-user thread + 1 for the main
324             // thread signalling the pause.
325             self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
326             let paused_sync = self.common.paused_sync.clone();
327 
328             let mut epoll_threads = Vec::new();
329             spawn_virtio_thread(
330                 &format!("{}_ctrl", &self.id),
331                 &self.seccomp_action,
332                 Thread::VirtioVhostNetCtl,
333                 &mut epoll_threads,
334                 &self.exit_evt,
335                 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
336             )?;
337             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
338         }
339 
340         let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None;
341 
342         // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
343         // since we don't expect the backend to handle it.
344         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);
345 
346         // Run a dedicated thread for handling potential reconnections with
347         // the backend.
348         let (kill_evt, pause_evt) = self.common.dup_eventfds();
349 
350         let mut handler = self.vu_common.activate(
351             mem,
352             queues,
353             interrupt_cb,
354             backend_acked_features,
355             backend_req_handler,
356             kill_evt,
357             pause_evt,
358         )?;
359 
360         let paused = self.common.paused.clone();
361         let paused_sync = self.common.paused_sync.clone();
362 
363         let mut epoll_threads = Vec::new();
364         spawn_virtio_thread(
365             &self.id,
366             &self.seccomp_action,
367             Thread::VirtioVhostNet,
368             &mut epoll_threads,
369             &self.exit_evt,
370             move || handler.run(paused, paused_sync.unwrap()),
371         )?;
372         self.epoll_thread = Some(epoll_threads.remove(0));
373 
374         Ok(())
375     }
376 
377     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
378         // We first must resume the virtio thread if it was paused.
379         if self.common.pause_evt.take().is_some() {
380             self.common.resume().ok()?;
381         }
382 
383         if let Some(vu) = &self.vu_common.vu {
384             if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
385                 error!("Failed to reset vhost-user daemon: {:?}", e);
386                 return None;
387             }
388         }
389 
390         if let Some(kill_evt) = self.common.kill_evt.take() {
391             // Ignore the result because there is nothing we can do about it.
392             let _ = kill_evt.write(1);
393         }
394 
395         event!("virtio-device", "reset", "id", &self.id);
396 
397         // Return the interrupt
398         Some(self.common.interrupt_cb.take().unwrap())
399     }
400 
401     fn shutdown(&mut self) {
402         self.vu_common.shutdown();
403     }
404 
405     fn add_memory_region(
406         &mut self,
407         region: &Arc<GuestRegionMmap>,
408     ) -> std::result::Result<(), crate::Error> {
409         self.vu_common.add_memory_region(&self.guest_memory, region)
410     }
411 }
412 
413 impl Pausable for Net {
414     fn pause(&mut self) -> result::Result<(), MigratableError> {
415         self.vu_common.pause()?;
416         self.common.pause()
417     }
418 
419     fn resume(&mut self) -> result::Result<(), MigratableError> {
420         self.common.resume()?;
421 
422         if let Some(epoll_thread) = &self.epoll_thread {
423             epoll_thread.thread().unpark();
424         }
425 
426         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
427             ctrl_queue_epoll_thread.thread().unpark();
428         }
429 
430         self.vu_common.resume()
431     }
432 }
433 
434 impl Snapshottable for Net {
435     fn id(&self) -> String {
436         self.id.clone()
437     }
438 
439     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
440         self.vu_common.snapshot(&self.state())
441     }
442 }
// No method is overridden: the device relies on whatever `Transportable`
// provides by default.
impl Transportable for Net {}
444 
445 impl Migratable for Net {
446     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
447         self.vu_common.start_dirty_log(&self.guest_memory)
448     }
449 
450     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
451         self.vu_common.stop_dirty_log()
452     }
453 
454     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
455         self.vu_common.dirty_log(&self.guest_memory)
456     }
457 
458     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
459         self.vu_common.start_migration()
460     }
461 
462     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
463         self.vu_common
464             .complete_migration(self.common.kill_evt.take())
465     }
466 }
467