xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use std::result;
5 use std::sync::atomic::AtomicBool;
6 use std::sync::{Arc, Barrier, Mutex};
7 use std::thread;
8 
9 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
10 use seccompiler::SeccompAction;
11 use serde::{Deserialize, Serialize};
12 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
13 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
14 use virtio_bindings::virtio_net::{
15     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
16     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
17     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
18     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU,
19 };
20 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
21 use virtio_queue::{Queue, QueueT};
22 use vm_memory::{ByteValued, GuestMemoryAtomic};
23 use vm_migration::{
24     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
25     Transportable,
26 };
27 use vmm_sys_util::eventfd::EventFd;
28 
29 use crate::seccomp_filters::Thread;
30 use crate::thread_helper::spawn_virtio_thread;
31 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
32 use crate::vhost_user::{Error, Result, VhostUserCommon};
33 use crate::{
34     ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType,
35     VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
36 };
37 use crate::{GuestMemoryMmap, GuestRegionMmap};
38 
/// Queue count assumed for the backend when the vhost-user `MQ` protocol
/// feature has not been negotiated. Also used as the device's `min_queues`.
const DEFAULT_QUEUE_NUMBER: usize = 2;
40 
/// Serializable device state, produced by `Net::state()` when snapshotting
/// and consumed by `Net::new()` when restoring.
#[derive(Serialize, Deserialize)]
pub struct State {
    /// Copy of `VirtioCommon::avail_features` at snapshot time.
    pub avail_features: u64,
    /// Copy of `VirtioCommon::acked_features` at snapshot time.
    pub acked_features: u64,
    /// Contents of the virtio-net configuration space.
    pub config: VirtioNetConfig,
    /// vhost-user protocol features acknowledged with the backend.
    pub acked_protocol_features: u64,
    /// Number of queues handed to the vhost-user backend. `Net::new()`
    /// computes it before the control queue is accounted for.
    pub vu_num_queues: usize,
}
49 
/// Handler type for requests coming from the vhost-user backend.
///
/// It overrides nothing and relies entirely on the default methods of
/// `VhostUserFrontendReqHandler`. `Net::activate()` only uses it as the type
/// parameter of an `Option<FrontendReqHandler<_>>` that is always `None`.
struct BackendReqHandler {}
impl VhostUserFrontendReqHandler for BackendReqHandler {}
52 
/// vhost-user-net device: a virtio-net frontend whose data queues are served
/// by an external vhost-user backend.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, pause/kill
    /// events, interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, protocol features, socket
    /// path, backend queue count).
    vu_common: VhostUserCommon,
    /// Device identifier; used to name the worker threads and returned as
    /// the snapshot id.
    id: String,
    /// virtio-net configuration space served by `read_config()`.
    config: VirtioNetConfig,
    /// Guest memory, recorded by `activate()` and later passed to the
    /// memory-region and dirty-log helpers.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Thread running the control queue handler, when one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Thread running the vhost-user handler returned by
    /// `VhostUserCommon::activate()`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action passed to `spawn_virtio_thread()` for worker threads.
    seccomp_action: SeccompAction,
    /// Exit event passed to `spawn_virtio_thread()` for worker threads.
    exit_evt: EventFd,
    /// When set, `features()` additionally advertises
    /// `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}
65 
66 impl Net {
67     /// Create a new vhost-user-net device
68     #[allow(clippy::too_many_arguments)]
69     pub fn new(
70         id: String,
71         mac_addr: MacAddr,
72         mtu: Option<u16>,
73         vu_cfg: VhostUserConfig,
74         server: bool,
75         seccomp_action: SeccompAction,
76         exit_evt: EventFd,
77         iommu: bool,
78         state: Option<State>,
79         offload_tso: bool,
80         offload_ufo: bool,
81         offload_csum: bool,
82     ) -> Result<Net> {
83         let mut num_queues = vu_cfg.num_queues;
84 
85         let mut vu =
86             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
87 
88         let (
89             avail_features,
90             acked_features,
91             acked_protocol_features,
92             vu_num_queues,
93             config,
94             paused,
95         ) = if let Some(state) = state {
96             info!("Restoring vhost-user-net {}", id);
97 
98             // The backend acknowledged features must not contain
99             // VIRTIO_NET_F_MAC since we don't expect the backend
100             // to handle it.
101             let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC);
102 
103             vu.set_protocol_features_vhost_user(
104                 backend_acked_features,
105                 state.acked_protocol_features,
106             )?;
107 
108             // If the control queue feature has been negotiated, let's
109             // increase the number of queues.
110             if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
111                 num_queues += 1;
112             }
113 
114             (
115                 state.avail_features,
116                 state.acked_features,
117                 state.acked_protocol_features,
118                 state.vu_num_queues,
119                 state.config,
120                 true,
121             )
122         } else {
123             // Filling device and vring features VMM supports.
124             let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF
125                 | 1 << VIRTIO_NET_F_CTRL_VQ
126                 | 1 << VIRTIO_F_RING_EVENT_IDX
127                 | 1 << VIRTIO_F_VERSION_1
128                 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
129 
130             if mtu.is_some() {
131                 avail_features |= 1u64 << VIRTIO_NET_F_MTU;
132             }
133 
134             // Configure TSO/UFO features when hardware checksum offload is enabled.
135             if offload_csum {
136                 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM;
137 
138                 if offload_tso {
139                     avail_features |= 1 << VIRTIO_NET_F_HOST_ECN
140                         | 1 << VIRTIO_NET_F_HOST_TSO4
141                         | 1 << VIRTIO_NET_F_HOST_TSO6
142                         | 1 << VIRTIO_NET_F_GUEST_ECN
143                         | 1 << VIRTIO_NET_F_GUEST_TSO4
144                         | 1 << VIRTIO_NET_F_GUEST_TSO6;
145                 }
146 
147                 if offload_ufo {
148                     avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO;
149                 }
150             }
151 
152             let mut config = VirtioNetConfig::default();
153             build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features);
154 
155             let avail_protocol_features = VhostUserProtocolFeatures::MQ
156                 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
157                 | VhostUserProtocolFeatures::REPLY_ACK
158                 | VhostUserProtocolFeatures::INFLIGHT_SHMFD
159                 | VhostUserProtocolFeatures::LOG_SHMFD;
160 
161             let (mut acked_features, acked_protocol_features) =
162                 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
163 
164             let backend_num_queues =
165                 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
166                     vu.socket_handle()
167                         .get_queue_num()
168                         .map_err(Error::VhostUserGetQueueMaxNum)? as usize
169                 } else {
170                     DEFAULT_QUEUE_NUMBER
171                 };
172 
173             if num_queues > backend_num_queues {
174                 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
175                 num_queues, backend_num_queues);
176                 return Err(Error::BadQueueNum);
177             }
178 
179             // If the control queue feature has been negotiated, let's increase
180             // the number of queues.
181             let vu_num_queues = num_queues;
182             if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
183                 num_queues += 1;
184             }
185 
186             // Make sure the virtio feature to set the MAC address is exposed to
187             // the guest, even if it hasn't been negotiated with the backend.
188             acked_features |= 1 << VIRTIO_NET_F_MAC;
189 
190             (
191                 acked_features,
192                 // If part of the available features that have been acked,
193                 // the PROTOCOL_FEATURES bit must be already set through
194                 // the VIRTIO acked features as we know the guest would
195                 // never ack it, thus the feature would be lost.
196                 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
197                 acked_protocol_features,
198                 vu_num_queues,
199                 config,
200                 false,
201             )
202         };
203 
204         Ok(Net {
205             id,
206             common: VirtioCommon {
207                 device_type: VirtioDeviceType::Net as u32,
208                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
209                 avail_features,
210                 acked_features,
211                 paused_sync: Some(Arc::new(Barrier::new(2))),
212                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
213                 paused: Arc::new(AtomicBool::new(paused)),
214                 ..Default::default()
215             },
216             vu_common: VhostUserCommon {
217                 vu: Some(Arc::new(Mutex::new(vu))),
218                 acked_protocol_features,
219                 socket_path: vu_cfg.socket,
220                 vu_num_queues,
221                 server,
222                 ..Default::default()
223             },
224             config,
225             guest_memory: None,
226             ctrl_queue_epoll_thread: None,
227             epoll_thread: None,
228             seccomp_action,
229             exit_evt,
230             iommu,
231         })
232     }
233 
234     fn state(&self) -> State {
235         State {
236             avail_features: self.common.avail_features,
237             acked_features: self.common.acked_features,
238             config: self.config,
239             acked_protocol_features: self.vu_common.acked_protocol_features,
240             vu_num_queues: self.vu_common.vu_num_queues,
241         }
242     }
243 }
244 
245 impl Drop for Net {
246     fn drop(&mut self) {
247         if let Some(kill_evt) = self.common.kill_evt.take() {
248             if let Err(e) = kill_evt.write(1) {
249                 error!("failed to kill vhost-user-net: {:?}", e);
250             }
251         }
252 
253         self.common.wait_for_epoll_threads();
254 
255         if let Some(thread) = self.epoll_thread.take() {
256             if let Err(e) = thread.join() {
257                 error!("Error joining thread: {:?}", e);
258             }
259         }
260         if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
261             if let Err(e) = thread.join() {
262                 error!("Error joining thread: {:?}", e);
263             }
264         }
265     }
266 }
267 
268 impl VirtioDevice for Net {
    /// Return the virtio device type recorded in the common state, which
    /// `Net::new()` sets to `VirtioDeviceType::Net`.
    fn device_type(&self) -> u32 {
        self.common.device_type
    }
272 
273     fn queue_max_sizes(&self) -> &[u16] {
274         &self.common.queue_sizes
275     }
276 
277     fn features(&self) -> u64 {
278         let mut features = self.common.avail_features;
279         if self.iommu {
280             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
281         }
282         features
283     }
284 
    /// Record the features acknowledged by the guest driver by forwarding
    /// `value` to the common virtio state.
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }
288 
289     fn read_config(&self, offset: u64, data: &mut [u8]) {
290         self.read_config_from_slice(self.config.as_slice(), offset, data);
291     }
292 
293     fn activate(
294         &mut self,
295         mem: GuestMemoryAtomic<GuestMemoryMmap>,
296         interrupt_cb: Arc<dyn VirtioInterrupt>,
297         mut queues: Vec<(usize, Queue, EventFd)>,
298     ) -> ActivateResult {
299         self.common.activate(&queues, &interrupt_cb)?;
300         self.guest_memory = Some(mem.clone());
301 
302         let num_queues = queues.len();
303         let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
304         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
305             let ctrl_queue_index = num_queues - 1;
306             let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);
307 
308             ctrl_queue.set_event_idx(event_idx);
309 
310             let (kill_evt, pause_evt) = self.common.dup_eventfds();
311 
312             let mut ctrl_handler = NetCtrlEpollHandler {
313                 mem: mem.clone(),
314                 kill_evt,
315                 pause_evt,
316                 ctrl_q: CtrlQueue::new(Vec::new()),
317                 queue: ctrl_queue,
318                 queue_evt: ctrl_queue_evt,
319                 access_platform: None,
320                 interrupt_cb: interrupt_cb.clone(),
321                 queue_index: ctrl_queue_index as u16,
322             };
323 
324             let paused = self.common.paused.clone();
325             // Let's update the barrier as we need 1 for the control queue
326             // thread + 1 for the common vhost-user thread + 1 for the main
327             // thread signalling the pause.
328             self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
329             let paused_sync = self.common.paused_sync.clone();
330 
331             let mut epoll_threads = Vec::new();
332             spawn_virtio_thread(
333                 &format!("{}_ctrl", &self.id),
334                 &self.seccomp_action,
335                 Thread::VirtioVhostNetCtl,
336                 &mut epoll_threads,
337                 &self.exit_evt,
338                 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
339             )?;
340             self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
341         }
342 
343         let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None;
344 
345         // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
346         // since we don't expect the backend to handle it.
347         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);
348 
349         // Run a dedicated thread for handling potential reconnections with
350         // the backend.
351         let (kill_evt, pause_evt) = self.common.dup_eventfds();
352 
353         let mut handler = self.vu_common.activate(
354             mem,
355             queues,
356             interrupt_cb,
357             backend_acked_features,
358             backend_req_handler,
359             kill_evt,
360             pause_evt,
361         )?;
362 
363         let paused = self.common.paused.clone();
364         let paused_sync = self.common.paused_sync.clone();
365 
366         let mut epoll_threads = Vec::new();
367         spawn_virtio_thread(
368             &self.id,
369             &self.seccomp_action,
370             Thread::VirtioVhostNet,
371             &mut epoll_threads,
372             &self.exit_evt,
373             move || handler.run(paused, paused_sync.unwrap()),
374         )?;
375         self.epoll_thread = Some(epoll_threads.remove(0));
376 
377         Ok(())
378     }
379 
380     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
381         // We first must resume the virtio thread if it was paused.
382         if self.common.pause_evt.take().is_some() {
383             self.common.resume().ok()?;
384         }
385 
386         if let Some(vu) = &self.vu_common.vu {
387             if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
388                 error!("Failed to reset vhost-user daemon: {:?}", e);
389                 return None;
390             }
391         }
392 
393         if let Some(kill_evt) = self.common.kill_evt.take() {
394             // Ignore the result because there is nothing we can do about it.
395             let _ = kill_evt.write(1);
396         }
397 
398         event!("virtio-device", "reset", "id", &self.id);
399 
400         // Return the interrupt
401         Some(self.common.interrupt_cb.take().unwrap())
402     }
403 
    /// Shut the device down by delegating to the vhost-user common state.
    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }
407 
408     fn add_memory_region(
409         &mut self,
410         region: &Arc<GuestRegionMmap>,
411     ) -> std::result::Result<(), crate::Error> {
412         self.vu_common.add_memory_region(&self.guest_memory, region)
413     }
414 }
415 
416 impl Pausable for Net {
417     fn pause(&mut self) -> result::Result<(), MigratableError> {
418         self.vu_common.pause()?;
419         self.common.pause()
420     }
421 
422     fn resume(&mut self) -> result::Result<(), MigratableError> {
423         self.common.resume()?;
424 
425         if let Some(epoll_thread) = &self.epoll_thread {
426             epoll_thread.thread().unpark();
427         }
428 
429         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
430             ctrl_queue_epoll_thread.thread().unpark();
431         }
432 
433         self.vu_common.resume()
434     }
435 }
436 
437 impl Snapshottable for Net {
438     fn id(&self) -> String {
439         self.id.clone()
440     }
441 
442     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
443         self.vu_common.snapshot(&self.state())
444     }
445 }
// No method is overridden here; the trait's default implementations are used.
impl Transportable for Net {}
447 
448 impl Migratable for Net {
449     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
450         self.vu_common.start_dirty_log(&self.guest_memory)
451     }
452 
453     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
454         self.vu_common.stop_dirty_log()
455     }
456 
457     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
458         self.vu_common.dirty_log(&self.guest_memory)
459     }
460 
461     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
462         self.vu_common.start_migration()
463     }
464 
465     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
466         self.vu_common
467             .complete_migration(self.common.kill_evt.take())
468     }
469 }
470