xref: /cloud-hypervisor/virtio-devices/src/vhost_user/net.rs (revision f7f2f25a574b1b2dba22c094fc8226d404157d15)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use crate::seccomp_filters::{get_seccomp_filter, Thread};
5 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
6 use crate::vhost_user::{Error, Inflight, Result, VhostUserEpollHandler};
7 use crate::{
8     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
9     VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, EPOLL_HELPER_EVENT_LAST,
10     VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
11 };
12 use crate::{GuestMemoryMmap, GuestRegionMmap};
13 use anyhow::anyhow;
14 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
15 use seccomp::{SeccompAction, SeccompFilter};
16 use std::ops::Deref;
17 use std::os::unix::io::AsRawFd;
18 use std::result;
19 use std::sync::atomic::AtomicBool;
20 use std::sync::{Arc, Barrier, Mutex};
21 use std::thread;
22 use std::vec::Vec;
23 use versionize::{VersionMap, Versionize, VersionizeResult};
24 use versionize_derive::Versionize;
25 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
26 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler};
27 use virtio_bindings::bindings::virtio_net::{
28     VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
29     VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
30     VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
31     VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
32 };
33 use vm_memory::{Address, ByteValued, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
34 use vm_migration::{
35     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
36     Transportable, VersionMapped,
37 };
38 use vmm_sys_util::eventfd::EventFd;
39 
/// Number of queues assumed for the backend when the vhost-user `MQ`
/// protocol feature has not been negotiated. Also used as the device's
/// `min_queues` value.
const DEFAULT_QUEUE_NUMBER: usize = 2;
41 
/// Versioned device state captured by `Net::state()` for snapshots and
/// applied back by `Net::set_state()` on restore.
#[derive(Versionize)]
pub struct State {
    /// Copy of `VirtioCommon::avail_features` (features offered to the guest).
    pub avail_features: u64,
    /// Copy of `VirtioCommon::acked_features` (features acknowledged so far).
    pub acked_features: u64,
    /// Contents of the virtio-net configuration space.
    pub config: VirtioNetConfig,
}

// Nothing is overridden here: whatever `VersionMapped` provides by default is
// used for `State`.
impl VersionMapped for State {}
50 
/// Type used to parameterize the vhost-user request handler
/// (`MasterReqHandler` / `VhostUserEpollHandler`). It overrides none of the
/// `VhostUserMasterReqHandler` methods.
struct SlaveReqHandler {}
impl VhostUserMasterReqHandler for SlaveReqHandler {}
53 
/// Control queue: epoll event identifier under which the control queue's
/// `EventFd` is registered. It is the first value following
/// `EPOLL_HELPER_EVENT_LAST`.
const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
57 
/// Epoll handler servicing the virtio-net control queue. `Net::activate()`
/// builds one and runs it from a dedicated thread.
pub struct NetCtrlEpollHandler {
    /// Guest memory handle; `memory()` is called on it each time the control
    /// queue is processed.
    pub mem: GuestMemoryAtomic<GuestMemoryMmap>,
    /// Eventfd handed to `EpollHelper::new` as the kill event.
    pub kill_evt: EventFd,
    /// Eventfd handed to `EpollHelper::new` as the pause event.
    pub pause_evt: EventFd,
    /// Control queue processor; its `process()` method is invoked on every
    /// `CTRL_QUEUE_EVENT`.
    pub ctrl_q: CtrlQueue,
    /// Eventfd registered with epoll under `CTRL_QUEUE_EVENT`.
    pub queue_evt: EventFd,
    /// The control virtqueue passed to `ctrl_q.process()`.
    pub queue: Queue,
}
66 
67 impl NetCtrlEpollHandler {
68     pub fn run_ctrl(
69         &mut self,
70         paused: Arc<AtomicBool>,
71         paused_sync: Arc<Barrier>,
72     ) -> std::result::Result<(), EpollHelperError> {
73         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
74         helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?;
75         helper.run(paused, paused_sync, self)?;
76 
77         Ok(())
78     }
79 }
80 
81 impl EpollHelperHandler for NetCtrlEpollHandler {
82     fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
83         let ev_type = event.data as u16;
84         match ev_type {
85             CTRL_QUEUE_EVENT => {
86                 let mem = self.mem.memory();
87                 if let Err(e) = self.queue_evt.read() {
88                     error!("failed to get ctl queue event: {:?}", e);
89                     return true;
90                 }
91                 if let Err(e) = self.ctrl_q.process(&mem, &mut self.queue) {
92                     error!("failed to process ctrl queue: {:?}", e);
93                     return true;
94                 }
95             }
96             _ => {
97                 error!("Unknown event for virtio-net");
98                 return true;
99             }
100         }
101 
102         false
103     }
104 }
105 
/// vhost-user-net device: the data queues are handed to an external
/// vhost-user backend, while the optional control queue is serviced by a
/// thread spawned in `activate()`.
pub struct Net {
    /// Common virtio device state (features, queue sizes, pause/kill
    /// eventfds, interrupt callback).
    common: VirtioCommon,
    /// Device identifier; used for thread names and as the snapshot id.
    id: String,
    /// Handle on the vhost-user backend connection, shared with the
    /// `VhostUserEpollHandler` thread.
    vu: Arc<Mutex<VhostUserHandle>>,
    /// Virtio-net configuration space served by `read_config()`.
    config: VirtioNetConfig,
    /// Guest memory, recorded by `activate()`; `None` until then.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// vhost-user protocol features negotiated with the backend in `new()`.
    acked_protocol_features: u64,
    /// Path of the vhost-user socket.
    socket_path: String,
    /// `server` flag given to `connect_vhost_user()`; when set, `shutdown()`
    /// removes the socket file.
    server: bool,
    /// Handle of the control queue thread, if one was spawned.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Handle of the thread running the `VhostUserEpollHandler`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action used to build the control queue thread's filter.
    seccomp_action: SeccompAction,
    /// Number of queues requested from the backend (control queue excluded);
    /// passed to `pause_vhost_user()` / `resume_vhost_user()`.
    vu_num_queues: usize,
}
120 
121 impl Net {
122     /// Create a new vhost-user-net device
123     pub fn new(
124         id: String,
125         mac_addr: MacAddr,
126         vu_cfg: VhostUserConfig,
127         server: bool,
128         seccomp_action: SeccompAction,
129     ) -> Result<Net> {
130         let mut num_queues = vu_cfg.num_queues;
131 
132         // Filling device and vring features VMM supports.
133         let mut avail_features = 1 << VIRTIO_NET_F_CSUM
134             | 1 << VIRTIO_NET_F_GUEST_CSUM
135             | 1 << VIRTIO_NET_F_GUEST_TSO4
136             | 1 << VIRTIO_NET_F_GUEST_TSO6
137             | 1 << VIRTIO_NET_F_GUEST_ECN
138             | 1 << VIRTIO_NET_F_GUEST_UFO
139             | 1 << VIRTIO_NET_F_HOST_TSO4
140             | 1 << VIRTIO_NET_F_HOST_TSO6
141             | 1 << VIRTIO_NET_F_HOST_ECN
142             | 1 << VIRTIO_NET_F_HOST_UFO
143             | 1 << VIRTIO_NET_F_MRG_RXBUF
144             | 1 << VIRTIO_NET_F_CTRL_VQ
145             | 1 << VIRTIO_F_RING_EVENT_IDX
146             | 1 << VIRTIO_F_VERSION_1
147             | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
148 
149         let mut config = VirtioNetConfig::default();
150         build_net_config_space(&mut config, mac_addr, num_queues, &mut avail_features);
151 
152         let mut vu =
153             VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;
154 
155         let avail_protocol_features = VhostUserProtocolFeatures::MQ
156             | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
157             | VhostUserProtocolFeatures::REPLY_ACK
158             | VhostUserProtocolFeatures::INFLIGHT_SHMFD;
159 
160         let (mut acked_features, acked_protocol_features) =
161             vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
162 
163         let backend_num_queues =
164             if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
165                 vu.socket_handle()
166                     .get_queue_num()
167                     .map_err(Error::VhostUserGetQueueMaxNum)? as usize
168             } else {
169                 DEFAULT_QUEUE_NUMBER
170             };
171 
172         if num_queues > backend_num_queues {
173             error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
174                 num_queues, backend_num_queues);
175             return Err(Error::BadQueueNum);
176         }
177 
178         // If the control queue feature has been negotiated, let's increase
179         // the number of queues.
180         let vu_num_queues = num_queues;
181         if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
182             num_queues += 1;
183         }
184 
185         // Make sure the virtio feature to set the MAC address is exposed to
186         // the guest, even if it hasn't been negotiated with the backend.
187         acked_features |= 1 << VIRTIO_NET_F_MAC;
188 
189         Ok(Net {
190             id,
191             common: VirtioCommon {
192                 device_type: VirtioDeviceType::Net as u32,
193                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
194                 avail_features: acked_features,
195                 acked_features: 0,
196                 paused_sync: Some(Arc::new(Barrier::new(2))),
197                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
198                 ..Default::default()
199             },
200             vu: Arc::new(Mutex::new(vu)),
201             config,
202             guest_memory: None,
203             acked_protocol_features,
204             socket_path: vu_cfg.socket,
205             server,
206             ctrl_queue_epoll_thread: None,
207             epoll_thread: None,
208             seccomp_action,
209             vu_num_queues,
210         })
211     }
212 
213     fn state(&self) -> State {
214         State {
215             avail_features: self.common.avail_features,
216             acked_features: self.common.acked_features,
217             config: self.config,
218         }
219     }
220 
221     fn set_state(&mut self, state: &State) {
222         self.common.avail_features = state.avail_features;
223         self.common.acked_features = state.acked_features;
224         self.config = state.config;
225     }
226 }
227 
228 impl Drop for Net {
229     fn drop(&mut self) {
230         if let Some(kill_evt) = self.common.kill_evt.take() {
231             if let Err(e) = kill_evt.write(1) {
232                 error!("failed to kill vhost-user-net: {:?}", e);
233             }
234         }
235     }
236 }
237 
238 impl VirtioDevice for Net {
239     fn device_type(&self) -> u32 {
240         self.common.device_type
241     }
242 
243     fn queue_max_sizes(&self) -> &[u16] {
244         &self.common.queue_sizes
245     }
246 
247     fn features(&self) -> u64 {
248         self.common.avail_features
249     }
250 
251     fn ack_features(&mut self, value: u64) {
252         self.common.ack_features(value)
253     }
254 
255     fn read_config(&self, offset: u64, data: &mut [u8]) {
256         self.read_config_from_slice(self.config.as_slice(), offset, data);
257     }
258 
259     fn activate(
260         &mut self,
261         mem: GuestMemoryAtomic<GuestMemoryMmap>,
262         interrupt_cb: Arc<dyn VirtioInterrupt>,
263         mut queues: Vec<Queue>,
264         mut queue_evts: Vec<EventFd>,
265     ) -> ActivateResult {
266         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
267         self.guest_memory = Some(mem.clone());
268 
269         let num_queues = queues.len();
270         if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
271             let cvq_queue = queues.remove(num_queues - 1);
272             let cvq_queue_evt = queue_evts.remove(num_queues - 1);
273 
274             let (kill_evt, pause_evt) = self.common.dup_eventfds();
275 
276             let mut ctrl_handler = NetCtrlEpollHandler {
277                 mem: mem.clone(),
278                 kill_evt,
279                 pause_evt,
280                 ctrl_q: CtrlQueue::new(Vec::new()),
281                 queue: cvq_queue,
282                 queue_evt: cvq_queue_evt,
283             };
284 
285             let paused = self.common.paused.clone();
286             // Let's update the barrier as we need 1 for the control queue
287             // thread + 1 for the common vhost-user thread + 1 for the main
288             // thread signalling the pause.
289             self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
290             let paused_sync = self.common.paused_sync.clone();
291 
292             // Retrieve seccomp filter for virtio_net_ctl thread
293             let virtio_vhost_net_ctl_seccomp_filter =
294                 get_seccomp_filter(&self.seccomp_action, Thread::VirtioVhostNetCtl)
295                     .map_err(ActivateError::CreateSeccompFilter)?;
296             thread::Builder::new()
297                 .name(format!("{}_ctrl", self.id))
298                 .spawn(move || {
299                     if let Err(e) = SeccompFilter::apply(virtio_vhost_net_ctl_seccomp_filter) {
300                         error!("Error applying seccomp filter: {:?}", e);
301                     } else if let Err(e) = ctrl_handler.run_ctrl(paused, paused_sync.unwrap()) {
302                         error!("Error running worker: {:?}", e);
303                     }
304                 })
305                 .map(|thread| self.ctrl_queue_epoll_thread = Some(thread))
306                 .map_err(|e| {
307                     error!("failed to clone queue EventFd: {}", e);
308                     ActivateError::BadActivate
309                 })?;
310         }
311 
312         let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None;
313 
314         // The backend acknowledged features must contain the protocol feature
315         // bit in case it was initially set but lost through the features
316         // negotiation with the guest. Additionally, it must not contain
317         // VIRTIO_NET_F_MAC since we don't expect the backend to handle it.
318         let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC)
319             | (self.common.avail_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits());
320 
321         let mut inflight: Option<Inflight> =
322             if self.acked_protocol_features & VhostUserProtocolFeatures::INFLIGHT_SHMFD.bits() != 0
323             {
324                 Some(Inflight::default())
325             } else {
326                 None
327             };
328 
329         self.vu
330             .lock()
331             .unwrap()
332             .setup_vhost_user(
333                 &mem.memory(),
334                 queues.clone(),
335                 queue_evts.iter().map(|q| q.try_clone().unwrap()).collect(),
336                 &interrupt_cb,
337                 backend_acked_features,
338                 &slave_req_handler,
339                 inflight.as_mut(),
340             )
341             .map_err(ActivateError::VhostUserNetSetup)?;
342 
343         // Run a dedicated thread for handling potential reconnections with
344         // the backend.
345         let (kill_evt, pause_evt) = self.common.dup_eventfds();
346 
347         let mut handler: VhostUserEpollHandler<SlaveReqHandler> = VhostUserEpollHandler {
348             vu: self.vu.clone(),
349             mem,
350             kill_evt,
351             pause_evt,
352             queues,
353             queue_evts,
354             virtio_interrupt: interrupt_cb,
355             acked_features: backend_acked_features,
356             acked_protocol_features: self.acked_protocol_features,
357             socket_path: self.socket_path.clone(),
358             server: self.server,
359             slave_req_handler: None,
360             inflight,
361         };
362 
363         let paused = self.common.paused.clone();
364         let paused_sync = self.common.paused_sync.clone();
365 
366         thread::Builder::new()
367             .name(self.id.to_string())
368             .spawn(move || {
369                 if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
370                     error!("Error running vhost-user-net worker: {:?}", e);
371                 }
372             })
373             .map(|thread| self.epoll_thread = Some(thread))
374             .map_err(|e| {
375                 error!("failed to clone queue EventFd: {}", e);
376                 ActivateError::BadActivate
377             })?;
378 
379         Ok(())
380     }
381 
382     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
383         // We first must resume the virtio thread if it was paused.
384         if self.common.pause_evt.take().is_some() {
385             self.common.resume().ok()?;
386         }
387 
388         if let Err(e) = self
389             .vu
390             .lock()
391             .unwrap()
392             .reset_vhost_user(self.common.queue_sizes.len())
393         {
394             error!("Failed to reset vhost-user daemon: {:?}", e);
395             return None;
396         }
397 
398         if let Some(kill_evt) = self.common.kill_evt.take() {
399             // Ignore the result because there is nothing we can do about it.
400             let _ = kill_evt.write(1);
401         }
402 
403         event!("virtio-device", "reset", "id", &self.id);
404 
405         // Return the interrupt
406         Some(self.common.interrupt_cb.take().unwrap())
407     }
408 
409     fn shutdown(&mut self) {
410         let _ = unsafe { libc::close(self.vu.lock().unwrap().socket_handle().as_raw_fd()) };
411 
412         // Remove socket path if needed
413         if self.server {
414             let _ = std::fs::remove_file(&self.socket_path);
415         }
416     }
417 
418     fn add_memory_region(
419         &mut self,
420         region: &Arc<GuestRegionMmap>,
421     ) -> std::result::Result<(), crate::Error> {
422         if self.acked_protocol_features & VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS.bits() != 0
423         {
424             self.vu
425                 .lock()
426                 .unwrap()
427                 .add_memory_region(region)
428                 .map_err(crate::Error::VhostUserAddMemoryRegion)
429         } else if let Some(guest_memory) = &self.guest_memory {
430             self.vu
431                 .lock()
432                 .unwrap()
433                 .update_mem_table(guest_memory.memory().deref())
434                 .map_err(crate::Error::VhostUserUpdateMemory)
435         } else {
436             Ok(())
437         }
438     }
439 }
440 
441 impl Pausable for Net {
442     fn pause(&mut self) -> result::Result<(), MigratableError> {
443         self.vu
444             .lock()
445             .unwrap()
446             .pause_vhost_user(self.vu_num_queues)
447             .map_err(|e| {
448                 MigratableError::Pause(anyhow!("Error pausing vhost-user-net backend: {:?}", e))
449             })?;
450 
451         self.common.pause()
452     }
453 
454     fn resume(&mut self) -> result::Result<(), MigratableError> {
455         self.common.resume()?;
456 
457         if let Some(epoll_thread) = &self.epoll_thread {
458             epoll_thread.thread().unpark();
459         }
460 
461         if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
462             ctrl_queue_epoll_thread.thread().unpark();
463         }
464 
465         self.vu
466             .lock()
467             .unwrap()
468             .resume_vhost_user(self.vu_num_queues)
469             .map_err(|e| {
470                 MigratableError::Resume(anyhow!("Error resuming vhost-user-net backend: {:?}", e))
471             })
472     }
473 }
474 
475 impl Snapshottable for Net {
476     fn id(&self) -> String {
477         self.id.clone()
478     }
479 
480     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
481         Snapshot::new_from_versioned_state(&self.id(), &self.state())
482     }
483 
484     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
485         self.set_state(&snapshot.to_versioned_state(&self.id)?);
486         Ok(())
487     }
488 }
// No `Transportable` method is overridden for this device.
impl Transportable for Net {}
490 
491 impl Migratable for Net {
492     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
493         if let Some(guest_memory) = &self.guest_memory {
494             let last_ram_addr = guest_memory.memory().last_addr().raw_value();
495             self.vu
496                 .lock()
497                 .unwrap()
498                 .start_dirty_log(last_ram_addr)
499                 .map_err(|e| {
500                     MigratableError::MigrateStart(anyhow!(
501                         "Error starting migration for vhost-user-blk backend: {:?}",
502                         e
503                     ))
504                 })
505         } else {
506             Err(MigratableError::MigrateStart(anyhow!(
507                 "Missing guest memory"
508             )))
509         }
510     }
511 
512     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
513         self.vu.lock().unwrap().stop_dirty_log().map_err(|e| {
514             MigratableError::MigrateStop(anyhow!(
515                 "Error stopping migration for vhost-user-blk backend: {:?}",
516                 e
517             ))
518         })
519     }
520 
521     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
522         if let Some(guest_memory) = &self.guest_memory {
523             let last_ram_addr = guest_memory.memory().last_addr().raw_value();
524             self.vu
525                 .lock()
526                 .unwrap()
527                 .dirty_log(last_ram_addr)
528                 .map_err(|e| {
529                     MigratableError::MigrateDirtyRanges(anyhow!(
530                         "Error retrieving dirty ranges from vhost-user-blk backend: {:?}",
531                         e
532                     ))
533                 })
534         } else {
535             Err(MigratableError::MigrateDirtyRanges(anyhow!(
536                 "Missing guest memory"
537             )))
538         }
539     }
540 }
541