1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 5 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 6 use crate::vhost_user::{Error, Inflight, Result, VhostUserEpollHandler}; 7 use crate::{ 8 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue, 9 VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, EPOLL_HELPER_EVENT_LAST, 10 VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, 11 }; 12 use crate::{GuestMemoryMmap, GuestRegionMmap}; 13 use anyhow::anyhow; 14 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig}; 15 use seccomp::{SeccompAction, SeccompFilter}; 16 use std::ops::Deref; 17 use std::os::unix::io::AsRawFd; 18 use std::result; 19 use std::sync::atomic::AtomicBool; 20 use std::sync::{Arc, Barrier, Mutex}; 21 use std::thread; 22 use std::vec::Vec; 23 use versionize::{VersionMap, Versionize, VersionizeResult}; 24 use versionize_derive::Versionize; 25 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 26 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler}; 27 use virtio_bindings::bindings::virtio_net::{ 28 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, 29 VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, 30 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, 31 VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, 32 }; 33 use vm_memory::{Address, ByteValued, GuestAddressSpace, GuestMemory, GuestMemoryAtomic}; 34 use vm_migration::{ 35 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 36 Transportable, VersionMapped, 37 }; 38 use vmm_sys_util::eventfd::EventFd; 39 40 const DEFAULT_QUEUE_NUMBER: usize = 2; 41 42 #[derive(Versionize)] 43 pub struct State { 44 pub avail_features: u64, 45 pub acked_features: u64, 46 pub config: VirtioNetConfig, 47 } 48 49 impl VersionMapped for State {} 50 51 struct SlaveReqHandler {} 52 impl VhostUserMasterReqHandler for SlaveReqHandler {} 53 54 /// Control queue 55 // Event available on the control queue. 56 const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 57 58 pub struct NetCtrlEpollHandler { 59 pub mem: GuestMemoryAtomic<GuestMemoryMmap>, 60 pub kill_evt: EventFd, 61 pub pause_evt: EventFd, 62 pub ctrl_q: CtrlQueue, 63 pub queue_evt: EventFd, 64 pub queue: Queue, 65 } 66 67 impl NetCtrlEpollHandler { 68 pub fn run_ctrl( 69 &mut self, 70 paused: Arc<AtomicBool>, 71 paused_sync: Arc<Barrier>, 72 ) -> std::result::Result<(), EpollHelperError> { 73 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 74 helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?; 75 helper.run(paused, paused_sync, self)?; 76 77 Ok(()) 78 } 79 } 80 81 impl EpollHelperHandler for NetCtrlEpollHandler { 82 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 83 let ev_type = event.data as u16; 84 match ev_type { 85 CTRL_QUEUE_EVENT => { 86 let mem = self.mem.memory(); 87 if let Err(e) = self.queue_evt.read() { 88 error!("failed to get ctl queue event: {:?}", e); 89 return true; 90 } 91 if let Err(e) = self.ctrl_q.process(&mem, &mut self.queue) { 92 error!("failed to process ctrl queue: {:?}", e); 93 return true; 94 } 95 } 96 _ => { 97 error!("Unknown event for virtio-net"); 98 return true; 99 } 100 } 101 102 false 103 } 104 } 105 106 pub struct Net { 107 common: VirtioCommon, 108 id: String, 109 vu: Arc<Mutex<VhostUserHandle>>, 110 config: VirtioNetConfig, 111 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 112 acked_protocol_features: u64, 113 socket_path: String, 114 server: bool, 115 ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>, 116 epoll_thread: Option<thread::JoinHandle<()>>, 117 seccomp_action: SeccompAction, 118 vu_num_queues: usize, 119 } 120 121 impl Net { 122 /// Create a new vhost-user-net device 123 pub fn new( 124 id: String, 125 mac_addr: MacAddr, 126 vu_cfg: VhostUserConfig, 127 server: bool, 128 seccomp_action: SeccompAction, 129 ) -> Result<Net> { 130 let mut num_queues = vu_cfg.num_queues; 131 132 // Filling device and vring features VMM supports. 133 let mut avail_features = 1 << VIRTIO_NET_F_CSUM 134 | 1 << VIRTIO_NET_F_GUEST_CSUM 135 | 1 << VIRTIO_NET_F_GUEST_TSO4 136 | 1 << VIRTIO_NET_F_GUEST_TSO6 137 | 1 << VIRTIO_NET_F_GUEST_ECN 138 | 1 << VIRTIO_NET_F_GUEST_UFO 139 | 1 << VIRTIO_NET_F_HOST_TSO4 140 | 1 << VIRTIO_NET_F_HOST_TSO6 141 | 1 << VIRTIO_NET_F_HOST_ECN 142 | 1 << VIRTIO_NET_F_HOST_UFO 143 | 1 << VIRTIO_NET_F_MRG_RXBUF 144 | 1 << VIRTIO_NET_F_CTRL_VQ 145 | 1 << VIRTIO_F_RING_EVENT_IDX 146 | 1 << VIRTIO_F_VERSION_1 147 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); 148 149 let mut config = VirtioNetConfig::default(); 150 build_net_config_space(&mut config, mac_addr, num_queues, &mut avail_features); 151 152 let mut vu = 153 VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?; 154 155 let avail_protocol_features = VhostUserProtocolFeatures::MQ 156 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 157 | VhostUserProtocolFeatures::REPLY_ACK 158 | VhostUserProtocolFeatures::INFLIGHT_SHMFD; 159 160 let (mut acked_features, acked_protocol_features) = 161 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 162 163 let backend_num_queues = 164 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 165 vu.socket_handle() 166 .get_queue_num() 167 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 168 } else { 169 DEFAULT_QUEUE_NUMBER 170 }; 171 172 if num_queues > backend_num_queues { 173 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n", 174 num_queues, backend_num_queues); 175 return Err(Error::BadQueueNum); 176 } 177 178 // If the control queue feature has been negotiated, let's increase 179 // the number of queues. 180 let vu_num_queues = num_queues; 181 if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 182 num_queues += 1; 183 } 184 185 // Make sure the virtio feature to set the MAC address is exposed to 186 // the guest, even if it hasn't been negotiated with the backend. 187 acked_features |= 1 << VIRTIO_NET_F_MAC; 188 189 Ok(Net { 190 id, 191 common: VirtioCommon { 192 device_type: VirtioDeviceType::Net as u32, 193 queue_sizes: vec![vu_cfg.queue_size; num_queues], 194 avail_features: acked_features, 195 acked_features: 0, 196 paused_sync: Some(Arc::new(Barrier::new(2))), 197 min_queues: DEFAULT_QUEUE_NUMBER as u16, 198 ..Default::default() 199 }, 200 vu: Arc::new(Mutex::new(vu)), 201 config, 202 guest_memory: None, 203 acked_protocol_features, 204 socket_path: vu_cfg.socket, 205 server, 206 ctrl_queue_epoll_thread: None, 207 epoll_thread: None, 208 seccomp_action, 209 vu_num_queues, 210 }) 211 } 212 213 fn state(&self) -> State { 214 State { 215 avail_features: self.common.avail_features, 216 acked_features: self.common.acked_features, 217 config: self.config, 218 } 219 } 220 221 fn set_state(&mut self, state: &State) { 222 self.common.avail_features = state.avail_features; 223 self.common.acked_features = state.acked_features; 224 self.config = state.config; 225 } 226 } 227 228 impl Drop for Net { 229 fn drop(&mut self) { 230 if let Some(kill_evt) = self.common.kill_evt.take() { 231 if let Err(e) = kill_evt.write(1) { 232 error!("failed to kill vhost-user-net: {:?}", e); 233 } 234 } 235 } 236 } 237 238 impl VirtioDevice for Net { 239 fn device_type(&self) -> u32 { 240 self.common.device_type 241 } 242 243 fn queue_max_sizes(&self) -> &[u16] { 244 &self.common.queue_sizes 245 } 246 247 fn features(&self) -> u64 { 248 self.common.avail_features 249 } 250 251 fn ack_features(&mut self, value: u64) { 252 self.common.ack_features(value) 253 } 254 255 fn read_config(&self, offset: u64, data: &mut [u8]) { 256 self.read_config_from_slice(self.config.as_slice(), offset, data); 257 } 258 259 fn activate( 260 &mut self, 261 mem: GuestMemoryAtomic<GuestMemoryMmap>, 262 interrupt_cb: Arc<dyn VirtioInterrupt>, 263 mut queues: Vec<Queue>, 264 mut queue_evts: Vec<EventFd>, 265 ) -> ActivateResult { 266 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 267 self.guest_memory = Some(mem.clone()); 268 269 let num_queues = queues.len(); 270 if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { 271 let cvq_queue = queues.remove(num_queues - 1); 272 let cvq_queue_evt = queue_evts.remove(num_queues - 1); 273 274 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 275 276 let mut ctrl_handler = NetCtrlEpollHandler { 277 mem: mem.clone(), 278 kill_evt, 279 pause_evt, 280 ctrl_q: CtrlQueue::new(Vec::new()), 281 queue: cvq_queue, 282 queue_evt: cvq_queue_evt, 283 }; 284 285 let paused = self.common.paused.clone(); 286 // Let's update the barrier as we need 1 for the control queue 287 // thread + 1 for the common vhost-user thread + 1 for the main 288 // thread signalling the pause. 289 self.common.paused_sync = Some(Arc::new(Barrier::new(3))); 290 let paused_sync = self.common.paused_sync.clone(); 291 292 // Retrieve seccomp filter for virtio_net_ctl thread 293 let virtio_vhost_net_ctl_seccomp_filter = 294 get_seccomp_filter(&self.seccomp_action, Thread::VirtioVhostNetCtl) 295 .map_err(ActivateError::CreateSeccompFilter)?; 296 thread::Builder::new() 297 .name(format!("{}_ctrl", self.id)) 298 .spawn(move || { 299 if let Err(e) = SeccompFilter::apply(virtio_vhost_net_ctl_seccomp_filter) { 300 error!("Error applying seccomp filter: {:?}", e); 301 } else if let Err(e) = ctrl_handler.run_ctrl(paused, paused_sync.unwrap()) { 302 error!("Error running worker: {:?}", e); 303 } 304 }) 305 .map(|thread| self.ctrl_queue_epoll_thread = Some(thread)) 306 .map_err(|e| { 307 error!("failed to clone queue EventFd: {}", e); 308 ActivateError::BadActivate 309 })?; 310 } 311 312 let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None; 313 314 // The backend acknowledged features must contain the protocol feature 315 // bit in case it was initially set but lost through the features 316 // negotiation with the guest. Additionally, it must not contain 317 // VIRTIO_NET_F_MAC since we don't expect the backend to handle it. 318 let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC) 319 | (self.common.avail_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()); 320 321 let mut inflight: Option<Inflight> = 322 if self.acked_protocol_features & VhostUserProtocolFeatures::INFLIGHT_SHMFD.bits() != 0 323 { 324 Some(Inflight::default()) 325 } else { 326 None 327 }; 328 329 self.vu 330 .lock() 331 .unwrap() 332 .setup_vhost_user( 333 &mem.memory(), 334 queues.clone(), 335 queue_evts.iter().map(|q| q.try_clone().unwrap()).collect(), 336 &interrupt_cb, 337 backend_acked_features, 338 &slave_req_handler, 339 inflight.as_mut(), 340 ) 341 .map_err(ActivateError::VhostUserNetSetup)?; 342 343 // Run a dedicated thread for handling potential reconnections with 344 // the backend. 345 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 346 347 let mut handler: VhostUserEpollHandler<SlaveReqHandler> = VhostUserEpollHandler { 348 vu: self.vu.clone(), 349 mem, 350 kill_evt, 351 pause_evt, 352 queues, 353 queue_evts, 354 virtio_interrupt: interrupt_cb, 355 acked_features: backend_acked_features, 356 acked_protocol_features: self.acked_protocol_features, 357 socket_path: self.socket_path.clone(), 358 server: self.server, 359 slave_req_handler: None, 360 inflight, 361 }; 362 363 let paused = self.common.paused.clone(); 364 let paused_sync = self.common.paused_sync.clone(); 365 366 thread::Builder::new() 367 .name(self.id.to_string()) 368 .spawn(move || { 369 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 370 error!("Error running vhost-user-net worker: {:?}", e); 371 } 372 }) 373 .map(|thread| self.epoll_thread = Some(thread)) 374 .map_err(|e| { 375 error!("failed to clone queue EventFd: {}", e); 376 ActivateError::BadActivate 377 })?; 378 379 Ok(()) 380 } 381 382 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 383 // We first must resume the virtio thread if it was paused. 384 if self.common.pause_evt.take().is_some() { 385 self.common.resume().ok()?; 386 } 387 388 if let Err(e) = self 389 .vu 390 .lock() 391 .unwrap() 392 .reset_vhost_user(self.common.queue_sizes.len()) 393 { 394 error!("Failed to reset vhost-user daemon: {:?}", e); 395 return None; 396 } 397 398 if let Some(kill_evt) = self.common.kill_evt.take() { 399 // Ignore the result because there is nothing we can do about it. 400 let _ = kill_evt.write(1); 401 } 402 403 event!("virtio-device", "reset", "id", &self.id); 404 405 // Return the interrupt 406 Some(self.common.interrupt_cb.take().unwrap()) 407 } 408 409 fn shutdown(&mut self) { 410 let _ = unsafe { libc::close(self.vu.lock().unwrap().socket_handle().as_raw_fd()) }; 411 412 // Remove socket path if needed 413 if self.server { 414 let _ = std::fs::remove_file(&self.socket_path); 415 } 416 } 417 418 fn add_memory_region( 419 &mut self, 420 region: &Arc<GuestRegionMmap>, 421 ) -> std::result::Result<(), crate::Error> { 422 if self.acked_protocol_features & VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS.bits() != 0 423 { 424 self.vu 425 .lock() 426 .unwrap() 427 .add_memory_region(region) 428 .map_err(crate::Error::VhostUserAddMemoryRegion) 429 } else if let Some(guest_memory) = &self.guest_memory { 430 self.vu 431 .lock() 432 .unwrap() 433 .update_mem_table(guest_memory.memory().deref()) 434 .map_err(crate::Error::VhostUserUpdateMemory) 435 } else { 436 Ok(()) 437 } 438 } 439 } 440 441 impl Pausable for Net { 442 fn pause(&mut self) -> result::Result<(), MigratableError> { 443 self.vu 444 .lock() 445 .unwrap() 446 .pause_vhost_user(self.vu_num_queues) 447 .map_err(|e| { 448 MigratableError::Pause(anyhow!("Error pausing vhost-user-net backend: {:?}", e)) 449 })?; 450 451 self.common.pause() 452 } 453 454 fn resume(&mut self) -> result::Result<(), MigratableError> { 455 self.common.resume()?; 456 457 if let Some(epoll_thread) = &self.epoll_thread { 458 epoll_thread.thread().unpark(); 459 } 460 461 if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread { 462 ctrl_queue_epoll_thread.thread().unpark(); 463 } 464 465 self.vu 466 .lock() 467 .unwrap() 468 .resume_vhost_user(self.vu_num_queues) 469 .map_err(|e| { 470 MigratableError::Resume(anyhow!("Error resuming vhost-user-net backend: {:?}", e)) 471 }) 472 } 473 } 474 475 impl Snapshottable for Net { 476 fn id(&self) -> String { 477 self.id.clone() 478 } 479 480 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 481 Snapshot::new_from_versioned_state(&self.id(), &self.state()) 482 } 483 484 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 485 self.set_state(&snapshot.to_versioned_state(&self.id)?); 486 Ok(()) 487 } 488 } 489 impl Transportable for Net {} 490 491 impl Migratable for Net { 492 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 493 if let Some(guest_memory) = &self.guest_memory { 494 let last_ram_addr = guest_memory.memory().last_addr().raw_value(); 495 self.vu 496 .lock() 497 .unwrap() 498 .start_dirty_log(last_ram_addr) 499 .map_err(|e| { 500 MigratableError::MigrateStart(anyhow!( 501 "Error starting migration for vhost-user-blk backend: {:?}", 502 e 503 )) 504 }) 505 } else { 506 Err(MigratableError::MigrateStart(anyhow!( 507 "Missing guest memory" 508 ))) 509 } 510 } 511 512 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 513 self.vu.lock().unwrap().stop_dirty_log().map_err(|e| { 514 MigratableError::MigrateStop(anyhow!( 515 "Error stopping migration for vhost-user-blk backend: {:?}", 516 e 517 )) 518 }) 519 } 520 521 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 522 if let Some(guest_memory) = &self.guest_memory { 523 let last_ram_addr = guest_memory.memory().last_addr().raw_value(); 524 self.vu 525 .lock() 526 .unwrap() 527 .dirty_log(last_ram_addr) 528 .map_err(|e| { 529 MigratableError::MigrateDirtyRanges(anyhow!( 530 "Error retrieving dirty ranges from vhost-user-blk backend: {:?}", 531 e 532 )) 533 }) 534 } else { 535 Err(MigratableError::MigrateDirtyRanges(anyhow!( 536 "Missing guest memory" 537 ))) 538 } 539 } 540 } 541