1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use crate::seccomp_filters::Thread; 5 use crate::thread_helper::spawn_virtio_thread; 6 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 7 use crate::vhost_user::{Error, Result, VhostUserCommon}; 8 use crate::{ 9 ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue, VirtioCommon, 10 VirtioDevice, VirtioDeviceType, VirtioInterrupt, EPOLL_HELPER_EVENT_LAST, 11 VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, 12 }; 13 use crate::{GuestMemoryMmap, GuestRegionMmap}; 14 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig}; 15 use seccompiler::SeccompAction; 16 use std::os::unix::io::AsRawFd; 17 use std::result; 18 use std::sync::atomic::AtomicBool; 19 use std::sync::{Arc, Barrier, Mutex}; 20 use std::thread; 21 use std::vec::Vec; 22 use versionize::{VersionMap, Versionize, VersionizeResult}; 23 use versionize_derive::Versionize; 24 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 25 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler}; 26 use virtio_bindings::bindings::virtio_net::{ 27 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, 28 VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, 29 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, 30 VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, 31 }; 32 use vm_memory::{ByteValued, GuestAddressSpace, GuestMemoryAtomic}; 33 use vm_migration::{ 34 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 35 Transportable, VersionMapped, 36 }; 37 use vmm_sys_util::eventfd::EventFd; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 2; 40 41 #[derive(Versionize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioNetConfig, 46 pub acked_protocol_features: u64, 47 pub vu_num_queues: usize, 48 } 49 50 impl VersionMapped for State {} 51 52 struct SlaveReqHandler {} 53 impl VhostUserMasterReqHandler for SlaveReqHandler {} 54 55 /// Control queue 56 // Event available on the control queue. 57 const CTRL_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 58 59 pub struct NetCtrlEpollHandler { 60 pub mem: GuestMemoryAtomic<GuestMemoryMmap>, 61 pub kill_evt: EventFd, 62 pub pause_evt: EventFd, 63 pub ctrl_q: CtrlQueue, 64 pub queue_evt: EventFd, 65 pub queue: Queue, 66 } 67 68 impl NetCtrlEpollHandler { 69 pub fn run_ctrl( 70 &mut self, 71 paused: Arc<AtomicBool>, 72 paused_sync: Arc<Barrier>, 73 ) -> std::result::Result<(), EpollHelperError> { 74 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 75 helper.add_event(self.queue_evt.as_raw_fd(), CTRL_QUEUE_EVENT)?; 76 helper.run(paused, paused_sync, self)?; 77 78 Ok(()) 79 } 80 } 81 82 impl EpollHelperHandler for NetCtrlEpollHandler { 83 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 84 let ev_type = event.data as u16; 85 match ev_type { 86 CTRL_QUEUE_EVENT => { 87 let mem = self.mem.memory(); 88 if let Err(e) = self.queue_evt.read() { 89 error!("failed to get ctl queue event: {:?}", e); 90 return true; 91 } 92 if let Err(e) = self.ctrl_q.process(&mem, &mut self.queue) { 93 error!("failed to process ctrl queue: {:?}", e); 94 return true; 95 } 96 } 97 _ => { 98 error!("Unknown event for virtio-net"); 99 return true; 100 } 101 } 102 103 false 104 } 105 } 106 107 pub struct Net { 108 common: VirtioCommon, 109 vu_common: VhostUserCommon, 110 id: String, 111 config: VirtioNetConfig, 112 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 113 ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>, 114 epoll_thread: Option<thread::JoinHandle<()>>, 115 seccomp_action: SeccompAction, 116 exit_evt: EventFd, 117 } 118 119 impl Net { 120 /// Create a new vhost-user-net device 121 pub fn new( 122 id: String, 123 mac_addr: MacAddr, 124 vu_cfg: VhostUserConfig, 125 server: bool, 126 seccomp_action: SeccompAction, 127 restoring: bool, 128 exit_evt: EventFd, 129 ) -> Result<Net> { 130 let mut num_queues = vu_cfg.num_queues; 131 132 if restoring { 133 // We need 'queue_sizes' to report a number of queues that will be 134 // enough to handle all the potential queues. Including the control 135 // queue (with +1) will guarantee that. VirtioPciDevice::new() will 136 // create the actual queues based on this information. 137 return Ok(Net { 138 common: VirtioCommon { 139 device_type: VirtioDeviceType::Net as u32, 140 queue_sizes: vec![vu_cfg.queue_size; num_queues + 1], 141 paused_sync: Some(Arc::new(Barrier::new(2))), 142 min_queues: DEFAULT_QUEUE_NUMBER as u16, 143 ..Default::default() 144 }, 145 vu_common: VhostUserCommon { 146 socket_path: vu_cfg.socket, 147 vu_num_queues: num_queues, 148 server, 149 ..Default::default() 150 }, 151 id, 152 config: VirtioNetConfig::default(), 153 guest_memory: None, 154 ctrl_queue_epoll_thread: None, 155 epoll_thread: None, 156 seccomp_action, 157 exit_evt, 158 }); 159 } 160 161 // Filling device and vring features VMM supports. 162 let mut avail_features = 1 << VIRTIO_NET_F_CSUM 163 | 1 << VIRTIO_NET_F_GUEST_CSUM 164 | 1 << VIRTIO_NET_F_GUEST_TSO4 165 | 1 << VIRTIO_NET_F_GUEST_TSO6 166 | 1 << VIRTIO_NET_F_GUEST_ECN 167 | 1 << VIRTIO_NET_F_GUEST_UFO 168 | 1 << VIRTIO_NET_F_HOST_TSO4 169 | 1 << VIRTIO_NET_F_HOST_TSO6 170 | 1 << VIRTIO_NET_F_HOST_ECN 171 | 1 << VIRTIO_NET_F_HOST_UFO 172 | 1 << VIRTIO_NET_F_MRG_RXBUF 173 | 1 << VIRTIO_NET_F_CTRL_VQ 174 | 1 << VIRTIO_F_RING_EVENT_IDX 175 | 1 << VIRTIO_F_VERSION_1 176 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); 177 178 let mut config = VirtioNetConfig::default(); 179 build_net_config_space(&mut config, mac_addr, num_queues, &mut avail_features); 180 181 let mut vu = 182 VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?; 183 184 let avail_protocol_features = VhostUserProtocolFeatures::MQ 185 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 186 | VhostUserProtocolFeatures::REPLY_ACK 187 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 188 | VhostUserProtocolFeatures::LOG_SHMFD; 189 190 let (mut acked_features, acked_protocol_features) = 191 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 192 193 let backend_num_queues = 194 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 195 vu.socket_handle() 196 .get_queue_num() 197 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 198 } else { 199 DEFAULT_QUEUE_NUMBER 200 }; 201 202 if num_queues > backend_num_queues { 203 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n", 204 num_queues, backend_num_queues); 205 return Err(Error::BadQueueNum); 206 } 207 208 // If the control queue feature has been negotiated, let's increase 209 // the number of queues. 210 let vu_num_queues = num_queues; 211 if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 212 num_queues += 1; 213 } 214 215 // Make sure the virtio feature to set the MAC address is exposed to 216 // the guest, even if it hasn't been negotiated with the backend. 217 acked_features |= 1 << VIRTIO_NET_F_MAC; 218 219 Ok(Net { 220 id, 221 common: VirtioCommon { 222 device_type: VirtioDeviceType::Net as u32, 223 queue_sizes: vec![vu_cfg.queue_size; num_queues], 224 avail_features: acked_features, 225 acked_features: 0, 226 paused_sync: Some(Arc::new(Barrier::new(2))), 227 min_queues: DEFAULT_QUEUE_NUMBER as u16, 228 ..Default::default() 229 }, 230 vu_common: VhostUserCommon { 231 vu: Some(Arc::new(Mutex::new(vu))), 232 acked_protocol_features, 233 socket_path: vu_cfg.socket, 234 vu_num_queues, 235 server, 236 ..Default::default() 237 }, 238 config, 239 guest_memory: None, 240 ctrl_queue_epoll_thread: None, 241 epoll_thread: None, 242 seccomp_action, 243 exit_evt, 244 }) 245 } 246 247 fn state(&self) -> State { 248 State { 249 avail_features: self.common.avail_features, 250 acked_features: self.common.acked_features, 251 config: self.config, 252 acked_protocol_features: self.vu_common.acked_protocol_features, 253 vu_num_queues: self.vu_common.vu_num_queues, 254 } 255 } 256 257 fn set_state(&mut self, state: &State) { 258 self.common.avail_features = state.avail_features; 259 self.common.acked_features = state.acked_features; 260 self.config = state.config; 261 self.vu_common.acked_protocol_features = state.acked_protocol_features; 262 self.vu_common.vu_num_queues = state.vu_num_queues; 263 264 if let Err(e) = self 265 .vu_common 266 .restore_backend_connection(self.common.acked_features) 267 { 268 error!( 269 "Failed restoring connection with vhost-user backend: {:?}", 270 e 271 ); 272 } 273 } 274 } 275 276 impl Drop for Net { 277 fn drop(&mut self) { 278 if let Some(kill_evt) = self.common.kill_evt.take() { 279 if let Err(e) = kill_evt.write(1) { 280 error!("failed to kill vhost-user-net: {:?}", e); 281 } 282 } 283 } 284 } 285 286 impl VirtioDevice for Net { 287 fn device_type(&self) -> u32 { 288 self.common.device_type 289 } 290 291 fn queue_max_sizes(&self) -> &[u16] { 292 &self.common.queue_sizes 293 } 294 295 fn features(&self) -> u64 { 296 self.common.avail_features 297 } 298 299 fn ack_features(&mut self, value: u64) { 300 self.common.ack_features(value) 301 } 302 303 fn read_config(&self, offset: u64, data: &mut [u8]) { 304 self.read_config_from_slice(self.config.as_slice(), offset, data); 305 } 306 307 fn activate( 308 &mut self, 309 mem: GuestMemoryAtomic<GuestMemoryMmap>, 310 interrupt_cb: Arc<dyn VirtioInterrupt>, 311 mut queues: Vec<Queue>, 312 mut queue_evts: Vec<EventFd>, 313 ) -> ActivateResult { 314 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 315 self.guest_memory = Some(mem.clone()); 316 317 let num_queues = queues.len(); 318 if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { 319 let cvq_queue = queues.remove(num_queues - 1); 320 let cvq_queue_evt = queue_evts.remove(num_queues - 1); 321 322 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 323 324 let mut ctrl_handler = NetCtrlEpollHandler { 325 mem: mem.clone(), 326 kill_evt, 327 pause_evt, 328 ctrl_q: CtrlQueue::new(Vec::new()), 329 queue: cvq_queue, 330 queue_evt: cvq_queue_evt, 331 }; 332 333 let paused = self.common.paused.clone(); 334 // Let's update the barrier as we need 1 for the control queue 335 // thread + 1 for the common vhost-user thread + 1 for the main 336 // thread signalling the pause. 337 self.common.paused_sync = Some(Arc::new(Barrier::new(3))); 338 let paused_sync = self.common.paused_sync.clone(); 339 340 let mut epoll_threads = Vec::new(); 341 spawn_virtio_thread( 342 &format!("{}_ctrl", &self.id), 343 &self.seccomp_action, 344 Thread::VirtioVhostNetCtl, 345 &mut epoll_threads, 346 &self.exit_evt, 347 move || { 348 if let Err(e) = ctrl_handler.run_ctrl(paused, paused_sync.unwrap()) { 349 error!("Error running worker: {:?}", e); 350 } 351 }, 352 )?; 353 self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0)); 354 } 355 356 let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None; 357 358 // The backend acknowledged features must contain the protocol feature 359 // bit in case it was initially set but lost through the features 360 // negotiation with the guest. Additionally, it must not contain 361 // VIRTIO_NET_F_MAC since we don't expect the backend to handle it. 362 let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC) 363 | (self.common.avail_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()); 364 365 // Run a dedicated thread for handling potential reconnections with 366 // the backend. 367 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 368 369 let mut handler = self.vu_common.activate( 370 mem, 371 queues, 372 queue_evts, 373 interrupt_cb, 374 backend_acked_features, 375 slave_req_handler, 376 kill_evt, 377 pause_evt, 378 )?; 379 380 let paused = self.common.paused.clone(); 381 let paused_sync = self.common.paused_sync.clone(); 382 383 let mut epoll_threads = Vec::new(); 384 spawn_virtio_thread( 385 &self.id, 386 &self.seccomp_action, 387 Thread::VirtioVhostNet, 388 &mut epoll_threads, 389 &self.exit_evt, 390 move || { 391 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 392 error!("Error running worker: {:?}", e); 393 } 394 }, 395 )?; 396 self.epoll_thread = Some(epoll_threads.remove(0)); 397 398 Ok(()) 399 } 400 401 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 402 // We first must resume the virtio thread if it was paused. 403 if self.common.pause_evt.take().is_some() { 404 self.common.resume().ok()?; 405 } 406 407 if let Some(vu) = &self.vu_common.vu { 408 if let Err(e) = vu 409 .lock() 410 .unwrap() 411 .reset_vhost_user(self.common.queue_sizes.len()) 412 { 413 error!("Failed to reset vhost-user daemon: {:?}", e); 414 return None; 415 } 416 } 417 418 if let Some(kill_evt) = self.common.kill_evt.take() { 419 // Ignore the result because there is nothing we can do about it. 420 let _ = kill_evt.write(1); 421 } 422 423 event!("virtio-device", "reset", "id", &self.id); 424 425 // Return the interrupt 426 Some(self.common.interrupt_cb.take().unwrap()) 427 } 428 429 fn shutdown(&mut self) { 430 self.vu_common.shutdown(); 431 } 432 433 fn add_memory_region( 434 &mut self, 435 region: &Arc<GuestRegionMmap>, 436 ) -> std::result::Result<(), crate::Error> { 437 self.vu_common.add_memory_region(&self.guest_memory, region) 438 } 439 } 440 441 impl Pausable for Net { 442 fn pause(&mut self) -> result::Result<(), MigratableError> { 443 self.vu_common.pause()?; 444 self.common.pause() 445 } 446 447 fn resume(&mut self) -> result::Result<(), MigratableError> { 448 self.common.resume()?; 449 450 if let Some(epoll_thread) = &self.epoll_thread { 451 epoll_thread.thread().unpark(); 452 } 453 454 if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread { 455 ctrl_queue_epoll_thread.thread().unpark(); 456 } 457 458 self.vu_common.resume() 459 } 460 } 461 462 impl Snapshottable for Net { 463 fn id(&self) -> String { 464 self.id.clone() 465 } 466 467 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 468 self.vu_common.snapshot(&self.id(), &self.state()) 469 } 470 471 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 472 self.set_state(&snapshot.to_versioned_state(&self.id)?); 473 Ok(()) 474 } 475 } 476 impl Transportable for Net {} 477 478 impl Migratable for Net { 479 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 480 self.vu_common.start_dirty_log(&self.guest_memory) 481 } 482 483 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 484 self.vu_common.stop_dirty_log() 485 } 486 487 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 488 self.vu_common.dirty_log(&self.guest_memory) 489 } 490 491 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 492 self.vu_common 493 .complete_migration(self.common.kill_evt.take()) 494 } 495 } 496