// Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! vhost-user network device.
//!
//! [`Net`] is the VMM-side front end of a virtio-net device whose data queues
//! are handed to an external vhost-user backend. The optional control queue is
//! not given to the backend: when present it is served by a local
//! `NetCtrlEpollHandler` thread (see `Net::activate`).

use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
use crate::vhost_user::{Error, Result, VhostUserCommon};
use crate::{
    ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType,
    VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
};
use crate::{GuestMemoryMmap, GuestRegionMmap};
use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
use seccompiler::SeccompAction;
use std::result;
use std::sync::{Arc, Barrier, Mutex};
use std::thread;
use std::vec::Vec;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler};
use virtio_bindings::bindings::virtio_net::{
    VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
};
use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use virtio_queue::Queue;
use vm_memory::{ByteValued, GuestMemoryAtomic};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
    Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

/// Queue count used as the device's `min_queues`, and assumed for the backend
/// when the vhost-user `MQ` protocol feature has not been negotiated.
const DEFAULT_QUEUE_NUMBER: usize = 2;

/// Versioned snapshot payload of a [`Net`] device.
///
/// Produced by `Net::state()` and applied by `Net::set_state()`.
#[derive(Versionize)]
pub struct State {
    /// Copy of `VirtioCommon::avail_features`.
    pub avail_features: u64,
    /// Copy of `VirtioCommon::acked_features`.
    pub acked_features: u64,
    /// Copy of the virtio-net configuration space.
    pub config: VirtioNetConfig,
    /// Copy of `VhostUserCommon::acked_protocol_features`.
    pub acked_protocol_features: u64,
    /// Copy of `VhostUserCommon::vu_num_queues` (queue count without the
    /// control queue, see `Net::new`).
    pub vu_num_queues: usize,
}

impl VersionMapped for State {}

/// Placeholder request handler type.
///
/// It implements `VhostUserMasterReqHandler` without overriding any method and
/// only serves to name the type of the `None` handler passed in
/// `Net::activate`.
struct SlaveReqHandler {}
impl VhostUserMasterReqHandler for SlaveReqHandler {}

/// vhost-user-net virtio device.
pub struct Net {
    /// Generic virtio device state (features, queue sizes, pause/kill events,
    /// interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, protocol features, socket
    /// path, number of backend queues).
    vu_common: VhostUserCommon,
    /// Device identifier; used for thread names, the reset event and as the
    /// snapshot id.
    id: String,
    /// virtio-net configuration space returned by `read_config`.
    config: VirtioNetConfig,
    /// Guest memory, recorded by `activate`; `None` until then.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Handle of the control queue thread, set by `activate` only when the
    /// control queue is handled locally.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Handle of the thread running the handler returned by
    /// `VhostUserCommon::activate`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action forwarded to `spawn_virtio_thread`.
    seccomp_action: SeccompAction,
    /// Exit event forwarded to `spawn_virtio_thread`.
    exit_evt: EventFd,
    /// When `true`, `features()` additionally advertises
    /// `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}

impl Net {
    /// Create a new vhost-user-net device
    ///
    /// # Parameters
    ///
    /// * `id` - device identifier.
    /// * `mac_addr` - MAC address written into the config space through
    ///   `build_net_config_space`.
    /// * `vu_cfg` - vhost-user configuration; its `socket`, `num_queues` and
    ///   `queue_size` fields are read here.
    /// * `server` - forwarded to `VhostUserHandle::connect_vhost_user` and
    ///   stored in `VhostUserCommon`.
    /// * `seccomp_action`, `exit_evt` - stored for later thread creation.
    /// * `restoring` - when `true`, no connection to the backend is made and
    ///   no feature is negotiated; features and config are left at their
    ///   defaults, to be filled in later by `set_state()`.
    /// * `iommu` - see the `iommu` field.
    ///
    /// # Errors
    ///
    /// Propagates errors from `connect_vhost_user` and
    /// `negotiate_features_vhost_user`, returns
    /// `Error::VhostUserGetQueueMaxNum` if the backend queue count cannot be
    /// fetched, and `Error::BadQueueNum` if `vu_cfg.num_queues` exceeds what
    /// the backend supports.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        mac_addr: MacAddr,
        vu_cfg: VhostUserConfig,
        server: bool,
        seccomp_action: SeccompAction,
        restoring: bool,
        exit_evt: EventFd,
        iommu: bool,
    ) -> Result<Net> {
        let mut num_queues = vu_cfg.num_queues;

        if restoring {
            // We need 'queue_sizes' to report a number of queues that will be
            // enough to handle all the potential queues. Including the control
            // queue (with +1) will guarantee that. VirtioPciDevice::new() will
            // create the actual queues based on this information.
            return Ok(Net {
                common: VirtioCommon {
                    device_type: VirtioDeviceType::Net as u32,
                    queue_sizes: vec![vu_cfg.queue_size; num_queues + 1],
                    paused_sync: Some(Arc::new(Barrier::new(2))),
                    min_queues: DEFAULT_QUEUE_NUMBER as u16,
                    ..Default::default()
                },
                vu_common: VhostUserCommon {
                    socket_path: vu_cfg.socket,
                    vu_num_queues: num_queues,
                    server,
                    ..Default::default()
                },
                id,
                config: VirtioNetConfig::default(),
                guest_memory: None,
                ctrl_queue_epoll_thread: None,
                epoll_thread: None,
                seccomp_action,
                exit_evt,
                iommu,
            });
        }

        // Filling device and vring features VMM supports.
        let mut avail_features = 1 << VIRTIO_NET_F_CSUM
            | 1 << VIRTIO_NET_F_GUEST_CSUM
            | 1 << VIRTIO_NET_F_GUEST_TSO4
            | 1 << VIRTIO_NET_F_GUEST_TSO6
            | 1 << VIRTIO_NET_F_GUEST_ECN
            | 1 << VIRTIO_NET_F_GUEST_UFO
            | 1 << VIRTIO_NET_F_HOST_TSO4
            | 1 << VIRTIO_NET_F_HOST_TSO6
            | 1 << VIRTIO_NET_F_HOST_ECN
            | 1 << VIRTIO_NET_F_HOST_UFO
            | 1 << VIRTIO_NET_F_MRG_RXBUF
            | 1 << VIRTIO_NET_F_CTRL_VQ
            | 1 << VIRTIO_F_RING_EVENT_IDX
            | 1 << VIRTIO_F_VERSION_1
            | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();

        // `avail_features` is passed mutably, so the helper may adjust the
        // feature set while building the config space.
        let mut config = VirtioNetConfig::default();
        build_net_config_space(&mut config, mac_addr, num_queues, &mut avail_features);

        let mut vu =
            VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;

        // vhost-user protocol features the VMM is willing to use.
        let avail_protocol_features = VhostUserProtocolFeatures::MQ
            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
            | VhostUserProtocolFeatures::REPLY_ACK
            | VhostUserProtocolFeatures::INFLIGHT_SHMFD
            | VhostUserProtocolFeatures::LOG_SHMFD;

        let (mut acked_features, acked_protocol_features) =
            vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;

        // Ask the backend for its queue count only when MQ was negotiated;
        // otherwise assume the default number of queues.
        let backend_num_queues =
            if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
                vu.socket_handle()
                    .get_queue_num()
                    .map_err(Error::VhostUserGetQueueMaxNum)? as usize
            } else {
                DEFAULT_QUEUE_NUMBER
            };

        if num_queues > backend_num_queues {
            error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
                num_queues, backend_num_queues);
            return Err(Error::BadQueueNum);
        }

        // If the control queue feature has been negotiated, let's increase
        // the number of queues.
        // `vu_num_queues` keeps the count without the control queue; it is the
        // value stored in `VhostUserCommon`.
        let vu_num_queues = num_queues;
        if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
            num_queues += 1;
        }

        // Make sure the virtio feature to set the MAC address is exposed to
        // the guest, even if it hasn't been negotiated with the backend.
        acked_features |= 1 << VIRTIO_NET_F_MAC;

        Ok(Net {
            id,
            common: VirtioCommon {
                device_type: VirtioDeviceType::Net as u32,
                queue_sizes: vec![vu_cfg.queue_size; num_queues],
                // What the backend acked (plus VIRTIO_NET_F_MAC) is what gets
                // offered to the guest.
                avail_features: acked_features,
                // If part of the available features that have been acked, the
                // PROTOCOL_FEATURES bit must be already set through the VIRTIO
                // acked features as we know the guest would never ack it, thus
                // the feature would be lost.
                acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
                paused_sync: Some(Arc::new(Barrier::new(2))),
                min_queues: DEFAULT_QUEUE_NUMBER as u16,
                ..Default::default()
            },
            vu_common: VhostUserCommon {
                vu: Some(Arc::new(Mutex::new(vu))),
                acked_protocol_features,
                socket_path: vu_cfg.socket,
                vu_num_queues,
                server,
                ..Default::default()
            },
            config,
            guest_memory: None,
            ctrl_queue_epoll_thread: None,
            epoll_thread: None,
            seccomp_action,
            exit_evt,
            iommu,
        })
    }

    /// Collect the fields that make up the device snapshot.
    fn state(&self) -> State {
        State {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
            acked_protocol_features: self.vu_common.acked_protocol_features,
            vu_num_queues: self.vu_common.vu_num_queues,
        }
    }

    /// Apply a previously captured [`State`] and try to re-establish the
    /// connection with the vhost-user backend.
    ///
    /// A failure to restore the backend connection is logged and not returned
    /// to the caller.
    // NOTE(review): because the error is only logged, `restore()` reports
    // success even when the backend connection could not be restored —
    // confirm this best-effort behavior is intended.
    fn set_state(&mut self, state: &State) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        self.config = state.config;
        self.vu_common.acked_protocol_features = state.acked_protocol_features;
        self.vu_common.vu_num_queues = state.vu_num_queues;

        // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
        // since we don't expect the backend to handle it.
        let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);

        if let Err(e) = self
            .vu_common
            .restore_backend_connection(backend_acked_features)
        {
            error!(
                "Failed restoring connection with vhost-user backend: {:?}",
                e
            );
        }
    }
}

// On drop, signal the kill event (if still held) so that worker threads
// listening on it can stop. The threads are not joined here.
impl Drop for Net {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            if let Err(e) = kill_evt.write(1) {
                error!("failed to kill vhost-user-net: {:?}", e);
            }
        }
    }
}

impl VirtioDevice for Net {
    /// Returns the virtio device type stored at construction
    /// (`VirtioDeviceType::Net`).
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    /// Returns the maximum size of every queue, including the control queue
    /// slot when one was accounted for in `new`.
    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    /// Returns the features offered to the guest: `avail_features`, plus
    /// `VIRTIO_F_IOMMU_PLATFORM` when the device was created with `iommu`.
    fn features(&self) -> u64 {
        let mut features = self.common.avail_features;
        if self.iommu {
            features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }
        features
    }

    /// Records the features acknowledged by the guest (delegates to
    /// `VirtioCommon::ack_features`).
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    /// Reads from the virtio-net configuration space at `offset` into `data`.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    /// Activates the device.
    ///
    /// If the guest acked `VIRTIO_NET_F_CTRL_VQ` and the number of queues is
    /// odd, the last queue is treated as the control queue: it is removed from
    /// `queues`/`queue_evts` and served by a dedicated `NetCtrlEpollHandler`
    /// thread. The remaining queues are handed to the vhost-user backend via
    /// `VhostUserCommon::activate`, and a second thread runs the handler it
    /// returns.
    ///
    /// Errors from `VirtioCommon::activate`, `VhostUserCommon::activate` and
    /// `spawn_virtio_thread` are propagated.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        self.guest_memory = Some(mem.clone());

        let num_queues = queues.len();
        let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
        // An odd queue count means the last queue is not part of a data queue
        // pair, i.e. it is the control queue.
        if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
            let ctrl_queue_index = num_queues - 1;
            // Take the control queue out so it is not passed to the backend.
            let mut ctrl_queue = queues.remove(ctrl_queue_index);
            let ctrl_queue_evt = queue_evts.remove(ctrl_queue_index);

            ctrl_queue.set_event_idx(event_idx);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();

            let mut ctrl_handler = NetCtrlEpollHandler {
                kill_evt,
                pause_evt,
                ctrl_q: CtrlQueue::new(Vec::new()),
                queue: ctrl_queue,
                queue_evt: ctrl_queue_evt,
                access_platform: None,
                interrupt_cb: interrupt_cb.clone(),
                queue_index: ctrl_queue_index as u16,
            };

            let paused = self.common.paused.clone();
            // Let's update the barrier as we need 1 for the control queue
            // thread + 1 for the common vhost-user thread + 1 for the main
            // thread signalling the pause.
            // This must happen before `paused_sync` is cloned (here and again
            // further down) so that both threads share the 3-party barrier.
            self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
            let paused_sync = self.common.paused_sync.clone();

            // `remove(0)` below relies on `spawn_virtio_thread` having pushed
            // the new thread's handle into `epoll_threads` — presumably its
            // contract; verify against the helper.
            let mut epoll_threads = Vec::new();
            spawn_virtio_thread(
                &format!("{}_ctrl", &self.id),
                &self.seccomp_action,
                Thread::VirtioVhostNetCtl,
                &mut epoll_threads,
                &self.exit_evt,
                move || {
                    if let Err(e) = ctrl_handler.run_ctrl(paused, paused_sync.unwrap()) {
                        error!("Error running worker: {:?}", e);
                    }
                },
            )?;
            self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
        }

        // No request handler is installed for backend-initiated requests.
        let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None;

        // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
        // since we don't expect the backend to handle it.
        let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);

        // Run a dedicated thread for handling potential reconnections with
        // the backend.
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let mut handler = self.vu_common.activate(
            mem,
            queues,
            queue_evts,
            interrupt_cb,
            backend_acked_features,
            slave_req_handler,
            kill_evt,
            pause_evt,
        )?;

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();

        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioVhostNet,
            &mut epoll_threads,
            &self.exit_evt,
            move || {
                if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            },
        )?;
        self.epoll_thread = Some(epoll_threads.remove(0));

        Ok(())
    }

    /// Resets the device and hands back the interrupt callback.
    ///
    /// Returns `None` if resuming a paused device fails or if the vhost-user
    /// backend reports an error on reset. Panics if no interrupt callback is
    /// stored in `self.common.interrupt_cb` (the final `unwrap()`).
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        // We first must resume the virtio thread if it was paused.
        if self.common.pause_evt.take().is_some() {
            self.common.resume().ok()?;
        }

        // NOTE(review): `queue_sizes.len()` can include the control queue slot
        // (see `new`), which is not counted in `vu_num_queues` — confirm that
        // `reset_vhost_user` expects this count rather than `vu_num_queues`.
        if let Some(vu) = &self.vu_common.vu {
            if let Err(e) = vu
                .lock()
                .unwrap()
                .reset_vhost_user(self.common.queue_sizes.len())
            {
                error!("Failed to reset vhost-user daemon: {:?}", e);
                return None;
            }
        }

        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }

        event!("virtio-device", "reset", "id", &self.id);

        // Return the interrupt
        Some(self.common.interrupt_cb.take().unwrap())
    }

    /// Shuts down the vhost-user side (delegates to
    /// `VhostUserCommon::shutdown`).
    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }

    /// Forwards a newly added guest memory region, together with the guest
    /// memory recorded at activation, to `VhostUserCommon::add_memory_region`.
    fn add_memory_region(
        &mut self,
        region: &Arc<GuestRegionMmap>,
    ) -> std::result::Result<(), crate::Error> {
        self.vu_common.add_memory_region(&self.guest_memory, region)
    }
}

impl Pausable for Net {
    /// Pauses the vhost-user side first, then the common virtio state.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.vu_common.pause()?;
        self.common.pause()
    }

    /// Resumes the common virtio state, unparks the worker threads that were
    /// spawned in `activate`, then resumes the vhost-user side.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()?;

        if let Some(epoll_thread) = &self.epoll_thread {
            epoll_thread.thread().unpark();
        }

        if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
            ctrl_queue_epoll_thread.thread().unpark();
        }

        self.vu_common.resume()
    }
}

impl Snapshottable for Net {
    /// Returns the device identifier, used as the snapshot id.
    fn id(&self) -> String {
        self.id.clone()
    }

    /// Builds a snapshot from the current [`State`] through
    /// `VhostUserCommon::snapshot`.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        self.vu_common.snapshot(&self.id(), &self.state())
    }

    /// Decodes the versioned [`State`] stored under this device's id and
    /// applies it with `set_state`.
    ///
    /// Only a decoding failure is returned as an error; see `set_state` for
    /// how backend reconnection failures are handled.
    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}
impl Transportable for Net {}

// All migration operations are delegated to `VhostUserCommon`; the guest
// memory recorded in `activate` is passed where dirty logging needs it.
impl Migratable for Net {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_dirty_log(&self.guest_memory)
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.stop_dirty_log()
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        self.vu_common.dirty_log(&self.guest_memory)
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_migration()
    }

    /// Hands the kill event over to `VhostUserCommon::complete_migration`.
    /// After this call `self.common.kill_evt` is `None`, so `Drop` and `reset`
    /// no longer write to it.
    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common
            .complete_migration(self.common.kill_evt.take())
    }
}