1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use std::result; 5 use std::sync::atomic::AtomicBool; 6 use std::sync::{Arc, Barrier, Mutex}; 7 use std::thread; 8 9 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig}; 10 use seccompiler::SeccompAction; 11 use serde::{Deserialize, Serialize}; 12 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 13 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 14 use virtio_bindings::virtio_net::{ 15 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, 16 VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, 17 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, 18 VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, 19 }; 20 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; 21 use virtio_queue::{Queue, QueueT}; 22 use vm_memory::{ByteValued, GuestMemoryAtomic}; 23 use vm_migration::{ 24 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 25 Transportable, 26 }; 27 use vmm_sys_util::eventfd::EventFd; 28 29 use crate::seccomp_filters::Thread; 30 use crate::thread_helper::spawn_virtio_thread; 31 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 32 use crate::vhost_user::{Error, Result, VhostUserCommon}; 33 use crate::{ 34 ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType, 35 VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, 36 }; 37 use crate::{GuestMemoryMmap, GuestRegionMmap}; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 2; 40 41 #[derive(Serialize, Deserialize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioNetConfig, 46 pub acked_protocol_features: u64, 47 pub vu_num_queues: usize, 48 } 49 50 struct BackendReqHandler {} 51 impl VhostUserFrontendReqHandler for BackendReqHandler {} 52 53 pub struct Net { 54 common: VirtioCommon, 55 vu_common: VhostUserCommon, 56 id: String, 57 config: VirtioNetConfig, 58 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 59 ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>, 60 epoll_thread: Option<thread::JoinHandle<()>>, 61 seccomp_action: SeccompAction, 62 exit_evt: EventFd, 63 iommu: bool, 64 } 65 66 impl Net { 67 /// Create a new vhost-user-net device 68 #[allow(clippy::too_many_arguments)] 69 pub fn new( 70 id: String, 71 mac_addr: MacAddr, 72 mtu: Option<u16>, 73 vu_cfg: VhostUserConfig, 74 server: bool, 75 seccomp_action: SeccompAction, 76 exit_evt: EventFd, 77 iommu: bool, 78 state: Option<State>, 79 offload_tso: bool, 80 offload_ufo: bool, 81 offload_csum: bool, 82 ) -> Result<Net> { 83 let mut num_queues = vu_cfg.num_queues; 84 85 let mut vu = 86 VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?; 87 88 let ( 89 avail_features, 90 acked_features, 91 acked_protocol_features, 92 vu_num_queues, 93 config, 94 paused, 95 ) = if let Some(state) = state { 96 info!("Restoring vhost-user-net {}", id); 97 98 // The backend acknowledged features must not contain 99 // VIRTIO_NET_F_MAC since we don't expect the backend 100 // to handle it. 101 let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC); 102 103 vu.set_protocol_features_vhost_user( 104 backend_acked_features, 105 state.acked_protocol_features, 106 )?; 107 108 // If the control queue feature has been negotiated, let's 109 // increase the number of queues. 110 if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 111 num_queues += 1; 112 } 113 114 ( 115 state.avail_features, 116 state.acked_features, 117 state.acked_protocol_features, 118 state.vu_num_queues, 119 state.config, 120 true, 121 ) 122 } else { 123 // Filling device and vring features VMM supports. 124 let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF 125 | 1 << VIRTIO_NET_F_CTRL_VQ 126 | 1 << VIRTIO_F_RING_EVENT_IDX 127 | 1 << VIRTIO_F_VERSION_1 128 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); 129 130 if mtu.is_some() { 131 avail_features |= 1u64 << VIRTIO_NET_F_MTU; 132 } 133 134 // Configure TSO/UFO features when hardware checksum offload is enabled. 135 if offload_csum { 136 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM; 137 138 if offload_tso { 139 avail_features |= 1 << VIRTIO_NET_F_HOST_ECN 140 | 1 << VIRTIO_NET_F_HOST_TSO4 141 | 1 << VIRTIO_NET_F_HOST_TSO6 142 | 1 << VIRTIO_NET_F_GUEST_ECN 143 | 1 << VIRTIO_NET_F_GUEST_TSO4 144 | 1 << VIRTIO_NET_F_GUEST_TSO6; 145 } 146 147 if offload_ufo { 148 avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO; 149 } 150 } 151 152 let mut config = VirtioNetConfig::default(); 153 build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features); 154 155 let avail_protocol_features = VhostUserProtocolFeatures::MQ 156 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 157 | VhostUserProtocolFeatures::REPLY_ACK 158 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 159 | VhostUserProtocolFeatures::LOG_SHMFD; 160 161 let (mut acked_features, acked_protocol_features) = 162 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 163 164 let backend_num_queues = 165 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 166 vu.socket_handle() 167 .get_queue_num() 168 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 169 } else { 170 DEFAULT_QUEUE_NUMBER 171 }; 172 173 if num_queues > backend_num_queues { 174 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n", 175 num_queues, backend_num_queues); 176 return Err(Error::BadQueueNum); 177 } 178 179 // If the control queue feature has been negotiated, let's increase 180 // the number of queues. 181 let vu_num_queues = num_queues; 182 if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 183 num_queues += 1; 184 } 185 186 // Make sure the virtio feature to set the MAC address is exposed to 187 // the guest, even if it hasn't been negotiated with the backend. 188 acked_features |= 1 << VIRTIO_NET_F_MAC; 189 190 ( 191 acked_features, 192 // If part of the available features that have been acked, 193 // the PROTOCOL_FEATURES bit must be already set through 194 // the VIRTIO acked features as we know the guest would 195 // never ack it, thus the feature would be lost. 196 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 197 acked_protocol_features, 198 vu_num_queues, 199 config, 200 false, 201 ) 202 }; 203 204 Ok(Net { 205 id, 206 common: VirtioCommon { 207 device_type: VirtioDeviceType::Net as u32, 208 queue_sizes: vec![vu_cfg.queue_size; num_queues], 209 avail_features, 210 acked_features, 211 paused_sync: Some(Arc::new(Barrier::new(2))), 212 min_queues: DEFAULT_QUEUE_NUMBER as u16, 213 paused: Arc::new(AtomicBool::new(paused)), 214 ..Default::default() 215 }, 216 vu_common: VhostUserCommon { 217 vu: Some(Arc::new(Mutex::new(vu))), 218 acked_protocol_features, 219 socket_path: vu_cfg.socket, 220 vu_num_queues, 221 server, 222 ..Default::default() 223 }, 224 config, 225 guest_memory: None, 226 ctrl_queue_epoll_thread: None, 227 epoll_thread: None, 228 seccomp_action, 229 exit_evt, 230 iommu, 231 }) 232 } 233 234 fn state(&self) -> State { 235 State { 236 avail_features: self.common.avail_features, 237 acked_features: self.common.acked_features, 238 config: self.config, 239 acked_protocol_features: self.vu_common.acked_protocol_features, 240 vu_num_queues: self.vu_common.vu_num_queues, 241 } 242 } 243 } 244 245 impl Drop for Net { 246 fn drop(&mut self) { 247 if let Some(kill_evt) = self.common.kill_evt.take() { 248 if let Err(e) = kill_evt.write(1) { 249 error!("failed to kill vhost-user-net: {:?}", e); 250 } 251 } 252 253 self.common.wait_for_epoll_threads(); 254 255 if let Some(thread) = self.epoll_thread.take() { 256 if let Err(e) = thread.join() { 257 error!("Error joining thread: {:?}", e); 258 } 259 } 260 if let Some(thread) = self.ctrl_queue_epoll_thread.take() { 261 if let Err(e) = thread.join() { 262 error!("Error joining thread: {:?}", e); 263 } 264 } 265 } 266 } 267 268 impl VirtioDevice for Net { 269 fn device_type(&self) -> u32 { 270 self.common.device_type 271 } 272 273 fn queue_max_sizes(&self) -> &[u16] { 274 &self.common.queue_sizes 275 } 276 277 fn features(&self) -> u64 { 278 let mut features = self.common.avail_features; 279 if self.iommu { 280 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 281 } 282 features 283 } 284 285 fn ack_features(&mut self, value: u64) { 286 self.common.ack_features(value) 287 } 288 289 fn read_config(&self, offset: u64, data: &mut [u8]) { 290 self.read_config_from_slice(self.config.as_slice(), offset, data); 291 } 292 293 fn activate( 294 &mut self, 295 mem: GuestMemoryAtomic<GuestMemoryMmap>, 296 interrupt_cb: Arc<dyn VirtioInterrupt>, 297 mut queues: Vec<(usize, Queue, EventFd)>, 298 ) -> ActivateResult { 299 self.common.activate(&queues, &interrupt_cb)?; 300 self.guest_memory = Some(mem.clone()); 301 302 let num_queues = queues.len(); 303 let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into()); 304 if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { 305 let ctrl_queue_index = num_queues - 1; 306 let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index); 307 308 ctrl_queue.set_event_idx(event_idx); 309 310 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 311 312 let mut ctrl_handler = NetCtrlEpollHandler { 313 mem: mem.clone(), 314 kill_evt, 315 pause_evt, 316 ctrl_q: CtrlQueue::new(Vec::new()), 317 queue: ctrl_queue, 318 queue_evt: ctrl_queue_evt, 319 access_platform: None, 320 interrupt_cb: interrupt_cb.clone(), 321 queue_index: ctrl_queue_index as u16, 322 }; 323 324 let paused = self.common.paused.clone(); 325 // Let's update the barrier as we need 1 for the control queue 326 // thread + 1 for the common vhost-user thread + 1 for the main 327 // thread signalling the pause. 328 self.common.paused_sync = Some(Arc::new(Barrier::new(3))); 329 let paused_sync = self.common.paused_sync.clone(); 330 331 let mut epoll_threads = Vec::new(); 332 spawn_virtio_thread( 333 &format!("{}_ctrl", &self.id), 334 &self.seccomp_action, 335 Thread::VirtioVhostNetCtl, 336 &mut epoll_threads, 337 &self.exit_evt, 338 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()), 339 )?; 340 self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0)); 341 } 342 343 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 344 345 // The backend acknowledged features must not contain VIRTIO_NET_F_MAC 346 // since we don't expect the backend to handle it. 347 let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC); 348 349 // Run a dedicated thread for handling potential reconnections with 350 // the backend. 351 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 352 353 let mut handler = self.vu_common.activate( 354 mem, 355 queues, 356 interrupt_cb, 357 backend_acked_features, 358 backend_req_handler, 359 kill_evt, 360 pause_evt, 361 )?; 362 363 let paused = self.common.paused.clone(); 364 let paused_sync = self.common.paused_sync.clone(); 365 366 let mut epoll_threads = Vec::new(); 367 spawn_virtio_thread( 368 &self.id, 369 &self.seccomp_action, 370 Thread::VirtioVhostNet, 371 &mut epoll_threads, 372 &self.exit_evt, 373 move || handler.run(paused, paused_sync.unwrap()), 374 )?; 375 self.epoll_thread = Some(epoll_threads.remove(0)); 376 377 Ok(()) 378 } 379 380 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 381 // We first must resume the virtio thread if it was paused. 382 if self.common.pause_evt.take().is_some() { 383 self.common.resume().ok()?; 384 } 385 386 if let Some(vu) = &self.vu_common.vu { 387 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 388 error!("Failed to reset vhost-user daemon: {:?}", e); 389 return None; 390 } 391 } 392 393 if let Some(kill_evt) = self.common.kill_evt.take() { 394 // Ignore the result because there is nothing we can do about it. 395 let _ = kill_evt.write(1); 396 } 397 398 event!("virtio-device", "reset", "id", &self.id); 399 400 // Return the interrupt 401 Some(self.common.interrupt_cb.take().unwrap()) 402 } 403 404 fn shutdown(&mut self) { 405 self.vu_common.shutdown(); 406 } 407 408 fn add_memory_region( 409 &mut self, 410 region: &Arc<GuestRegionMmap>, 411 ) -> std::result::Result<(), crate::Error> { 412 self.vu_common.add_memory_region(&self.guest_memory, region) 413 } 414 } 415 416 impl Pausable for Net { 417 fn pause(&mut self) -> result::Result<(), MigratableError> { 418 self.vu_common.pause()?; 419 self.common.pause() 420 } 421 422 fn resume(&mut self) -> result::Result<(), MigratableError> { 423 self.common.resume()?; 424 425 if let Some(epoll_thread) = &self.epoll_thread { 426 epoll_thread.thread().unpark(); 427 } 428 429 if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread { 430 ctrl_queue_epoll_thread.thread().unpark(); 431 } 432 433 self.vu_common.resume() 434 } 435 } 436 437 impl Snapshottable for Net { 438 fn id(&self) -> String { 439 self.id.clone() 440 } 441 442 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 443 self.vu_common.snapshot(&self.state()) 444 } 445 } 446 impl Transportable for Net {} 447 448 impl Migratable for Net { 449 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 450 self.vu_common.start_dirty_log(&self.guest_memory) 451 } 452 453 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 454 self.vu_common.stop_dirty_log() 455 } 456 457 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 458 self.vu_common.dirty_log(&self.guest_memory) 459 } 460 461 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 462 self.vu_common.start_migration() 463 } 464 465 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 466 self.vu_common 467 .complete_migration(self.common.kill_evt.take()) 468 } 469 } 470