// Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, Mutex};
use std::{result, thread};

use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig};
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
use virtio_bindings::virtio_net::{
    VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU,
};
use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use virtio_queue::{Queue, QueueT};
use vm_memory::{ByteValued, GuestMemoryAtomic};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
use crate::vhost_user::{Error, Result, VhostUserCommon};
use crate::{
    ActivateResult, GuestMemoryMmap, GuestRegionMmap, NetCtrlEpollHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1,
};

/// Queue count used in two places in this file: as the assumed number of
/// queues supported by the backend when the vhost-user `MQ` protocol feature
/// has not been acknowledged, and as the device's `min_queues` value.
const DEFAULT_QUEUE_NUMBER: usize = 2;

/// Serializable state of a vhost-user-net device.
///
/// Produced by `Net::state()` for snapshotting and consumed by `Net::new()`
/// when a previous state is supplied, in which case the stored values are
/// reused instead of negotiating features with the backend again.
// NOTE(review): this type is (de)serialized with serde; keep the field order
// stable unless the snapshot format is known not to depend on it.
#[derive(Serialize, Deserialize)]
pub struct State {
    /// Value of the device's `common.avail_features` at snapshot time.
    pub avail_features: u64,
    /// Value of the device's `common.acked_features` at snapshot time.
    pub acked_features: u64,
    /// The virtio-net configuration space contents.
    pub config: VirtioNetConfig,
    /// vhost-user protocol features acknowledged with the backend.
    pub acked_protocol_features: u64,
    /// Number of queues recorded in `vu_common.vu_num_queues`. On a fresh
    /// device this is the requested queue count before the control queue is
    /// added.
    pub vu_num_queues: usize,
}

/// Handler type for requests initiated by the vhost-user backend.
///
/// It implements `VhostUserFrontendReqHandler` without overriding any method,
/// and `Net::activate` passes `None` as the request handler, so no instance is
/// created in this file.
struct BackendReqHandler {}
impl VhostUserFrontendReqHandler for BackendReqHandler {}

/// A vhost-user-net virtio device.
pub struct Net {
    /// Shared virtio device state (features, queue sizes, kill/pause events,
    /// paused flag and barrier, interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state: the connection handle, the acknowledged
    /// protocol features, the socket path and the queue count.
    vu_common: VhostUserCommon,
    /// Device identifier; used for thread names, the reset event and as the
    /// snapshot id.
    id: String,
    /// virtio-net configuration space exposed through `read_config`.
    config: VirtioNetConfig,
    /// Guest memory handle, set in `activate` and later handed to the
    /// `vu_common` memory-region and dirty-log helpers.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Join handle of the control queue thread, spawned in `activate` only
    /// when the control queue is in use.
    ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>,
    /// Join handle of the thread running the handler returned by
    /// `vu_common.activate`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action passed to `spawn_virtio_thread` for both threads.
    seccomp_action: SeccompAction,
    /// Event passed to `spawn_virtio_thread` for both threads.
    exit_evt: EventFd,
    /// When true, `features()` additionally advertises
    /// `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}

impl Net {
    /// Create a new vhost-user-net device
    ///
    /// Connects to the vhost-user socket described by `vu_cfg`, then either
    /// restores the device from `state` or negotiates features with the
    /// backend from scratch.
    ///
    /// * `id` - device identifier.
    /// * `mac_addr` - MAC address written into the config space (fresh
    ///   devices only; a restored device reuses `state.config`).
    /// * `mtu` - when `Some`, `VIRTIO_NET_F_MTU` is offered and the value is
    ///   passed to `build_net_config_space`.
    /// * `vu_cfg` - socket path, number of queues and queue size.
    /// * `server` - forwarded to `VhostUserHandle::connect_vhost_user`
    ///   (presumably selects listening vs. connecting; confirm there).
    /// * `seccomp_action`, `exit_evt` - stored for later thread spawning.
    /// * `iommu` - see the `iommu` field.
    /// * `state` - previously saved state to restore from, if any.
    /// * `offload_tso`, `offload_ufo` - offer the TSO/ECN and UFO feature
    ///   bits; only honoured when `offload_csum` is also set.
    /// * `offload_csum` - offer the checksum offload feature bits.
    ///
    /// # Errors
    ///
    /// Propagates failures from connecting to or negotiating with the
    /// backend, returns `Error::VhostUserGetQueueMaxNum` if the backend queue
    /// count cannot be queried, and `Error::BadQueueNum` if more queues are
    /// requested than the backend supports.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        mac_addr: MacAddr,
        mtu: Option<u16>,
        vu_cfg: VhostUserConfig,
        server: bool,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
        iommu: bool,
        state: Option<State>,
        offload_tso: bool,
        offload_ufo: bool,
        offload_csum: bool,
    ) -> Result<Net> {
        // Number of queues exposed to the guest; incremented below when the
        // control queue feature is part of the acknowledged features.
        let mut num_queues = vu_cfg.num_queues;

        let mut vu =
            VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?;

        // Both branches must yield the tuple elements in exactly this order.
        let (
            avail_features,
            acked_features,
            acked_protocol_features,
            vu_num_queues,
            config,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vhost-user-net {}", id);

            // The backend acknowledged features must not contain
            // VIRTIO_NET_F_MAC since we don't expect the backend
            // to handle it.
            let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC);

            vu.set_protocol_features_vhost_user(
                backend_acked_features,
                state.acked_protocol_features,
            )?;

            // If the control queue feature has been negotiated, let's
            // increase the number of queues.
            if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
                num_queues += 1;
            }

            (
                state.avail_features,
                state.acked_features,
                state.acked_protocol_features,
                state.vu_num_queues,
                state.config,
                // A restored device is created with its paused flag set.
                true,
            )
        } else {
            // Filling device and vring features VMM supports.
            let mut avail_features = (1 << VIRTIO_NET_F_MRG_RXBUF)
                | (1 << VIRTIO_NET_F_CTRL_VQ)
                | (1 << VIRTIO_F_RING_EVENT_IDX)
                | (1 << VIRTIO_F_VERSION_1)
                | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();

            if mtu.is_some() {
                avail_features |= 1u64 << VIRTIO_NET_F_MTU;
            }

            // Configure TSO/UFO features when hardware checksum offload is enabled.
            if offload_csum {
                avail_features |= (1 << VIRTIO_NET_F_CSUM) | (1 << VIRTIO_NET_F_GUEST_CSUM);

                if offload_tso {
                    avail_features |= (1 << VIRTIO_NET_F_HOST_ECN)
                        | (1 << VIRTIO_NET_F_HOST_TSO4)
                        | (1 << VIRTIO_NET_F_HOST_TSO6)
                        | (1 << VIRTIO_NET_F_GUEST_ECN)
                        | (1 << VIRTIO_NET_F_GUEST_TSO4)
                        | (1 << VIRTIO_NET_F_GUEST_TSO6);
                }

                if offload_ufo {
                    avail_features |= (1 << VIRTIO_NET_F_HOST_UFO) | (1 << VIRTIO_NET_F_GUEST_UFO);
                }
            }

            // `avail_features` is passed mutably, so the helper may adjust the
            // feature set while filling in the config space.
            let mut config = VirtioNetConfig::default();
            build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features);

            let avail_protocol_features = VhostUserProtocolFeatures::MQ
                | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
                | VhostUserProtocolFeatures::REPLY_ACK
                | VhostUserProtocolFeatures::INFLIGHT_SHMFD
                | VhostUserProtocolFeatures::LOG_SHMFD;

            let (mut acked_features, acked_protocol_features) =
                vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;

            // Only query the backend for its queue count when the MQ protocol
            // feature was acknowledged; otherwise assume the default.
            let backend_num_queues =
                if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
                    vu.socket_handle()
                        .get_queue_num()
                        .map_err(Error::VhostUserGetQueueMaxNum)? as usize
                } else {
                    DEFAULT_QUEUE_NUMBER
                };

            if num_queues > backend_num_queues {
                error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n",
                num_queues, backend_num_queues);
                return Err(Error::BadQueueNum);
            }

            // If the control queue feature has been negotiated, let's increase
            // the number of queues.
            // `vu_num_queues` is captured first, so it does not include the
            // control queue.
            let vu_num_queues = num_queues;
            if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 {
                num_queues += 1;
            }

            // Make sure the virtio feature to set the MAC address is exposed to
            // the guest, even if it hasn't been negotiated with the backend.
            acked_features |= 1 << VIRTIO_NET_F_MAC;

            (
                // First slot binds to `avail_features`: the features offered
                // to the guest are the ones the backend acknowledged (plus
                // VIRTIO_NET_F_MAC).
                acked_features,
                // If part of the available features that have been acked,
                // the PROTOCOL_FEATURES bit must be already set through
                // the VIRTIO acked features as we know the guest would
                // never ack it, thus the feature would be lost.
                acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
                acked_protocol_features,
                vu_num_queues,
                config,
                false,
            )
        };

        Ok(Net {
            id,
            common: VirtioCommon {
                device_type: VirtioDeviceType::Net as u32,
                // One entry per guest-visible queue, control queue included.
                queue_sizes: vec![vu_cfg.queue_size; num_queues],
                avail_features,
                acked_features,
                // Two parties by default; `activate` replaces this barrier
                // when a control queue thread is also spawned.
                paused_sync: Some(Arc::new(Barrier::new(2))),
                min_queues: DEFAULT_QUEUE_NUMBER as u16,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            vu_common: VhostUserCommon {
                vu: Some(Arc::new(Mutex::new(vu))),
                acked_protocol_features,
                socket_path: vu_cfg.socket,
                vu_num_queues,
                server,
                ..Default::default()
            },
            config,
            guest_memory: None,
            ctrl_queue_epoll_thread: None,
            epoll_thread: None,
            seccomp_action,
            exit_evt,
            iommu,
        })
    }

    /// Builds the serializable `State` from the current feature sets, config
    /// space, acknowledged protocol features and vhost-user queue count.
    fn state(&self) -> State {
        State {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
            acked_protocol_features: self.vu_common.acked_protocol_features,
            vu_num_queues: self.vu_common.vu_num_queues,
        }
    }
}

impl Drop for Net {
    /// Signals the kill event (if one is still held), waits for the epoll
    /// threads tracked by `common`, then joins the two threads owned by this
    /// device. Failures are logged rather than propagated.
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            if let Err(e) = kill_evt.write(1) {
                error!("failed to kill vhost-user-net: {:?}", e);
            }
        }

        self.common.wait_for_epoll_threads();

        if let Some(thread) = self.epoll_thread.take() {
            if let Err(e) = thread.join() {
                error!("Error joining thread: {:?}", e);
            }
        }
        if let Some(thread) = self.ctrl_queue_epoll_thread.take() {
            if let Err(e) = thread.join() {
                error!("Error joining thread: {:?}", e);
            }
        }
    }
}

impl VirtioDevice for Net {
    /// Returns the device type stored in `common` (`VirtioDeviceType::Net`).
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    /// Returns the per-queue maximum sizes configured in `new`.
    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    /// Returns the offered feature bits, with `VIRTIO_F_IOMMU_PLATFORM` added
    /// when the device was created with `iommu` set.
    fn features(&self) -> u64 {
        let mut features = self.common.avail_features;
        if self.iommu {
            features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }
        features
    }

    /// Forwards the guest-acknowledged feature bits to `common`.
    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    /// Reads from the virtio-net config space at `offset` into `data`.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    /// Activates the device.
    ///
    /// If the control queue feature was acknowledged and the queue count is
    /// odd, the last queue is split off and served by a dedicated control
    /// queue thread. The remaining queues are handed to `vu_common.activate`,
    /// and the handler it returns is run on a second thread.
    ///
    /// Errors from `common.activate`, `vu_common.activate` and
    /// `spawn_virtio_thread` are propagated with `?`.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        self.guest_memory = Some(mem.clone());

        let num_queues = queues.len();
        let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into());
        // NOTE(review): the odd-count check presumably relies on data queues
        // coming in pairs, so an odd total means the trailing control queue
        // is present — confirm.
        if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 {
            let ctrl_queue_index = num_queues - 1;
            // Removing the control queue here keeps it out of the set passed
            // to the vhost-user backend below.
            let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index);

            ctrl_queue.set_event_idx(event_idx);

            let (kill_evt, pause_evt) = self.common.dup_eventfds();

            let mut ctrl_handler = NetCtrlEpollHandler {
                mem: mem.clone(),
                kill_evt,
                pause_evt,
                ctrl_q: CtrlQueue::new(Vec::new()),
                queue: ctrl_queue,
                queue_evt: ctrl_queue_evt,
                access_platform: None,
                interrupt_cb: interrupt_cb.clone(),
                queue_index: ctrl_queue_index as u16,
            };

            let paused = self.common.paused.clone();
            // Let's update the barrier as we need 1 for the control queue
            // thread + 1 for the common vhost-user thread + 1 for the main
            // thread signalling the pause.
            self.common.paused_sync = Some(Arc::new(Barrier::new(3)));
            let paused_sync = self.common.paused_sync.clone();

            let mut epoll_threads = Vec::new();
            spawn_virtio_thread(
                &format!("{}_ctrl", &self.id),
                &self.seccomp_action,
                Thread::VirtioVhostNetCtl,
                &mut epoll_threads,
                &self.exit_evt,
                // `paused_sync` was assigned `Some(..)` just above.
                move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()),
            )?;
            // The vector was empty before the call, so index 0 is taken to be
            // the thread just spawned.
            self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0));
        }

        // No backend request handler is installed for this device.
        let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None;

        // The backend acknowledged features must not contain VIRTIO_NET_F_MAC
        // since we don't expect the backend to handle it.
        let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC);

        // Run a dedicated thread for handling potential reconnections with
        // the backend.
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let mut handler = self.vu_common.activate(
            mem,
            queues,
            interrupt_cb,
            backend_acked_features,
            backend_req_handler,
            kill_evt,
            pause_evt,
        )?;

        let paused = self.common.paused.clone();
        // Cloned after the control queue branch, so this is the 3-party
        // barrier whenever that branch ran.
        let paused_sync = self.common.paused_sync.clone();

        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioVhostNet,
            &mut epoll_threads,
            &self.exit_evt,
            // NOTE(review): the unwrap relies on `paused_sync` being `Some`,
            // as set in `new` or in the control queue branch above.
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.epoll_thread = Some(epoll_threads.remove(0));

        Ok(())
    }

    /// Resets the device and returns the interrupt callback held by `common`.
    ///
    /// Returns `None` if resuming fails or if the vhost-user backend reset
    /// fails (the latter is also logged).
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        // We first must resume the virtio thread if it was paused.
        // Note that `take()` also clears the stored pause event.
        if self.common.pause_evt.take().is_some() {
            self.common.resume().ok()?;
        }

        if let Some(vu) = &self.vu_common.vu {
            // `lock().unwrap()` panics if the mutex is poisoned.
            if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
                error!("Failed to reset vhost-user daemon: {:?}", e);
                return None;
            }
        }

        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }

        event!("virtio-device", "reset", "id", &self.id);

        // Return the interrupt
        // NOTE(review): `unwrap()` panics if no interrupt callback is stored;
        // presumably `common.activate` stores it, so reset is only expected
        // after activation — confirm.
        Some(self.common.interrupt_cb.take().unwrap())
    }

    /// Delegates shutdown to the vhost-user common state.
    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }

    /// Forwards a newly added guest memory region, together with the stored
    /// guest memory handle, to the vhost-user common state.
    fn add_memory_region(
        &mut self,
        region: &Arc<GuestRegionMmap>,
    ) -> std::result::Result<(), crate::Error> {
        self.vu_common.add_memory_region(&self.guest_memory, region)
    }
}

impl Pausable for Net {
    /// Pauses the vhost-user side first, then the common virtio state.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.vu_common.pause()?;
        self.common.pause()
    }

    /// Resumes the common virtio state, unparks both device threads if they
    /// exist, then resumes the vhost-user side.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()?;

        if let Some(epoll_thread) = &self.epoll_thread {
            epoll_thread.thread().unpark();
        }

        if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread {
            ctrl_queue_epoll_thread.thread().unpark();
        }

        self.vu_common.resume()
    }
}

impl Snapshottable for Net {
    /// Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }

    /// Snapshots the device by handing the current `State` to the vhost-user
    /// common state.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        self.vu_common.snapshot(&self.state())
    }
}
// Uses the trait's default methods; nothing is overridden here.
impl Transportable for Net {}

impl Migratable for Net {
    /// Delegates to `vu_common.start_dirty_log` with the stored guest memory.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_dirty_log(&self.guest_memory)
    }

    /// Delegates to `vu_common.stop_dirty_log`.
    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.stop_dirty_log()
    }

    /// Delegates to `vu_common.dirty_log` with the stored guest memory and
    /// returns the resulting memory range table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        self.vu_common.dirty_log(&self.guest_memory)
    }

    /// Delegates to `vu_common.start_migration`.
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common.start_migration()
    }

    /// Delegates to `vu_common.complete_migration`, handing over the kill
    /// event; `take()` leaves `common.kill_evt` empty afterwards.
    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.vu_common
            .complete_migration(self.common.kill_evt.take())
    }
}