1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use crate::seccomp_filters::Thread; 5 use crate::thread_helper::spawn_virtio_thread; 6 use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 7 use crate::vhost_user::{Error, Result, VhostUserCommon}; 8 use crate::{ 9 ActivateResult, NetCtrlEpollHandler, VirtioCommon, VirtioDevice, VirtioDeviceType, 10 VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, 11 }; 12 use crate::{GuestMemoryMmap, GuestRegionMmap}; 13 use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig}; 14 use seccompiler::SeccompAction; 15 use serde::{Deserialize, Serialize}; 16 use std::result; 17 use std::sync::atomic::AtomicBool; 18 use std::sync::{Arc, Barrier, Mutex}; 19 use std::thread; 20 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 21 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 22 use virtio_bindings::virtio_net::{ 23 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, 24 VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, 25 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, 26 VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, 27 }; 28 use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; 29 use virtio_queue::{Queue, QueueT}; 30 use vm_memory::{ByteValued, GuestMemoryAtomic}; 31 use vm_migration::{ 32 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 33 Transportable, 34 }; 35 use vmm_sys_util::eventfd::EventFd; 36 37 const DEFAULT_QUEUE_NUMBER: usize = 2; 38 39 #[derive(Serialize, Deserialize)] 40 pub struct State { 41 pub avail_features: u64, 42 pub acked_features: u64, 43 pub config: VirtioNetConfig, 44 pub acked_protocol_features: u64, 45 pub vu_num_queues: usize, 46 } 47 48 struct BackendReqHandler {} 49 impl VhostUserFrontendReqHandler for BackendReqHandler {} 50 51 pub struct Net { 52 common: VirtioCommon, 53 vu_common: VhostUserCommon, 54 id: String, 55 config: VirtioNetConfig, 56 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 57 ctrl_queue_epoll_thread: Option<thread::JoinHandle<()>>, 58 epoll_thread: Option<thread::JoinHandle<()>>, 59 seccomp_action: SeccompAction, 60 exit_evt: EventFd, 61 iommu: bool, 62 } 63 64 impl Net { 65 /// Create a new vhost-user-net device 66 #[allow(clippy::too_many_arguments)] 67 pub fn new( 68 id: String, 69 mac_addr: MacAddr, 70 mtu: Option<u16>, 71 vu_cfg: VhostUserConfig, 72 server: bool, 73 seccomp_action: SeccompAction, 74 exit_evt: EventFd, 75 iommu: bool, 76 state: Option<State>, 77 offload_tso: bool, 78 offload_ufo: bool, 79 offload_csum: bool, 80 ) -> Result<Net> { 81 let mut num_queues = vu_cfg.num_queues; 82 83 let mut vu = 84 VhostUserHandle::connect_vhost_user(server, &vu_cfg.socket, num_queues as u64, false)?; 85 86 let ( 87 avail_features, 88 acked_features, 89 acked_protocol_features, 90 vu_num_queues, 91 config, 92 paused, 93 ) = if let Some(state) = state { 94 info!("Restoring vhost-user-net {}", id); 95 96 // The backend acknowledged features must not contain 97 // VIRTIO_NET_F_MAC since we don't expect the backend 98 // to handle it. 99 let backend_acked_features = state.acked_features & !(1 << VIRTIO_NET_F_MAC); 100 101 vu.set_protocol_features_vhost_user( 102 backend_acked_features, 103 state.acked_protocol_features, 104 )?; 105 106 // If the control queue feature has been negotiated, let's 107 // increase the number of queues. 108 if state.acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 109 num_queues += 1; 110 } 111 112 ( 113 state.avail_features, 114 state.acked_features, 115 state.acked_protocol_features, 116 state.vu_num_queues, 117 state.config, 118 true, 119 ) 120 } else { 121 // Filling device and vring features VMM supports. 122 let mut avail_features = 1 << VIRTIO_NET_F_MRG_RXBUF 123 | 1 << VIRTIO_NET_F_CTRL_VQ 124 | 1 << VIRTIO_F_RING_EVENT_IDX 125 | 1 << VIRTIO_F_VERSION_1 126 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); 127 128 if mtu.is_some() { 129 avail_features |= 1u64 << VIRTIO_NET_F_MTU; 130 } 131 132 // Configure TSO/UFO features when hardware checksum offload is enabled. 133 if offload_csum { 134 avail_features |= 1 << VIRTIO_NET_F_CSUM | 1 << VIRTIO_NET_F_GUEST_CSUM; 135 136 if offload_tso { 137 avail_features |= 1 << VIRTIO_NET_F_HOST_ECN 138 | 1 << VIRTIO_NET_F_HOST_TSO4 139 | 1 << VIRTIO_NET_F_HOST_TSO6 140 | 1 << VIRTIO_NET_F_GUEST_ECN 141 | 1 << VIRTIO_NET_F_GUEST_TSO4 142 | 1 << VIRTIO_NET_F_GUEST_TSO6; 143 } 144 145 if offload_ufo { 146 avail_features |= 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_NET_F_GUEST_UFO; 147 } 148 } 149 150 let mut config = VirtioNetConfig::default(); 151 build_net_config_space(&mut config, mac_addr, num_queues, mtu, &mut avail_features); 152 153 let avail_protocol_features = VhostUserProtocolFeatures::MQ 154 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 155 | VhostUserProtocolFeatures::REPLY_ACK 156 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 157 | VhostUserProtocolFeatures::LOG_SHMFD; 158 159 let (mut acked_features, acked_protocol_features) = 160 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 161 162 let backend_num_queues = 163 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 164 vu.socket_handle() 165 .get_queue_num() 166 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 167 } else { 168 DEFAULT_QUEUE_NUMBER 169 }; 170 171 if num_queues > backend_num_queues { 172 error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n", 173 num_queues, backend_num_queues); 174 return Err(Error::BadQueueNum); 175 } 176 177 // If the control queue feature has been negotiated, let's increase 178 // the number of queues. 179 let vu_num_queues = num_queues; 180 if acked_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0 { 181 num_queues += 1; 182 } 183 184 // Make sure the virtio feature to set the MAC address is exposed to 185 // the guest, even if it hasn't been negotiated with the backend. 186 acked_features |= 1 << VIRTIO_NET_F_MAC; 187 188 ( 189 acked_features, 190 // If part of the available features that have been acked, 191 // the PROTOCOL_FEATURES bit must be already set through 192 // the VIRTIO acked features as we know the guest would 193 // never ack it, thus the feature would be lost. 194 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 195 acked_protocol_features, 196 vu_num_queues, 197 config, 198 false, 199 ) 200 }; 201 202 Ok(Net { 203 id, 204 common: VirtioCommon { 205 device_type: VirtioDeviceType::Net as u32, 206 queue_sizes: vec![vu_cfg.queue_size; num_queues], 207 avail_features, 208 acked_features, 209 paused_sync: Some(Arc::new(Barrier::new(2))), 210 min_queues: DEFAULT_QUEUE_NUMBER as u16, 211 paused: Arc::new(AtomicBool::new(paused)), 212 ..Default::default() 213 }, 214 vu_common: VhostUserCommon { 215 vu: Some(Arc::new(Mutex::new(vu))), 216 acked_protocol_features, 217 socket_path: vu_cfg.socket, 218 vu_num_queues, 219 server, 220 ..Default::default() 221 }, 222 config, 223 guest_memory: None, 224 ctrl_queue_epoll_thread: None, 225 epoll_thread: None, 226 seccomp_action, 227 exit_evt, 228 iommu, 229 }) 230 } 231 232 fn state(&self) -> State { 233 State { 234 avail_features: self.common.avail_features, 235 acked_features: self.common.acked_features, 236 config: self.config, 237 acked_protocol_features: self.vu_common.acked_protocol_features, 238 vu_num_queues: self.vu_common.vu_num_queues, 239 } 240 } 241 } 242 243 impl Drop for Net { 244 fn drop(&mut self) { 245 if let Some(kill_evt) = self.common.kill_evt.take() { 246 if let Err(e) = kill_evt.write(1) { 247 error!("failed to kill vhost-user-net: {:?}", e); 248 } 249 } 250 251 self.common.wait_for_epoll_threads(); 252 253 if let Some(thread) = self.epoll_thread.take() { 254 if let Err(e) = thread.join() { 255 error!("Error joining thread: {:?}", e); 256 } 257 } 258 if let Some(thread) = self.ctrl_queue_epoll_thread.take() { 259 if let Err(e) = thread.join() { 260 error!("Error joining thread: {:?}", e); 261 } 262 } 263 } 264 } 265 266 impl VirtioDevice for Net { 267 fn device_type(&self) -> u32 { 268 self.common.device_type 269 } 270 271 fn queue_max_sizes(&self) -> &[u16] { 272 &self.common.queue_sizes 273 } 274 275 fn features(&self) -> u64 { 276 let mut features = self.common.avail_features; 277 if self.iommu { 278 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 279 } 280 features 281 } 282 283 fn ack_features(&mut self, value: u64) { 284 self.common.ack_features(value) 285 } 286 287 fn read_config(&self, offset: u64, data: &mut [u8]) { 288 self.read_config_from_slice(self.config.as_slice(), offset, data); 289 } 290 291 fn activate( 292 &mut self, 293 mem: GuestMemoryAtomic<GuestMemoryMmap>, 294 interrupt_cb: Arc<dyn VirtioInterrupt>, 295 mut queues: Vec<(usize, Queue, EventFd)>, 296 ) -> ActivateResult { 297 self.common.activate(&queues, &interrupt_cb)?; 298 self.guest_memory = Some(mem.clone()); 299 300 let num_queues = queues.len(); 301 let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into()); 302 if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { 303 let ctrl_queue_index = num_queues - 1; 304 let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index); 305 306 ctrl_queue.set_event_idx(event_idx); 307 308 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 309 310 let mut ctrl_handler = NetCtrlEpollHandler { 311 mem: mem.clone(), 312 kill_evt, 313 pause_evt, 314 ctrl_q: CtrlQueue::new(Vec::new()), 315 queue: ctrl_queue, 316 queue_evt: ctrl_queue_evt, 317 access_platform: None, 318 interrupt_cb: interrupt_cb.clone(), 319 queue_index: ctrl_queue_index as u16, 320 }; 321 322 let paused = self.common.paused.clone(); 323 // Let's update the barrier as we need 1 for the control queue 324 // thread + 1 for the common vhost-user thread + 1 for the main 325 // thread signalling the pause. 326 self.common.paused_sync = Some(Arc::new(Barrier::new(3))); 327 let paused_sync = self.common.paused_sync.clone(); 328 329 let mut epoll_threads = Vec::new(); 330 spawn_virtio_thread( 331 &format!("{}_ctrl", &self.id), 332 &self.seccomp_action, 333 Thread::VirtioVhostNetCtl, 334 &mut epoll_threads, 335 &self.exit_evt, 336 move || ctrl_handler.run_ctrl(paused, paused_sync.unwrap()), 337 )?; 338 self.ctrl_queue_epoll_thread = Some(epoll_threads.remove(0)); 339 } 340 341 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 342 343 // The backend acknowledged features must not contain VIRTIO_NET_F_MAC 344 // since we don't expect the backend to handle it. 345 let backend_acked_features = self.common.acked_features & !(1 << VIRTIO_NET_F_MAC); 346 347 // Run a dedicated thread for handling potential reconnections with 348 // the backend. 349 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 350 351 let mut handler = self.vu_common.activate( 352 mem, 353 queues, 354 interrupt_cb, 355 backend_acked_features, 356 backend_req_handler, 357 kill_evt, 358 pause_evt, 359 )?; 360 361 let paused = self.common.paused.clone(); 362 let paused_sync = self.common.paused_sync.clone(); 363 364 let mut epoll_threads = Vec::new(); 365 spawn_virtio_thread( 366 &self.id, 367 &self.seccomp_action, 368 Thread::VirtioVhostNet, 369 &mut epoll_threads, 370 &self.exit_evt, 371 move || handler.run(paused, paused_sync.unwrap()), 372 )?; 373 self.epoll_thread = Some(epoll_threads.remove(0)); 374 375 Ok(()) 376 } 377 378 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 379 // We first must resume the virtio thread if it was paused. 380 if self.common.pause_evt.take().is_some() { 381 self.common.resume().ok()?; 382 } 383 384 if let Some(vu) = &self.vu_common.vu { 385 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 386 error!("Failed to reset vhost-user daemon: {:?}", e); 387 return None; 388 } 389 } 390 391 if let Some(kill_evt) = self.common.kill_evt.take() { 392 // Ignore the result because there is nothing we can do about it. 393 let _ = kill_evt.write(1); 394 } 395 396 event!("virtio-device", "reset", "id", &self.id); 397 398 // Return the interrupt 399 Some(self.common.interrupt_cb.take().unwrap()) 400 } 401 402 fn shutdown(&mut self) { 403 self.vu_common.shutdown(); 404 } 405 406 fn add_memory_region( 407 &mut self, 408 region: &Arc<GuestRegionMmap>, 409 ) -> std::result::Result<(), crate::Error> { 410 self.vu_common.add_memory_region(&self.guest_memory, region) 411 } 412 } 413 414 impl Pausable for Net { 415 fn pause(&mut self) -> result::Result<(), MigratableError> { 416 self.vu_common.pause()?; 417 self.common.pause() 418 } 419 420 fn resume(&mut self) -> result::Result<(), MigratableError> { 421 self.common.resume()?; 422 423 if let Some(epoll_thread) = &self.epoll_thread { 424 epoll_thread.thread().unpark(); 425 } 426 427 if let Some(ctrl_queue_epoll_thread) = &self.ctrl_queue_epoll_thread { 428 ctrl_queue_epoll_thread.thread().unpark(); 429 } 430 431 self.vu_common.resume() 432 } 433 } 434 435 impl Snapshottable for Net { 436 fn id(&self) -> String { 437 self.id.clone() 438 } 439 440 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 441 self.vu_common.snapshot(&self.state()) 442 } 443 } 444 impl Transportable for Net {} 445 446 impl Migratable for Net { 447 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 448 self.vu_common.start_dirty_log(&self.guest_memory) 449 } 450 451 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 452 self.vu_common.stop_dirty_log() 453 } 454 455 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 456 self.vu_common.dirty_log(&self.guest_memory) 457 } 458 459 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 460 self.vu_common.start_migration() 461 } 462 463 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 464 self.vu_common 465 .complete_migration(self.common.kill_evt.take()) 466 } 467 } 468