// Copyright © 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::BTreeMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::{io, result};

use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use vhost::vdpa::{VhostVdpa, VhostVdpaIovaRange};
use vhost::vhost_kern::vdpa::VhostKernVdpa;
use vhost::vhost_kern::vhost_binding::VHOST_BACKEND_F_SUSPEND;
use vhost::vhost_kern::VhostKernFeatures;
use vhost::{VhostBackend, VringConfigData};
use virtio_queue::desc::RawDescriptor;
use virtio_queue::{Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

use crate::{
    ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt,
    VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK,
    VIRTIO_F_IOMMU_PLATFORM,
};

#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to create vhost-vdpa")]
    CreateVhostVdpa(#[source] vhost::Error),
    #[error("Failed to map DMA range")]
    DmaMap(#[source] vhost::Error),
    #[error("Failed to unmap DMA range")]
    DmaUnmap(#[source] vhost::Error),
    #[error("Failed to get address range")]
    GetAddressRange,
    #[error("Failed to get the available index from the virtio queue")]
    GetAvailableIndex(#[source] virtio_queue::Error),
    #[error("Failed to get virtio configuration size")]
    GetConfigSize(#[source] vhost::Error),
    #[error("Failed to get virtio device identifier")]
    GetDeviceId(#[source] vhost::Error),
    #[error("Failed to get backend specific features")]
    GetBackendFeatures(#[source] vhost::Error),
    #[error("Failed to get virtio features")]
    GetFeatures(#[source] vhost::Error),
    #[error("Failed to get the IOVA range")]
    GetIovaRange(#[source] vhost::Error),
    #[error("Failed to get queue size")]
    GetVringNum(#[source] vhost::Error),
    #[error("Invalid IOVA range: {0}-{1}")]
    InvalidIovaRange(u64, u64),
    #[error("Missing VIRTIO_F_ACCESS_PLATFORM feature")]
    MissingAccessPlatformVirtioFeature,
    #[error("Failed to reset owner")]
    ResetOwner(#[source] vhost::Error),
    #[error("Failed to set backend specific features")]
    SetBackendFeatures(#[source] vhost::Error),
    #[error("Failed to set backend configuration")]
    SetConfig(#[source] vhost::Error),
    #[error("Failed to set eventfd notifying about a configuration change")]
    SetConfigCall(#[source] vhost::Error),
    #[error("Failed to set virtio features")]
    SetFeatures(#[source] vhost::Error),
    #[error("Failed to set memory table")]
    SetMemTable(#[source] vhost::Error),
    #[error("Failed to set owner")]
    SetOwner(#[source] vhost::Error),
    #[error("Failed to set virtio status")]
    SetStatus(#[source] vhost::Error),
    #[error("Failed to set vring address")]
    SetVringAddr(#[source] vhost::Error),
    #[error("Failed to set vring base")]
    SetVringBase(#[source] vhost::Error),
    #[error("Failed to set vring eventfd when buffers are used")]
    SetVringCall(#[source] vhost::Error),
    #[error("Failed to enable/disable vring")]
    SetVringEnable(#[source] vhost::Error),
    #[error("Failed to set vring eventfd when new descriptors are available")]
    SetVringKick(#[source] vhost::Error),
    #[error("Failed to set vring size")]
    SetVringNum(#[source] vhost::Error),
}

pub type Result<T> = std::result::Result<T, Error>;

#[derive(Serialize, Deserialize)]
pub struct VdpaState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub device_type: u32,
    pub iova_range_first: u64,
    pub iova_range_last: u64,
    pub config: Vec<u8>,
    pub queue_sizes: Vec<u16>,
    pub backend_features: u64,
}

pub struct Vdpa {
    common: VirtioCommon,
    id: String,
    vhost: Option<VhostKernVdpa<GuestMemoryAtomic<GuestMemoryMmap>>>,
    iova_range: VhostVdpaIovaRange,
    enabled_queues: BTreeMap<usize, bool>,
    backend_features: u64,
    migrating: bool,
}

impl Vdpa {
    pub fn new(
        id: String,
        device_path: &str,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        num_queues: u16,
        state: Option<VdpaState>,
    ) -> Result<Self> {
        let mut vhost = VhostKernVdpa::new(device_path, mem).map_err(Error::CreateVhostVdpa)?;
        vhost.set_owner().map_err(Error::SetOwner)?;

        let (
            device_type,
            avail_features,
            acked_features,
            queue_sizes,
            iova_range,
            backend_features,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vDPA {}", id);

            vhost.set_backend_features_acked(state.backend_features);
            vhost
                .set_config(0, state.config.as_slice())
                .map_err(Error::SetConfig)?;

            (
                state.device_type,
                state.avail_features,
                state.acked_features,
                state.queue_sizes,
                VhostVdpaIovaRange {
                    first: state.iova_range_first,
                    last: state.iova_range_last,
                },
                state.backend_features,
                false,
            )
        } else {
            let device_type = vhost.get_device_id().map_err(Error::GetDeviceId)?;
            let queue_size = vhost.get_vring_num().map_err(Error::GetVringNum)?;
            let avail_features = vhost.get_features().map_err(Error::GetFeatures)?;
            let backend_features = vhost
                .get_backend_features()
                .map_err(Error::GetBackendFeatures)?;
            vhost.set_backend_features_acked(backend_features);

            let iova_range = vhost.get_iova_range().map_err(Error::GetIovaRange)?;

            if avail_features & (1u64 << VIRTIO_F_IOMMU_PLATFORM) == 0 {
                return Err(Error::MissingAccessPlatformVirtioFeature);
            }

            (
                device_type,
                avail_features,
                0,
                vec![queue_size; num_queues as usize],
                iova_range,
                backend_features,
                false,
            )
        };

        Ok(Vdpa {
            common: VirtioCommon {
                device_type,
                queue_sizes,
                avail_features,
                acked_features,
                min_queues: num_queues,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            vhost: Some(vhost),
            iova_range,
            enabled_queues: BTreeMap::new(),
            backend_features,
            migrating: false,
        })
    }

    fn enable_vrings(&mut self, enable: bool) -> Result<()> {
        assert!(self.vhost.is_some());

        for (queue_index, enabled) in self.enabled_queues.iter_mut() {
            if *enabled != enable {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_enable(*queue_index, enable)
                    .map_err(Error::SetVringEnable)?;
                *enabled = enable;
            }
        }

        Ok(())
    }

    fn activate_vdpa(
        &mut self,
        mem: &GuestMemoryMmap,
        virtio_interrupt: &Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> Result<()> {
        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_features(self.common.acked_features)
            .map_err(Error::SetFeatures)?;
        self.vhost
            .as_mut()
            .unwrap()
            .set_backend_features(self.backend_features)
            .map_err(Error::SetBackendFeatures)?;

        for (queue_index, queue, queue_evt) in queues.iter() {
            let queue_max_size = queue.max_size();
            let queue_size = queue.size();
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_num(*queue_index, queue_size)
                .map_err(Error::SetVringNum)?;

            let config_data = VringConfigData {
                queue_max_size,
                queue_size,
                flags: 0u32,
                desc_table_addr: queue.desc_table().translate_gpa(
                    self.common.access_platform.as_ref(),
                    queue_size as usize * std::mem::size_of::<RawDescriptor>(),
                ),
                used_ring_addr: queue.used_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 8,
                ),
                avail_ring_addr: queue.avail_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 2,
                ),
                log_addr: None,
            };

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_addr(*queue_index, &config_data)
                .map_err(Error::SetVringAddr)?;
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_base(
                    *queue_index,
                    queue
                        .avail_idx(mem, Ordering::Acquire)
                        .map_err(Error::GetAvailableIndex)?
                        .0,
                )
                .map_err(Error::SetVringBase)?;

            if let Some(eventfd) =
                virtio_interrupt.notifier(VirtioInterruptType::Queue(*queue_index as u16))
            {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_call(*queue_index, &eventfd)
                    .map_err(Error::SetVringCall)?;
            }

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(Error::SetVringKick)?;

            self.enabled_queues.insert(*queue_index, false);
        }

        // Setup the config eventfd if there is one
        if let Some(eventfd) = virtio_interrupt.notifier(VirtioInterruptType::Config) {
            self.vhost
                .as_ref()
                .unwrap()
                .set_config_call(&eventfd)
                .map_err(Error::SetConfigCall)?;
        }

        self.enable_vrings(true)?;

        self.vhost
            .as_ref()
            .unwrap()
            .set_status(
                (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8,
            )
            .map_err(Error::SetStatus)
    }

    fn reset_vdpa(&mut self) -> Result<()> {
        self.enable_vrings(false)?;

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_status(0)
            .map_err(Error::SetStatus)
    }

    fn dma_map(
        &mut self,
        iova: u64,
        size: u64,
        host_vaddr: *const u8,
        readonly: bool,
    ) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_map(iova, size, host_vaddr, readonly)
            .map_err(Error::DmaMap)
    }

    fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(Error::DmaUnmap)
    }

    fn state(&self) -> Result<VdpaState> {
        assert!(self.vhost.is_some());
        let config_size = self
            .vhost
            .as_ref()
            .unwrap()
            .get_config_size()
            .map_err(Error::GetConfigSize)?;
        let mut config = vec![0; config_size as usize];
        self.read_config(0, config.as_mut_slice());

        Ok(VdpaState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            device_type: self.common.device_type,
            queue_sizes: self.common.queue_sizes.clone(),
            iova_range_first: self.iova_range.first,
            iova_range_last: self.iova_range.last,
            config,
            backend_features: self.backend_features,
        })
    }
}

impl VirtioDevice for Vdpa {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().get_config(offset as u32, data) {
            error!("Failed reading virtio config: {}", e);
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().set_config(offset as u32, data) {
            error!("Failed writing virtio config: {}", e);
        }
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        virtio_interrupt: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.activate_vdpa(&mem.memory(), &virtio_interrupt, queues)
            .map_err(ActivateError::ActivateVdpa)?;

        // Store the virtio interrupt handler as we need to return it on reset
        self.common.interrupt_cb = Some(virtio_interrupt);

        event!("vdpa", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if let Err(e) = self.reset_vdpa() {
            error!("Failed to reset vhost-vdpa: {:?}", e);
            return None;
        }

        event!("vdpa", "reset", "id", &self.id);

        // Return the virtio interrupt handler
        self.common.interrupt_cb.take()
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Vdpa {
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Pause(anyhow!(
"Can't pause a vDPA device outside live migration" 456 ))) 457 } else { 458 Ok(()) 459 } 460 } 461 resume(&mut self) -> std::result::Result<(), MigratableError>462 fn resume(&mut self) -> std::result::Result<(), MigratableError> { 463 if !self.common.paused.load(Ordering::SeqCst) { 464 return Ok(()); 465 } 466 467 if !self.migrating { 468 Err(MigratableError::Resume(anyhow!( 469 "Can't resume a vDPA device outside live migration" 470 ))) 471 } else { 472 Ok(()) 473 } 474 } 475 } 476 477 impl Snapshottable for Vdpa { id(&self) -> String478 fn id(&self) -> String { 479 self.id.clone() 480 } 481 snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>482 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 483 if !self.migrating { 484 return Err(MigratableError::Snapshot(anyhow!( 485 "Can't snapshot a vDPA device outside live migration" 486 ))); 487 } 488 489 let snapshot = Snapshot::new_from_state(&self.state().map_err(|e| { 490 MigratableError::Snapshot(anyhow!("Error snapshotting vDPA device: {:?}", e)) 491 })?)?; 492 493 // Force the vhost handler to be dropped in order to close the vDPA 494 // file. This will ensure the device can be accessed if the VM is 495 // migrated on the same host machine. 496 self.vhost.take(); 497 498 Ok(snapshot) 499 } 500 } 501 502 impl Transportable for Vdpa {} 503 504 impl Migratable for Vdpa { start_migration(&mut self) -> std::result::Result<(), MigratableError>505 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 506 self.migrating = true; 507 // Given there's no way to track dirty pages, we must suspend the 508 // device as soon as the migration process starts. 509 if self.backend_features & (1 << VHOST_BACKEND_F_SUSPEND) != 0 { 510 assert!(self.vhost.is_some()); 511 self.vhost.as_ref().unwrap().suspend().map_err(|e| { 512 MigratableError::StartMigration(anyhow!("Error suspending vDPA device: {:?}", e)) 513 }) 514 } else { 515 Err(MigratableError::StartMigration(anyhow!( 516 "vDPA device can't be suspended" 517 ))) 518 } 519 } 520 complete_migration(&mut self) -> std::result::Result<(), MigratableError>521 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 522 self.migrating = false; 523 Ok(()) 524 } 525 } 526 527 pub struct VdpaDmaMapping<M: GuestAddressSpace> { 528 device: Arc<Mutex<Vdpa>>, 529 memory: Arc<M>, 530 } 531 532 impl<M: GuestAddressSpace> VdpaDmaMapping<M> { new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self533 pub fn new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self { 534 Self { device, memory } 535 } 536 } 537 538 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VdpaDmaMapping<M> { map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error>539 fn map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error> { 540 let mem = self.memory.memory(); 541 let guest_addr = GuestAddress(gpa); 542 let user_addr = if mem.check_range(guest_addr, size as usize) { 543 mem.get_host_address(guest_addr).unwrap() as *const u8 544 } else { 545 return Err(io::Error::other(format!( 546 "failed to convert guest address 0x{gpa:x} into \ 547 host user virtual address" 548 ))); 549 }; 550 551 debug!( 552 "DMA map iova 0x{:x}, gpa 0x{:x}, size 0x{:x}, host_addr 0x{:x}", 553 iova, gpa, size, user_addr as u64 554 ); 555 self.device 556 .lock() 557 .unwrap() 558 .dma_map(iova, size, user_addr, false) 559 .map_err(|e| { 560 io::Error::other(format!( 561 "failed to map memory for vDPA device, \ 562 iova 0x{iova:x}, gpa 0x{gpa:x}, 
                     iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }

    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        debug!("DMA unmap iova 0x{:x} size 0x{:x}", iova, size);
        self.device
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                io::Error::other(format!(
                    "failed to unmap memory for vDPA device, \
                     iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }
}