// Copyright © 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use crate::{
    ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt,
    VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK,
    VIRTIO_F_IOMMU_PLATFORM,
};
use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use std::{
    collections::BTreeMap,
    io, result,
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc, Mutex,
    },
};
use thiserror::Error;
use vhost::{
    vdpa::{VhostVdpa, VhostVdpaIovaRange},
    vhost_kern::VhostKernFeatures,
    vhost_kern::{vdpa::VhostKernVdpa, vhost_binding::VHOST_BACKEND_F_SUSPEND},
    VhostBackend, VringConfigData,
};
use virtio_queue::{Descriptor, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to create vhost-vdpa: {0}")]
    CreateVhostVdpa(vhost::Error),
    #[error("Failed to map DMA range: {0}")]
    DmaMap(vhost::Error),
    #[error("Failed to unmap DMA range: {0}")]
    DmaUnmap(vhost::Error),
    #[error("Failed to get address range")]
    GetAddressRange,
    #[error("Failed to get the available index from the virtio queue: {0}")]
    GetAvailableIndex(virtio_queue::Error),
    #[error("Failed to get virtio configuration size: {0}")]
    GetConfigSize(vhost::Error),
    #[error("Failed to get virtio device identifier: {0}")]
    GetDeviceId(vhost::Error),
    #[error("Failed to get backend specific features: {0}")]
    GetBackendFeatures(vhost::Error),
    #[error("Failed to get virtio features: {0}")]
    GetFeatures(vhost::Error),
    #[error("Failed to get the IOVA range: {0}")]
    GetIovaRange(vhost::Error),
    #[error("Failed to get queue size: {0}")]
    GetVringNum(vhost::Error),
    #[error("Invalid IOVA range: {0}-{1}")]
    InvalidIovaRange(u64, u64),
    #[error("Missing VIRTIO_F_ACCESS_PLATFORM feature")]
    MissingAccessPlatformVirtioFeature,
    #[error("Failed to reset owner: {0}")]
    ResetOwner(vhost::Error),
    #[error("Failed to set backend specific features: {0}")]
    SetBackendFeatures(vhost::Error),
    #[error("Failed to set backend configuration: {0}")]
    SetConfig(vhost::Error),
    #[error("Failed to set eventfd notifying about a configuration change: {0}")]
    SetConfigCall(vhost::Error),
    #[error("Failed to set virtio features: {0}")]
    SetFeatures(vhost::Error),
    #[error("Failed to set memory table: {0}")]
    SetMemTable(vhost::Error),
    #[error("Failed to set owner: {0}")]
    SetOwner(vhost::Error),
    #[error("Failed to set virtio status: {0}")]
    SetStatus(vhost::Error),
    #[error("Failed to set vring address: {0}")]
    SetVringAddr(vhost::Error),
    #[error("Failed to set vring base: {0}")]
    SetVringBase(vhost::Error),
    #[error("Failed to set vring eventfd when buffers are used: {0}")]
    SetVringCall(vhost::Error),
    #[error("Failed to enable/disable vring: {0}")]
    SetVringEnable(vhost::Error),
    #[error("Failed to set vring eventfd when new descriptors are available: {0}")]
    SetVringKick(vhost::Error),
    #[error("Failed to set vring size: {0}")]
    SetVringNum(vhost::Error),
}

pub type Result<T> = std::result::Result<T, Error>;

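/// Serializable snapshot of a vDPA device's state. Produced by
/// `Vdpa::state()` during live migration and consumed by `Vdpa::new()`
/// when the device is restored.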
#[derive(Serialize, Deserialize)]
pub struct VdpaState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub device_type: u32,
    pub iova_range_first: u64,
    pub iova_range_last: u64,
    pub config: Vec<u8>,
    pub queue_sizes: Vec<u16>,
    pub backend_features: u64,
}

pub struct Vdpa {
    common: VirtioCommon,
    id: String,
    vhost: Option<VhostKernVdpa<GuestMemoryAtomic<GuestMemoryMmap>>>,
    iova_range: VhostVdpaIovaRange,
    enabled_queues: BTreeMap<usize, bool>,
    backend_features: u64,
    migrating: bool,
}

impl Vdpa {
    pub fn new(
        id: String,
        device_path: &str,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        num_queues: u16,
        state: Option<VdpaState>,
    ) -> Result<Self> {
        let mut vhost = VhostKernVdpa::new(device_path, mem).map_err(Error::CreateVhostVdpa)?;
        vhost.set_owner().map_err(Error::SetOwner)?;

        let (
            device_type,
            avail_features,
            acked_features,
            queue_sizes,
            iova_range,
            backend_features,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vDPA {}", id);

            vhost.set_backend_features_acked(state.backend_features);
            vhost
                .set_config(0, state.config.as_slice())
                .map_err(Error::SetConfig)?;

            (
                state.device_type,
                state.avail_features,
                state.acked_features,
                state.queue_sizes,
                VhostVdpaIovaRange {
                    first: state.iova_range_first,
                    last: state.iova_range_last,
                },
                state.backend_features,
                true,
            )
        } else {
            let device_type = vhost.get_device_id().map_err(Error::GetDeviceId)?;
            let queue_size = vhost.get_vring_num().map_err(Error::GetVringNum)?;
            let avail_features = vhost.get_features().map_err(Error::GetFeatures)?;
            let backend_features = vhost
                .get_backend_features()
                .map_err(Error::GetBackendFeatures)?;
            vhost.set_backend_features_acked(backend_features);

            let iova_range = vhost.get_iova_range().map_err(Error::GetIovaRange)?;

            if avail_features & (1u64 << VIRTIO_F_IOMMU_PLATFORM) == 0 {
                return Err(Error::MissingAccessPlatformVirtioFeature);
            }

            (
                device_type,
                avail_features,
                0,
                vec![queue_size; num_queues as usize],
                iova_range,
                backend_features,
                false,
            )
        };

        Ok(Vdpa {
            common: VirtioCommon {
                device_type,
                queue_sizes,
                avail_features,
                acked_features,
                min_queues: num_queues,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            vhost: Some(vhost),
            iova_range,
            enabled_queues: BTreeMap::new(),
            backend_features,
            migrating: false,
        })
    }

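    // Enable or disable every vring known to the device. The per-queue state
    // tracked in `enabled_queues` ensures the backend is only asked to change
    // queues that are not already in the requested state.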
    fn enable_vrings(&mut self, enable: bool) -> Result<()> {
        assert!(self.vhost.is_some());

        for (queue_index, enabled) in self.enabled_queues.iter_mut() {
            if *enabled != enable {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_enable(*queue_index, enable)
                    .map_err(Error::SetVringEnable)?;
                *enabled = enable;
            }
        }

        Ok(())
    }

    fn activate_vdpa(
        &mut self,
        mem: &GuestMemoryMmap,
        virtio_interrupt: &Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> Result<()> {
        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_features(self.common.acked_features)
            .map_err(Error::SetFeatures)?;
        self.vhost
            .as_mut()
            .unwrap()
            .set_backend_features(self.backend_features)
            .map_err(Error::SetBackendFeatures)?;

        for (queue_index, queue, queue_evt) in queues.iter() {
            let queue_max_size = queue.max_size();
            let queue_size = queue.size();
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_num(*queue_index, queue_size)
                .map_err(Error::SetVringNum)?;

            let config_data = VringConfigData {
                queue_max_size,
                queue_size,
                flags: 0u32,
                desc_table_addr: queue.desc_table().translate_gpa(
                    self.common.access_platform.as_ref(),
                    queue_size as usize * std::mem::size_of::<Descriptor>(),
                ),
                used_ring_addr: queue.used_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 8,
                ),
                avail_ring_addr: queue.avail_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 2,
                ),
                log_addr: None,
            };

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_addr(*queue_index, &config_data)
                .map_err(Error::SetVringAddr)?;
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_base(
                    *queue_index,
                    queue
                        .avail_idx(mem, Ordering::Acquire)
                        .map_err(Error::GetAvailableIndex)?
                        .0,
                )
                .map_err(Error::SetVringBase)?;

            if let Some(eventfd) =
                virtio_interrupt.notifier(VirtioInterruptType::Queue(*queue_index as u16))
            {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_call(*queue_index, &eventfd)
                    .map_err(Error::SetVringCall)?;
            }

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(Error::SetVringKick)?;

            self.enabled_queues.insert(*queue_index, false);
        }

        // Set up the config eventfd if there is one
        if let Some(eventfd) = virtio_interrupt.notifier(VirtioInterruptType::Config) {
            self.vhost
                .as_ref()
                .unwrap()
                .set_config_call(&eventfd)
                .map_err(Error::SetConfigCall)?;
        }

        self.enable_vrings(true)?;

        self.vhost
            .as_ref()
            .unwrap()
            .set_status(
                (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8,
            )
            .map_err(Error::SetStatus)
    }

    fn reset_vdpa(&mut self) -> Result<()> {
        self.enable_vrings(false)?;

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_status(0)
            .map_err(Error::SetStatus)
    }

    fn dma_map(
        &mut self,
        iova: u64,
        size: u64,
        host_vaddr: *const u8,
        readonly: bool,
    ) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_map(iova, size, host_vaddr, readonly)
            .map_err(Error::DmaMap)
    }

    fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(Error::DmaUnmap)
    }

    fn state(&self) -> Result<VdpaState> {
        assert!(self.vhost.is_some());
        let config_size = self
            .vhost
            .as_ref()
            .unwrap()
            .get_config_size()
            .map_err(Error::GetConfigSize)?;
        let mut config = vec![0; config_size as usize];
        self.read_config(0, config.as_mut_slice());

        Ok(VdpaState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            device_type: self.common.device_type,
            queue_sizes: self.common.queue_sizes.clone(),
            iova_range_first: self.iova_range.first,
            iova_range_last: self.iova_range.last,
            config,
            backend_features: self.backend_features,
        })
    }
}

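// Sizes passed to `translate_gpa()` in `activate_vdpa()` above follow the
// split virtqueue layout:
//   descriptor table: queue_size * 16 bytes (size_of::<Descriptor>()),
//   available ring:   4-byte header (flags + idx) + queue_size * 2 bytes,
//   used ring:        4-byte header (flags + idx) + queue_size * 8 bytes.
// For example, a 256-entry queue translates 4096, 516 and 2052 bytes
// respectively.
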
impl VirtioDevice for Vdpa {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().get_config(offset as u32, data) {
            error!("Failed reading virtio config: {}", e);
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().set_config(offset as u32, data) {
            error!("Failed writing virtio config: {}", e);
        }
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        virtio_interrupt: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.activate_vdpa(&mem.memory(), &virtio_interrupt, queues)
            .map_err(ActivateError::ActivateVdpa)?;

        // Store the virtio interrupt handler as we need to return it on reset
        self.common.interrupt_cb = Some(virtio_interrupt);

        event!("vdpa", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if let Err(e) = self.reset_vdpa() {
            error!("Failed to reset vhost-vdpa: {:?}", e);
            return None;
        }

        event!("vdpa", "reset", "id", &self.id);

        // Return the virtio interrupt handler
        self.common.interrupt_cb.take()
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Vdpa {
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Pause(anyhow!(
                "Can't pause a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }

    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Resume(anyhow!(
                "Can't resume a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }
}

impl Snapshottable for Vdpa {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        if !self.migrating {
            return Err(MigratableError::Snapshot(anyhow!(
                "Can't snapshot a vDPA device outside live migration"
            )));
        }

        let snapshot = Snapshot::new_from_state(&self.state().map_err(|e| {
            MigratableError::Snapshot(anyhow!("Error snapshotting vDPA device: {:?}", e))
        })?)?;

        // Force the vhost handle to be dropped in order to close the vDPA
        // file. This will ensure the device can be accessed if the VM is
        // migrated on the same host machine.
        self.vhost.take();

        Ok(snapshot)
    }
}

impl Transportable for Vdpa {}

impl Migratable for Vdpa {
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = true;
        // Given there's no way to track dirty pages, we must suspend the
        // device as soon as the migration process starts.
        if self.backend_features & (1 << VHOST_BACKEND_F_SUSPEND) != 0 {
            assert!(self.vhost.is_some());
            self.vhost.as_ref().unwrap().suspend().map_err(|e| {
                MigratableError::StartMigration(anyhow!("Error suspending vDPA device: {:?}", e))
            })
        } else {
            Err(MigratableError::StartMigration(anyhow!(
                "vDPA device can't be suspended"
            )))
        }
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = false;
        Ok(())
    }
}

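// Adapter exposing the vDPA device's DMA mapping operations through the
// `ExternalDmaMapping` trait, so that a virtual IOMMU can program guest
// mappings into the device as they change.
//
// A minimal wiring sketch (illustrative only; the `vdpa` and `guest_memory`
// bindings are assumed to exist):
//
//     let mapping: Arc<dyn ExternalDmaMapping> = Arc::new(VdpaDmaMapping::new(
//         Arc::clone(&vdpa),      // Arc<Mutex<Vdpa>>
//         Arc::new(guest_memory), // e.g. Arc<GuestMemoryAtomic<GuestMemoryMmap>>
//     ));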
pub struct VdpaDmaMapping<M: GuestAddressSpace> {
    device: Arc<Mutex<Vdpa>>,
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VdpaDmaMapping<M> {
    pub fn new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self {
        Self { device, memory }
    }
}

impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VdpaDmaMapping<M> {
    fn map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let user_addr = if mem.check_range(guest_addr, size as usize) {
            mem.get_host_address(guest_addr).unwrap() as *const u8
        } else {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "failed to convert guest address 0x{gpa:x} into \
                     host user virtual address"
                ),
            ));
        };

        debug!(
            "DMA map iova 0x{:x}, gpa 0x{:x}, size 0x{:x}, host_addr 0x{:x}",
            iova, gpa, size, user_addr as u64
        );
        self.device
            .lock()
            .unwrap()
            .dma_map(iova, size, user_addr, false)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to map memory for vDPA device, \
                         iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }

    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        debug!("DMA unmap iova 0x{:x} size 0x{:x}", iova, size);
        self.device
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to unmap memory for vDPA device, \
                         iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }
}
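
// Live-migration sequence implied by the impls above:
//   1. start_migration(): sets `migrating` and suspends the device (the
//      backend must advertise VHOST_BACKEND_F_SUSPEND, since dirty pages
//      cannot be tracked);
//   2. pause() / snapshot(): only permitted while migrating; snapshot()
//      drops the vhost handle so the vDPA file can be reopened if the VM
//      is restored on the same host;
//   3. complete_migration(): clears the `migrating` flag.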