// Copyright © 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use crate::vfio::{Interrupt, UserMemoryRegion, Vfio, VfioCommon, VfioError};
use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
use crate::{
    PciBdf, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass,
};
use anyhow::anyhow;
use hypervisor::HypervisorVmError;
use std::any::Any;
use std::os::unix::prelude::AsRawFd;
use std::ptr::null_mut;
use std::sync::{Arc, Barrier, Mutex};
// NOTE(review): redundant import — `std::u32` items are reachable without it;
// kept to avoid touching the import list.
use std::u32;
use thiserror::Error;
use vfio_bindings::bindings::vfio::*;
use vfio_ioctls::VfioIrq;
use vfio_user::{Client, Error as VfioUserError};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
use vm_device::{BusDevice, Resource};
use vm_memory::bitmap::AtomicBitmap;
use vm_memory::{
    Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// A PCI device backed by an external vfio-user server process, accessed
// through a `vfio_user::Client` connection.
pub struct VfioUserPciDevice {
    // Unique device identifier (also used as the snapshot id).
    id: String,
    // Hypervisor VM handle used to create/remove userspace memory regions
    // that back the device's mappable MMIO BARs.
    vm: Arc<dyn hypervisor::Vm>,
    // vfio-user protocol client, shared with the `Vfio` wrapper and the
    // DMA-mapping handler; every call takes the lock for the duration of
    // one protocol operation.
    client: Arc<Mutex<Client>>,
    // State and behavior shared with the kernel-VFIO device implementation
    // (BARs, capabilities, interrupt config).
    common: VfioCommon,
    // Callback returning the next free guest memory-slot number to use when
    // registering a user memory region with the hypervisor.
    memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
}

// Errors returned by `VfioUserPciDevice` operations.
#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
    #[error("Client error: {0}")]
    Client(#[source] VfioUserError),
    #[error("Failed to map VFIO PCI region into guest: {0}")]
    MapRegionGuest(#[source] HypervisorVmError),
    #[error("Failed to DMA map: {0}")]
    DmaMap(#[source] VfioUserError),
    #[error("Failed to DMA unmap: {0}")]
    DmaUnmap(#[source] VfioUserError),
    #[error("Failed to initialize legacy interrupts: {0}")]
    InitializeLegacyInterrupts(#[source] VfioPciError),
}

#[derive(Copy, Clone)]
// PCI subclass value advertised in the emulated config space header for
// vfio-user devices.
enum PciVfioUserSubclass {
    VfioUserSubclass = 0xff,
}

impl PciSubclass for PciVfioUserSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

impl VfioUserPciDevice {
    // Builds a new vfio-user PCI device: sets up the emulated PCI
    // configuration space, optionally resets the remote device, and (unless
    // restoring from a snapshot) parses its capabilities and wires up the
    // legacy interrupt. Returns an error if the remote reset, capability
    // parsing follow-up, or legacy-interrupt setup fails.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        vm: &Arc<dyn hypervisor::Vm>,
        client: Arc<Mutex<Client>>,
        msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
        legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
        bdf: PciBdf,
        restoring: bool,
        memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
    ) -> Result<Self, VfioUserPciDeviceError> {
        // This is used for the BAR and capabilities only
        let configuration = PciConfiguration::new(
            0,
            0,
            0,
            PciClassCode::Other,
            &PciVfioUserSubclass::VfioUserSubclass,
            None,
            PciHeaderType::Device,
            0,
            0,
            None,
        );
        // Reset the remote device up front if it supports it, so the guest
        // starts from a clean device state.
        let resettable = client.lock().unwrap().resettable();
        if resettable {
            client
                .lock()
                .unwrap()
                .reset()
                .map_err(VfioUserPciDeviceError::Client)?;
        }

        // Adapter that exposes the vfio-user client through the common
        // `Vfio` trait used by `VfioCommon`.
        let vfio_wrapper = VfioUserClientWrapper {
            client: client.clone(),
        };

        let mut common = VfioCommon {
            mmio_regions: Vec::new(),
            configuration,
            interrupt: Interrupt {
                intx: None,
                msi: None,
                msix: None,
            },
            msi_interrupt_manager,
            legacy_interrupt_group,
            vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
        };

        // No need to parse capabilities from the device if on the restore path.
        // The initialization will be performed later when restore() will be
        // called.
        if !restoring {
            common.parse_capabilities(bdf);
            common
                .initialize_legacy_interrupt()
                .map_err(VfioUserPciDeviceError::InitializeLegacyInterrupts)?;
        }

        Ok(Self {
            id,
            vm: vm.clone(),
            client,
            common,
            memory_slot,
        })
    }

    // mmaps every mappable MMIO region exported by the remote device into
    // this process and registers each mapping as a userspace memory region
    // with the hypervisor, so guest accesses hit the mapping directly.
    // A failed mmap for one area is logged and skipped; a failed hypervisor
    // registration aborts with `MapRegionGuest`.
    pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
        for mmio_region in &mut self.common.mmio_regions {
            // Three separate lock/region lookups; each `region()` result is
            // unwrapped — assumes the index is always valid for the client.
            let region_flags = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .flags;
            let file_offset = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .file_offset
                .clone();

            let sparse_areas = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .sparse_areas
                .clone();

            // Only regions the server flagged as mmap-capable are mapped.
            if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // Derive mmap protection bits from the region's read/write flags.
                let mut prot = 0;
                if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
                    prot |= libc::PROT_READ;
                }
                if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
                    prot |= libc::PROT_WRITE;
                }

                // No sparse areas means the whole region is mappable as a
                // single area starting at offset 0.
                let mmaps = if sparse_areas.is_empty() {
                    vec![vfio_region_sparse_mmap_area {
                        offset: 0,
                        size: mmio_region.length,
                    }]
                } else {
                    sparse_areas
                };

                for s in mmaps.iter() {
                    // SAFETY(review): mmap of a file descriptor provided by
                    // the vfio-user server at the server-provided offset;
                    // `file_offset` is assumed Some for mmap-capable regions
                    // (unwrap below) — confirm against the protocol contract.
                    let host_addr = unsafe {
                        libc::mmap(
                            null_mut(),
                            s.size as usize,
                            prot,
                            libc::MAP_SHARED,
                            file_offset.as_ref().unwrap().file().as_raw_fd(),
                            file_offset.as_ref().unwrap().start() as libc::off_t
                                + s.offset as libc::off_t,
                        )
                    };

                    if host_addr == libc::MAP_FAILED {
                        error!(
                            "Could not mmap regions, error:{}",
                            std::io::Error::last_os_error()
                        );
                        // Best effort: skip this area, keep mapping the rest.
                        continue;
                    }

                    // Record the mapping so it can be unmapped/moved later.
                    let user_memory_region = UserMemoryRegion {
                        slot: (self.memory_slot)(),
                        start: mmio_region.start.0 + s.offset,
                        size: s.size,
                        host_addr: host_addr as u64,
                    };

                    mmio_region.user_memory_regions.push(user_memory_region);

                    let mem_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.start,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .create_user_memory_region(mem_region)
                        .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
                }
            }
        }

        Ok(())
    }

    // Reverses map_mmio_regions(): deregisters every user memory region from
    // the hypervisor and munmaps the corresponding host mappings. Failures
    // are logged and the teardown continues (called from Drop).
    pub fn unmap_mmio_regions(&mut self) {
        for mmio_region in self.common.mmio_regions.iter() {
            for user_memory_region in mmio_region.user_memory_regions.iter() {
                // Remove region
                let r = self.vm.make_user_memory_region(
                    user_memory_region.slot,
                    user_memory_region.start,
                    user_memory_region.size,
                    user_memory_region.host_addr,
                    false,
                    false,
                );

                if let Err(e) = self.vm.remove_user_memory_region(r) {
                    error!("Could not remove the userspace memory region: {}", e);
                }

                // Remove mmaps
                // SAFETY(review): unmaps exactly the address/size recorded
                // when this region was mmapped in map_mmio_regions().
                let ret = unsafe {
                    libc::munmap(
                        user_memory_region.host_addr as *mut libc::c_void,
                        user_memory_region.size as usize,
                    )
                };
                if ret != 0 {
                    error!(
                        "Could not unmap region {}, error:{}",
                        mmio_region.index,
                        std::io::Error::last_os_error()
                    );
                }
            }
        }
    }

    // Forwards a guest memory region to the vfio-user server for DMA,
    // passing the backing file descriptor and offset. Regions without a
    // file offset are silently skipped (nothing to share with the server).
    pub fn dma_map(
        &mut self,
        region: &GuestRegionMmap<AtomicBitmap>,
    ) -> Result<(), VfioUserPciDeviceError> {
        let (fd, offset) = match region.file_offset() {
            Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
            None => return Ok(()),
        };

        self.client
            .lock()
            .unwrap()
            .dma_map(
                offset,
                region.start_addr().raw_value(),
                region.len() as u64,
                fd,
            )
            .map_err(VfioUserPciDeviceError::DmaMap)
    }

    // Asks the vfio-user server to drop the DMA mapping covering the given
    // guest memory region.
    pub fn dma_unmap(
        &mut self,
        region: &GuestRegionMmap<AtomicBitmap>,
    ) -> Result<(), VfioUserPciDeviceError> {
        self.client
            .lock()
            .unwrap()
            .dma_unmap(region.start_addr().raw_value(), region.len() as u64)
            .map_err(VfioUserPciDeviceError::DmaUnmap)
    }
}

// Bus accesses to this device are plain BAR reads/writes delegated to the
// PciDevice implementation below.
impl BusDevice for VfioUserPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base, offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.write_bar(base, offset, data)
    }
}

// Region indices 0..=9 in declaration order (BAR0..BAR5, ROM, config space,
// VGA, migration) — presumably mirrors the vfio region index layout; kept
// for reference even though unused here (#[allow(dead_code)]).
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(dead_code)]
enum Regions {
    Bar0,
    Bar1,
    Bar2,
    Bar3,
    Bar4,
    Bar5,
    Rom,
    Config,
    Vga,
    Migration,
}

// Adapts the vfio-user `Client` to the `Vfio` trait consumed by
// `VfioCommon`, so the common code is agnostic of the transport.
struct VfioUserClientWrapper {
    client: Arc<Mutex<Client>>,
}

impl Vfio for VfioUserClientWrapper {
    // Region read/write errors are deliberately discarded (`.ok()`): the
    // trait methods are infallible, matching MMIO semantics.
    fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
        self.client
            .lock()
            .unwrap()
            .region_read(index, offset, data)
            .ok();
    }

    fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
        self.client
            .lock()
            .unwrap()
            .region_write(index, offset, data)
            .ok();
    }

    // Queries the server for IRQ info and converts it into the vfio-ioctls
    // `VfioIrq` shape; returns None on any client error.
    fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
        self.client
            .lock()
            .unwrap()
            .get_irq_info(irq_index)
            .ok()
            .map(|i| VfioIrq {
                index: i.index,
                flags: i.flags,
                count: i.count,
            })
    }

    // Registers trigger eventfds for an IRQ index with the server, sending
    // the fds in chunks because they travel over an ancillary-data sendmsg().
    fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
        info!(
            "Enabling IRQ {:x} number of fds = {:?}",
            irq_index,
            event_fds.len()
        );
        let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();

        // Batch into blocks of 16 fds as sendmsg() has a size limit
        let mut sent_fds = 0;
        let num_fds = event_fds.len() as u32;
        while sent_fds < num_fds {
            let remaining_fds = num_fds - sent_fds;
            let count = if remaining_fds > 16 {
                16
            } else {
                remaining_fds
            };

            // `sent_fds` doubles as the sub-index of the first fd in this batch.
            self.client
                .lock()
                .unwrap()
                .set_irqs(
                    irq_index,
                    VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
                    sent_fds,
                    count,
                    &fds[sent_fds as usize..(sent_fds + count) as usize],
                )
                .map_err(VfioError::VfioUser)?;

            sent_fds += count;
        }

        Ok(())
    }

    // Disables an IRQ index by setting zero triggers (DATA_NONE, count 0).
    fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
        info!("Disabling IRQ {:x}", irq_index);
        self.client
            .lock()
            .unwrap()
            .set_irqs(
                irq_index,
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
                0,
                0,
                &[],
            )
            .map_err(VfioError::VfioUser)
    }

    // Unmasks a single sub-index (count 1, start 0) of the given IRQ index.
    fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
        info!("Unmasking IRQ {:x}", irq_index);
        self.client
            .lock()
            .unwrap()
            .set_irqs(
                irq_index,
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
                0,
                1,
                &[],
            )
            .map_err(VfioError::VfioUser)
    }
}

// PCI plumbing: everything except move_bar() is a straight delegation to
// `VfioCommon` / the emulated configuration space.
impl PciDevice for VfioUserPciDevice {
    fn allocate_bars(
        &mut self,
        allocator: &Arc<Mutex<SystemAllocator>>,
        mmio_allocator: &mut AddressAllocator,
        resources: Option<Vec<Resource>>,
    ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
        self.common
            .allocate_bars(allocator, mmio_allocator, resources)
    }

    fn free_bars(
        &mut self,
        allocator: &mut SystemAllocator,
        mmio_allocator: &mut AddressAllocator,
    ) -> Result<(), PciDeviceError> {
        self.common.free_bars(allocator, mmio_allocator)
    }

    fn as_any(&mut self) -> &mut dyn Any {
        self
    }

    fn detect_bar_reprogramming(
        &mut self,
        reg_idx: usize,
        data: &[u8],
    ) -> Option<BarReprogrammingParams> {
        self.common
            .configuration
            .detect_bar_reprogramming(reg_idx, data)
    }

    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        self.common.write_config_register(reg_idx, offset, data)
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        self.common.read_config_register(reg_idx)
    }

    fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.common.read_bar(base, offset, data)
    }

    fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.common.write_bar(base, offset, data)
    }

    // Relocates a BAR after the guest reprogrammed it: for the matching MMIO
    // region, each hypervisor user memory region is removed, shifted by the
    // base delta (host mapping is untouched), and re-inserted at the new
    // guest address.
    fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
        info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
        for mmio_region in self.common.mmio_regions.iter_mut() {
            if mmio_region.start.raw_value() == old_base {
                mmio_region.start = GuestAddress(new_base);

                for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
                    // Remove old region
                    let old_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .remove_user_memory_region(old_region)
                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;

                    // Update the user memory region with the correct start address.
                    if new_base > old_base {
                        user_memory_region.start += new_base - old_base;
                    } else {
                        user_memory_region.start -= old_base - new_base;
                    }

                    // Insert new region
                    let new_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.start,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .create_user_memory_region(new_region)
                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
                }
                info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
            }
        }

        Ok(())
    }

    fn id(&self) -> Option<String> {
        Some(self.id.clone())
    }
}

// Teardown: unmap everything, disable whatever interrupt mode is active,
// then shut the client connection down. All failures are logged, not raised.
impl Drop for VfioUserPciDevice {
    fn drop(&mut self) {
        self.unmap_mmio_regions();

        if let Some(msix) = &self.common.interrupt.msix {
            if msix.bar.enabled() {
                self.common.disable_msix();
            }
        }

        if let Some(msi) = &self.common.interrupt.msi {
            if msi.cfg.enabled() {
                self.common.disable_msi()
            }
        }

        if self.common.interrupt.intx_in_use() {
            self.common.disable_intx();
        }

        if let Err(e) = self.client.lock().unwrap().shutdown() {
            error!("Failed shutting down vfio-user client: {}", e);
        }
    }
}

// Default (no-op) pause/resume behavior.
impl Pausable for VfioUserPciDevice {}

impl Snapshottable for VfioUserPciDevice {
    fn id(&self) -> String {
        self.id.clone()
    }

    // The device snapshot is just the nested VfioCommon snapshot.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut vfio_pci_dev_snapshot = Snapshot::new(&self.id);

        // Snapshot VfioCommon
        vfio_pci_dev_snapshot.add_snapshot(self.common.snapshot()?);

        Ok(vfio_pci_dev_snapshot)
    }

    // Restores VfioCommon state, then re-establishes the MMIO mappings that
    // new() skipped on the restore path.
    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Restore VfioCommon
        if let Some(vfio_common_snapshot) = snapshot.snapshots.get(&self.common.id()) {
            self.common.restore(*vfio_common_snapshot.clone())?;
            self.map_mmio_regions().map_err(|e| {
                MigratableError::Restore(anyhow!(
                    "Could not map MMIO regions for VfioUserPciDevice on restore {:?}",
                    e
                ))
            })?;
        }

        Ok(())
    }
}
impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}

// Routes external DMA (un)mapping requests (e.g. from an IOMMU) to the
// vfio-user server, translating guest addresses to backing-file offsets.
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
    // vfio-user protocol client shared with the device.
    client: Arc<Mutex<Client>>,
    // Guest address space used to resolve GPAs to memory regions.
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
    pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
        Self { client, memory }
    }
}

impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
    // Maps `size` bytes at guest physical address `gpa` to IOVA `iova` by
    // locating the backing region and sending its fd + offset to the server.
    // Errors if the GPA is not covered by any region; assumes the region is
    // file-backed (unwrap on file_offset — TODO confirm for all callers).
    fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let region = mem.find_region(guest_addr);

        if let Some(region) = region {
            let file_offset = region.file_offset().unwrap();
            // Offset into the backing file = offset of the GPA within the
            // region plus the region's own file offset.
            let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
                + file_offset.start();

            self.client
                .lock()
                .unwrap()
                .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
                .map_err(|e| {
                    std::io::Error::new(
                        std::io::ErrorKind::Other,
                        format!("Error mapping region: {}", e),
                    )
                })
        } else {
            Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("Region not found for 0x{:x}", gpa),
            ))
        }
    }

    // Removes the server-side DMA mapping for the given IOVA range.
    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        self.client
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("Error unmapping region: {}", e),
                )
            })
    }
}