1 // Copyright © 2021 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 use crate::vfio::{Interrupt, UserMemoryRegion, Vfio, VfioCommon, VfioError}; 7 use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError}; 8 use crate::{ 9 PciBdf, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, 10 }; 11 use anyhow::anyhow; 12 use hypervisor::HypervisorVmError; 13 use std::any::Any; 14 use std::collections::HashMap; 15 use std::os::unix::prelude::AsRawFd; 16 use std::ptr::null_mut; 17 use std::sync::{Arc, Barrier, Mutex}; 18 use std::u32; 19 use thiserror::Error; 20 use vfio_bindings::bindings::vfio::*; 21 use vfio_ioctls::VfioIrq; 22 use vfio_user::{Client, Error as VfioUserError}; 23 use vm_allocator::{AddressAllocator, SystemAllocator}; 24 use vm_device::dma_mapping::ExternalDmaMapping; 25 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig}; 26 use vm_device::{BusDevice, Resource}; 27 use vm_memory::bitmap::AtomicBitmap; 28 use vm_memory::{ 29 Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, 30 }; 31 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 32 use vmm_sys_util::eventfd::EventFd; 33 34 pub struct VfioUserPciDevice { 35 id: String, 36 vm: Arc<dyn hypervisor::Vm>, 37 client: Arc<Mutex<Client>>, 38 common: VfioCommon, 39 memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>, 40 } 41 42 #[derive(Error, Debug)] 43 pub enum VfioUserPciDeviceError { 44 #[error("Client error: {0}")] 45 Client(#[source] VfioUserError), 46 #[error("Failed to map VFIO PCI region into guest: {0}")] 47 MapRegionGuest(#[source] HypervisorVmError), 48 #[error("Failed to DMA map: {0}")] 49 DmaMap(#[source] VfioUserError), 50 #[error("Failed to DMA unmap: {0}")] 51 DmaUnmap(#[source] VfioUserError), 52 #[error("Failed to initialize legacy interrupts: {0}")] 53 InitializeLegacyInterrupts(#[source] VfioPciError), 54 } 55 56 #[derive(Copy, Clone)] 57 enum PciVfioUserSubclass { 58 VfioUserSubclass = 0xff, 59 } 60 61 impl PciSubclass for PciVfioUserSubclass { 62 fn get_register_value(&self) -> u8 { 63 *self as u8 64 } 65 } 66 67 impl VfioUserPciDevice { 68 #[allow(clippy::too_many_arguments)] 69 pub fn new( 70 id: String, 71 vm: &Arc<dyn hypervisor::Vm>, 72 client: Arc<Mutex<Client>>, 73 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 74 legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, 75 bdf: PciBdf, 76 restoring: bool, 77 memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>, 78 ) -> Result<Self, VfioUserPciDeviceError> { 79 // This is used for the BAR and capabilities only 80 let configuration = PciConfiguration::new( 81 0, 82 0, 83 0, 84 PciClassCode::Other, 85 &PciVfioUserSubclass::VfioUserSubclass, 86 None, 87 PciHeaderType::Device, 88 0, 89 0, 90 None, 91 None, 92 ); 93 let resettable = client.lock().unwrap().resettable(); 94 if resettable { 95 client 96 .lock() 97 .unwrap() 98 .reset() 99 .map_err(VfioUserPciDeviceError::Client)?; 100 } 101 102 let vfio_wrapper = VfioUserClientWrapper { 103 client: client.clone(), 104 }; 105 106 let mut common = VfioCommon { 107 mmio_regions: Vec::new(), 108 configuration, 109 interrupt: Interrupt { 110 intx: None, 111 msi: None, 112 msix: None, 113 }, 114 msi_interrupt_manager, 115 legacy_interrupt_group, 116 vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>, 117 patches: HashMap::new(), 118 }; 119 120 // No need to parse capabilities from the device if on the restore path. 121 // The initialization will be performed later when restore() will be 122 // called. 123 if !restoring { 124 common.parse_capabilities(bdf); 125 common 126 .initialize_legacy_interrupt() 127 .map_err(VfioUserPciDeviceError::InitializeLegacyInterrupts)?; 128 } 129 130 Ok(Self { 131 id, 132 vm: vm.clone(), 133 client, 134 common, 135 memory_slot, 136 }) 137 } 138 139 pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { 140 for mmio_region in &mut self.common.mmio_regions { 141 let region_flags = self 142 .client 143 .lock() 144 .unwrap() 145 .region(mmio_region.index) 146 .unwrap() 147 .flags; 148 let file_offset = self 149 .client 150 .lock() 151 .unwrap() 152 .region(mmio_region.index) 153 .unwrap() 154 .file_offset 155 .clone(); 156 157 let sparse_areas = self 158 .client 159 .lock() 160 .unwrap() 161 .region(mmio_region.index) 162 .unwrap() 163 .sparse_areas 164 .clone(); 165 166 if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { 167 let mut prot = 0; 168 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 { 169 prot |= libc::PROT_READ; 170 } 171 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 { 172 prot |= libc::PROT_WRITE; 173 } 174 175 let mmaps = if sparse_areas.is_empty() { 176 vec![vfio_region_sparse_mmap_area { 177 offset: 0, 178 size: mmio_region.length, 179 }] 180 } else { 181 sparse_areas 182 }; 183 184 for s in mmaps.iter() { 185 // SAFETY: FFI call with correct arguments 186 let host_addr = unsafe { 187 libc::mmap( 188 null_mut(), 189 s.size as usize, 190 prot, 191 libc::MAP_SHARED, 192 file_offset.as_ref().unwrap().file().as_raw_fd(), 193 file_offset.as_ref().unwrap().start() as libc::off_t 194 + s.offset as libc::off_t, 195 ) 196 }; 197 198 if host_addr == libc::MAP_FAILED { 199 error!( 200 "Could not mmap regions, error:{}", 201 std::io::Error::last_os_error() 202 ); 203 continue; 204 } 205 206 let user_memory_region = UserMemoryRegion { 207 slot: (self.memory_slot)(), 208 start: mmio_region.start.0 + s.offset, 209 size: s.size, 210 host_addr: host_addr as u64, 211 }; 212 213 mmio_region.user_memory_regions.push(user_memory_region); 214 215 let mem_region = self.vm.make_user_memory_region( 216 user_memory_region.slot, 217 user_memory_region.start, 218 user_memory_region.size, 219 user_memory_region.host_addr, 220 false, 221 false, 222 ); 223 224 self.vm 225 .create_user_memory_region(mem_region) 226 .map_err(VfioUserPciDeviceError::MapRegionGuest)?; 227 } 228 } 229 } 230 231 Ok(()) 232 } 233 234 pub fn unmap_mmio_regions(&mut self) { 235 for mmio_region in self.common.mmio_regions.iter() { 236 for user_memory_region in mmio_region.user_memory_regions.iter() { 237 // Remove region 238 let r = self.vm.make_user_memory_region( 239 user_memory_region.slot, 240 user_memory_region.start, 241 user_memory_region.size, 242 user_memory_region.host_addr, 243 false, 244 false, 245 ); 246 247 if let Err(e) = self.vm.remove_user_memory_region(r) { 248 error!("Could not remove the userspace memory region: {}", e); 249 } 250 251 // Remove mmaps 252 // SAFETY: FFI call with correct arguments 253 let ret = unsafe { 254 libc::munmap( 255 user_memory_region.host_addr as *mut libc::c_void, 256 user_memory_region.size as usize, 257 ) 258 }; 259 if ret != 0 { 260 error!( 261 "Could not unmap region {}, error:{}", 262 mmio_region.index, 263 std::io::Error::last_os_error() 264 ); 265 } 266 } 267 } 268 } 269 270 pub fn dma_map( 271 &mut self, 272 region: &GuestRegionMmap<AtomicBitmap>, 273 ) -> Result<(), VfioUserPciDeviceError> { 274 let (fd, offset) = match region.file_offset() { 275 Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()), 276 None => return Ok(()), 277 }; 278 279 self.client 280 .lock() 281 .unwrap() 282 .dma_map(offset, region.start_addr().raw_value(), region.len(), fd) 283 .map_err(VfioUserPciDeviceError::DmaMap) 284 } 285 286 pub fn dma_unmap( 287 &mut self, 288 region: &GuestRegionMmap<AtomicBitmap>, 289 ) -> Result<(), VfioUserPciDeviceError> { 290 self.client 291 .lock() 292 .unwrap() 293 .dma_unmap(region.start_addr().raw_value(), region.len()) 294 .map_err(VfioUserPciDeviceError::DmaUnmap) 295 } 296 } 297 298 impl BusDevice for VfioUserPciDevice { 299 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 300 self.read_bar(base, offset, data) 301 } 302 303 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 304 self.write_bar(base, offset, data) 305 } 306 } 307 308 #[repr(u32)] 309 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] 310 #[allow(dead_code)] 311 enum Regions { 312 Bar0, 313 Bar1, 314 Bar2, 315 Bar3, 316 Bar4, 317 Bar5, 318 Rom, 319 Config, 320 Vga, 321 Migration, 322 } 323 324 struct VfioUserClientWrapper { 325 client: Arc<Mutex<Client>>, 326 } 327 328 impl Vfio for VfioUserClientWrapper { 329 fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) { 330 self.client 331 .lock() 332 .unwrap() 333 .region_read(index, offset, data) 334 .ok(); 335 } 336 337 fn region_write(&self, index: u32, offset: u64, data: &[u8]) { 338 self.client 339 .lock() 340 .unwrap() 341 .region_write(index, offset, data) 342 .ok(); 343 } 344 345 fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> { 346 self.client 347 .lock() 348 .unwrap() 349 .get_irq_info(irq_index) 350 .ok() 351 .map(|i| VfioIrq { 352 index: i.index, 353 flags: i.flags, 354 count: i.count, 355 }) 356 } 357 358 fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> { 359 info!( 360 "Enabling IRQ {:x} number of fds = {:?}", 361 irq_index, 362 event_fds.len() 363 ); 364 let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect(); 365 366 // Batch into blocks of 16 fds as sendmsg() has a size limit 367 let mut sent_fds = 0; 368 let num_fds = event_fds.len() as u32; 369 while sent_fds < num_fds { 370 let remaining_fds = num_fds - sent_fds; 371 let count = if remaining_fds > 16 { 372 16 373 } else { 374 remaining_fds 375 }; 376 377 self.client 378 .lock() 379 .unwrap() 380 .set_irqs( 381 irq_index, 382 VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER, 383 sent_fds, 384 count, 385 &fds[sent_fds as usize..(sent_fds + count) as usize], 386 ) 387 .map_err(VfioError::VfioUser)?; 388 389 sent_fds += count; 390 } 391 392 Ok(()) 393 } 394 395 fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> { 396 info!("Disabling IRQ {:x}", irq_index); 397 self.client 398 .lock() 399 .unwrap() 400 .set_irqs( 401 irq_index, 402 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, 403 0, 404 0, 405 &[], 406 ) 407 .map_err(VfioError::VfioUser) 408 } 409 410 fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> { 411 info!("Unmasking IRQ {:x}", irq_index); 412 self.client 413 .lock() 414 .unwrap() 415 .set_irqs( 416 irq_index, 417 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, 418 0, 419 1, 420 &[], 421 ) 422 .map_err(VfioError::VfioUser) 423 } 424 } 425 426 impl PciDevice for VfioUserPciDevice { 427 fn allocate_bars( 428 &mut self, 429 allocator: &Arc<Mutex<SystemAllocator>>, 430 mmio_allocator: &mut AddressAllocator, 431 resources: Option<Vec<Resource>>, 432 ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> { 433 self.common 434 .allocate_bars(allocator, mmio_allocator, resources) 435 } 436 437 fn free_bars( 438 &mut self, 439 allocator: &mut SystemAllocator, 440 mmio_allocator: &mut AddressAllocator, 441 ) -> Result<(), PciDeviceError> { 442 self.common.free_bars(allocator, mmio_allocator) 443 } 444 445 fn as_any(&mut self) -> &mut dyn Any { 446 self 447 } 448 449 fn detect_bar_reprogramming( 450 &mut self, 451 reg_idx: usize, 452 data: &[u8], 453 ) -> Option<BarReprogrammingParams> { 454 self.common 455 .configuration 456 .detect_bar_reprogramming(reg_idx, data) 457 } 458 459 fn write_config_register( 460 &mut self, 461 reg_idx: usize, 462 offset: u64, 463 data: &[u8], 464 ) -> Option<Arc<Barrier>> { 465 self.common.write_config_register(reg_idx, offset, data) 466 } 467 468 fn read_config_register(&mut self, reg_idx: usize) -> u32 { 469 self.common.read_config_register(reg_idx) 470 } 471 472 fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { 473 self.common.read_bar(base, offset, data) 474 } 475 476 fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 477 self.common.write_bar(base, offset, data) 478 } 479 480 fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> { 481 info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base); 482 for mmio_region in self.common.mmio_regions.iter_mut() { 483 if mmio_region.start.raw_value() == old_base { 484 mmio_region.start = GuestAddress(new_base); 485 486 for user_memory_region in mmio_region.user_memory_regions.iter_mut() { 487 // Remove old region 488 let old_region = self.vm.make_user_memory_region( 489 user_memory_region.slot, 490 user_memory_region.start, 491 user_memory_region.size, 492 user_memory_region.host_addr, 493 false, 494 false, 495 ); 496 497 self.vm 498 .remove_user_memory_region(old_region) 499 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; 500 501 // Update the user memory region with the correct start address. 502 if new_base > old_base { 503 user_memory_region.start += new_base - old_base; 504 } else { 505 user_memory_region.start -= old_base - new_base; 506 } 507 508 // Insert new region 509 let new_region = self.vm.make_user_memory_region( 510 user_memory_region.slot, 511 user_memory_region.start, 512 user_memory_region.size, 513 user_memory_region.host_addr, 514 false, 515 false, 516 ); 517 518 self.vm 519 .create_user_memory_region(new_region) 520 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; 521 } 522 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base); 523 } 524 } 525 526 Ok(()) 527 } 528 529 fn id(&self) -> Option<String> { 530 Some(self.id.clone()) 531 } 532 } 533 534 impl Drop for VfioUserPciDevice { 535 fn drop(&mut self) { 536 self.unmap_mmio_regions(); 537 538 if let Some(msix) = &self.common.interrupt.msix { 539 if msix.bar.enabled() { 540 self.common.disable_msix(); 541 } 542 } 543 544 if let Some(msi) = &self.common.interrupt.msi { 545 if msi.cfg.enabled() { 546 self.common.disable_msi() 547 } 548 } 549 550 if self.common.interrupt.intx_in_use() { 551 self.common.disable_intx(); 552 } 553 554 if let Err(e) = self.client.lock().unwrap().shutdown() { 555 error!("Failed shutting down vfio-user client: {}", e); 556 } 557 } 558 } 559 560 impl Pausable for VfioUserPciDevice {} 561 562 impl Snapshottable for VfioUserPciDevice { 563 fn id(&self) -> String { 564 self.id.clone() 565 } 566 567 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 568 let mut vfio_pci_dev_snapshot = Snapshot::new(&self.id); 569 570 // Snapshot VfioCommon 571 vfio_pci_dev_snapshot.add_snapshot(self.common.snapshot()?); 572 573 Ok(vfio_pci_dev_snapshot) 574 } 575 576 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 577 // Restore VfioCommon 578 if let Some(vfio_common_snapshot) = snapshot.snapshots.get(&self.common.id()) { 579 self.common.restore(*vfio_common_snapshot.clone())?; 580 self.map_mmio_regions().map_err(|e| { 581 MigratableError::Restore(anyhow!( 582 "Could not map MMIO regions for VfioUserPciDevice on restore {:?}", 583 e 584 )) 585 })?; 586 } 587 588 Ok(()) 589 } 590 } 591 impl Transportable for VfioUserPciDevice {} 592 impl Migratable for VfioUserPciDevice {} 593 594 pub struct VfioUserDmaMapping<M: GuestAddressSpace> { 595 client: Arc<Mutex<Client>>, 596 memory: Arc<M>, 597 } 598 599 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> { 600 pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self { 601 Self { client, memory } 602 } 603 } 604 605 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> { 606 fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> { 607 let mem = self.memory.memory(); 608 let guest_addr = GuestAddress(gpa); 609 let region = mem.find_region(guest_addr); 610 611 if let Some(region) = region { 612 let file_offset = region.file_offset().unwrap(); 613 let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap() 614 + file_offset.start(); 615 616 self.client 617 .lock() 618 .unwrap() 619 .dma_map(offset, iova, size, file_offset.file().as_raw_fd()) 620 .map_err(|e| { 621 std::io::Error::new( 622 std::io::ErrorKind::Other, 623 format!("Error mapping region: {}", e), 624 ) 625 }) 626 } else { 627 Err(std::io::Error::new( 628 std::io::ErrorKind::Other, 629 format!("Region not found for 0x{:x}", gpa), 630 )) 631 } 632 } 633 634 fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> { 635 self.client 636 .lock() 637 .unwrap() 638 .dma_unmap(iova, size) 639 .map_err(|e| { 640 std::io::Error::new( 641 std::io::ErrorKind::Other, 642 format!("Error unmapping region: {}", e), 643 ) 644 }) 645 } 646 } 647