// Copyright © 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use crate::vfio::{Interrupt, UserMemoryRegion, Vfio, VfioCommon, VfioError};
use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
use crate::{
    PciBdf, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass,
};
use anyhow::anyhow;
use hypervisor::HypervisorVmError;
use std::any::Any;
use std::collections::HashMap;
use std::os::unix::prelude::AsRawFd;
use std::ptr::null_mut;
use std::sync::{Arc, Barrier, Mutex};
use std::u32;
use thiserror::Error;
use vfio_bindings::bindings::vfio::*;
use vfio_ioctls::VfioIrq;
use vfio_user::{Client, Error as VfioUserError};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
use vm_device::{BusDevice, Resource};
use vm_memory::bitmap::AtomicBitmap;
use vm_memory::{
    Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

pub struct VfioUserPciDevice {
    id: String,
    vm: Arc<dyn hypervisor::Vm>,
    client: Arc<Mutex<Client>>,
    common: VfioCommon,
    memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
}

#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
    #[error("Client error: {0}")]
    Client(#[source] VfioUserError),
    #[error("Failed to map VFIO PCI region into guest: {0}")]
    MapRegionGuest(#[source] HypervisorVmError),
    #[error("Failed to DMA map: {0}")]
    DmaMap(#[source] VfioUserError),
    #[error("Failed to DMA unmap: {0}")]
    DmaUnmap(#[source] VfioUserError),
    #[error("Failed to initialize legacy interrupts: {0}")]
    InitializeLegacyInterrupts(#[source] VfioPciError),
}

#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
    VfioUserSubclass = 0xff,
}

impl PciSubclass for PciVfioUserSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

impl VfioUserPciDevice {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        vm: &Arc<dyn hypervisor::Vm>,
        client: Arc<Mutex<Client>>,
        msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
        legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
        bdf: PciBdf,
        restoring: bool,
        memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
    ) -> Result<Self, VfioUserPciDeviceError> {
        // This is used for the BAR and capabilities only
        let configuration = PciConfiguration::new(
            0,
            0,
            0,
            PciClassCode::Other,
            &PciVfioUserSubclass::VfioUserSubclass,
            None,
            PciHeaderType::Device,
            0,
            0,
            None,
        );
        let resettable = client.lock().unwrap().resettable();
        if resettable {
            client
                .lock()
                .unwrap()
                .reset()
                .map_err(VfioUserPciDeviceError::Client)?;
        }
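
        // Hand the vfio-user client to VfioCommon behind the generic Vfio
        // trait, so the shared config space, BAR and interrupt handling
        // talks to the device over the vfio-user protocol.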
        let vfio_wrapper = VfioUserClientWrapper {
            client: client.clone(),
        };

        let mut common = VfioCommon {
            mmio_regions: Vec::new(),
            configuration,
            interrupt: Interrupt {
                intx: None,
                msi: None,
                msix: None,
            },
            msi_interrupt_manager,
            legacy_interrupt_group,
            vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
            patches: HashMap::new(),
        };

        // No need to parse capabilities from the device if on the restore path.
        // The initialization will be performed later, when restore() is called.
        if !restoring {
            common.parse_capabilities(bdf);
            common
                .initialize_legacy_interrupt()
                .map_err(VfioUserPciDeviceError::InitializeLegacyInterrupts)?;
        }

        Ok(Self {
            id,
            vm: vm.clone(),
            client,
            common,
            memory_slot,
        })
    }

    // mmap the device regions advertised by the vfio-user server (honouring
    // sparse mmap areas when present) and register each mapping with the
    // hypervisor as a user memory region, so guest accesses to these BARs
    // reach the device directly.
    pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
        for mmio_region in &mut self.common.mmio_regions {
            let region_flags = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .flags;
            let file_offset = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .file_offset
                .clone();

            let sparse_areas = self
                .client
                .lock()
                .unwrap()
                .region(mmio_region.index)
                .unwrap()
                .sparse_areas
                .clone();

            if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                let mut prot = 0;
                if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
                    prot |= libc::PROT_READ;
                }
                if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
                    prot |= libc::PROT_WRITE;
                }

                let mmaps = if sparse_areas.is_empty() {
                    vec![vfio_region_sparse_mmap_area {
                        offset: 0,
                        size: mmio_region.length,
                    }]
                } else {
                    sparse_areas
                };

                for s in mmaps.iter() {
                    let host_addr = unsafe {
                        libc::mmap(
                            null_mut(),
                            s.size as usize,
                            prot,
                            libc::MAP_SHARED,
                            file_offset.as_ref().unwrap().file().as_raw_fd(),
                            file_offset.as_ref().unwrap().start() as libc::off_t
                                + s.offset as libc::off_t,
                        )
                    };

                    if host_addr == libc::MAP_FAILED {
                        error!(
                            "Could not mmap regions, error:{}",
                            std::io::Error::last_os_error()
                        );
                        continue;
                    }

                    let user_memory_region = UserMemoryRegion {
                        slot: (self.memory_slot)(),
                        start: mmio_region.start.0 + s.offset,
                        size: s.size,
                        host_addr: host_addr as u64,
                    };

                    mmio_region.user_memory_regions.push(user_memory_region);

                    let mem_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.start,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .create_user_memory_region(mem_region)
                        .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
                }
            }
        }

        Ok(())
    }

    pub fn unmap_mmio_regions(&mut self) {
        for mmio_region in self.common.mmio_regions.iter() {
            for user_memory_region in mmio_region.user_memory_regions.iter() {
                // Remove region
                let r = self.vm.make_user_memory_region(
                    user_memory_region.slot,
                    user_memory_region.start,
                    user_memory_region.size,
                    user_memory_region.host_addr,
                    false,
                    false,
                );

                if let Err(e) = self.vm.remove_user_memory_region(r) {
                    error!("Could not remove the userspace memory region: {}", e);
                }

                // Remove mmaps
                let ret = unsafe {
                    libc::munmap(
                        user_memory_region.host_addr as *mut libc::c_void,
                        user_memory_region.size as usize,
                    )
                };
                if ret != 0 {
                    error!(
                        "Could not unmap region {}, error:{}",
                        mmio_region.index,
                        std::io::Error::last_os_error()
                    );
                }
            }
        }
    }
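
    // Export this guest memory region to the device for DMA: the region's
    // backing file descriptor and offset are sent to the vfio-user server,
    // with the region's guest-physical address used as the DMA address.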
    pub fn dma_map(
        &mut self,
        region: &GuestRegionMmap<AtomicBitmap>,
    ) -> Result<(), VfioUserPciDeviceError> {
        let (fd, offset) = match region.file_offset() {
            Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
            None => return Ok(()),
        };

        self.client
            .lock()
            .unwrap()
            .dma_map(
                offset,
                region.start_addr().raw_value(),
                region.len() as u64,
                fd,
            )
            .map_err(VfioUserPciDeviceError::DmaMap)
    }

    pub fn dma_unmap(
        &mut self,
        region: &GuestRegionMmap<AtomicBitmap>,
    ) -> Result<(), VfioUserPciDeviceError> {
        self.client
            .lock()
            .unwrap()
            .dma_unmap(region.start_addr().raw_value(), region.len() as u64)
            .map_err(VfioUserPciDeviceError::DmaUnmap)
    }
}

impl BusDevice for VfioUserPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base, offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.write_bar(base, offset, data)
    }
}

#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(dead_code)]
enum Regions {
    Bar0,
    Bar1,
    Bar2,
    Bar3,
    Bar4,
    Bar5,
    Rom,
    Config,
    Vga,
    Migration,
}

struct VfioUserClientWrapper {
    client: Arc<Mutex<Client>>,
}

impl Vfio for VfioUserClientWrapper {
    fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
        self.client
            .lock()
            .unwrap()
            .region_read(index, offset, data)
            .ok();
    }

    fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
        self.client
            .lock()
            .unwrap()
            .region_write(index, offset, data)
            .ok();
    }

    fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
        self.client
            .lock()
            .unwrap()
            .get_irq_info(irq_index)
            .ok()
            .map(|i| VfioIrq {
                index: i.index,
                flags: i.flags,
                count: i.count,
            })
    }

    fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
        info!(
            "Enabling IRQ {:x} number of fds = {:?}",
            irq_index,
            event_fds.len()
        );
        let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();

        // Batch into blocks of 16 fds as sendmsg() has a size limit
        let mut sent_fds = 0;
        let num_fds = event_fds.len() as u32;
        while sent_fds < num_fds {
            let remaining_fds = num_fds - sent_fds;
            let count = if remaining_fds > 16 {
                16
            } else {
                remaining_fds
            };

            self.client
                .lock()
                .unwrap()
                .set_irqs(
                    irq_index,
                    VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
                    sent_fds,
                    count,
                    &fds[sent_fds as usize..(sent_fds + count) as usize],
                )
                .map_err(VfioError::VfioUser)?;

            sent_fds += count;
        }

        Ok(())
    }

    fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
        info!("Disabling IRQ {:x}", irq_index);
        self.client
            .lock()
            .unwrap()
            .set_irqs(
                irq_index,
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
                0,
                0,
                &[],
            )
            .map_err(VfioError::VfioUser)
    }

    fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
        info!("Unmasking IRQ {:x}", irq_index);
        self.client
            .lock()
            .unwrap()
            .set_irqs(
                irq_index,
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
                0,
                1,
                &[],
            )
            .map_err(VfioError::VfioUser)
    }
}
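
// PciDevice implementation. BAR allocation, config space accesses and BAR
// reads/writes are delegated to the shared VfioCommon code; only BAR
// relocation (move_bar) needs device-specific handling here.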
impl PciDevice for VfioUserPciDevice {
    fn allocate_bars(
        &mut self,
        allocator: &Arc<Mutex<SystemAllocator>>,
        mmio_allocator: &mut AddressAllocator,
        resources: Option<Vec<Resource>>,
    ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
        self.common
            .allocate_bars(allocator, mmio_allocator, resources)
    }

    fn free_bars(
        &mut self,
        allocator: &mut SystemAllocator,
        mmio_allocator: &mut AddressAllocator,
    ) -> Result<(), PciDeviceError> {
        self.common.free_bars(allocator, mmio_allocator)
    }

    fn as_any(&mut self) -> &mut dyn Any {
        self
    }

    fn detect_bar_reprogramming(
        &mut self,
        reg_idx: usize,
        data: &[u8],
    ) -> Option<BarReprogrammingParams> {
        self.common
            .configuration
            .detect_bar_reprogramming(reg_idx, data)
    }

    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        self.common.write_config_register(reg_idx, offset, data)
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        self.common.read_config_register(reg_idx)
    }

    fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.common.read_bar(base, offset, data)
    }

    fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.common.write_bar(base, offset, data)
    }
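
    // The guest reprogrammed a BAR: remove the stale user memory regions from
    // the hypervisor, shift their guest-physical start by the BAR delta and
    // re-insert them, reusing the existing host mmaps.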
    fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
        info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
        for mmio_region in self.common.mmio_regions.iter_mut() {
            if mmio_region.start.raw_value() == old_base {
                mmio_region.start = GuestAddress(new_base);

                for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
                    // Remove old region
                    let old_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.start,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .remove_user_memory_region(old_region)
                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;

                    // Update the user memory region with the correct start address.
                    if new_base > old_base {
                        user_memory_region.start += new_base - old_base;
                    } else {
                        user_memory_region.start -= old_base - new_base;
                    }

                    // Insert new region
                    let new_region = self.vm.make_user_memory_region(
                        user_memory_region.slot,
                        user_memory_region.start,
                        user_memory_region.size,
                        user_memory_region.host_addr,
                        false,
                        false,
                    );

                    self.vm
                        .create_user_memory_region(new_region)
                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
                }
                info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
            }
        }

        Ok(())
    }

    fn id(&self) -> Option<String> {
        Some(self.id.clone())
    }
}

impl Drop for VfioUserPciDevice {
    fn drop(&mut self) {
        self.unmap_mmio_regions();

        if let Some(msix) = &self.common.interrupt.msix {
            if msix.bar.enabled() {
                self.common.disable_msix();
            }
        }

        if let Some(msi) = &self.common.interrupt.msi {
            if msi.cfg.enabled() {
                self.common.disable_msi()
            }
        }

        if self.common.interrupt.intx_in_use() {
            self.common.disable_intx();
        }

        if let Err(e) = self.client.lock().unwrap().shutdown() {
            error!("Failed shutting down vfio-user client: {}", e);
        }
    }
}

impl Pausable for VfioUserPciDevice {}

impl Snapshottable for VfioUserPciDevice {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut vfio_pci_dev_snapshot = Snapshot::new(&self.id);

        // Snapshot VfioCommon
        vfio_pci_dev_snapshot.add_snapshot(self.common.snapshot()?);

        Ok(vfio_pci_dev_snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Restore VfioCommon
        if let Some(vfio_common_snapshot) = snapshot.snapshots.get(&self.common.id()) {
            self.common.restore(*vfio_common_snapshot.clone())?;
            self.map_mmio_regions().map_err(|e| {
                MigratableError::Restore(anyhow!(
                    "Could not map MMIO regions for VfioUserPciDevice on restore {:?}",
                    e
                ))
            })?;
        }

        Ok(())
    }
}

impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}
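
// ExternalDmaMapping implementation for vfio-user: translates a guest-physical
// range into its backing file descriptor and offset, and forwards the
// (iova, size) mapping or unmapping request to the vfio-user server.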
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
    client: Arc<Mutex<Client>>,
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
    pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
        Self { client, memory }
    }
}

impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
    fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let region = mem.find_region(guest_addr);

        if let Some(region) = region {
            let file_offset = region.file_offset().unwrap();
            let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
                + file_offset.start();

            self.client
                .lock()
                .unwrap()
                .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
                .map_err(|e| {
                    std::io::Error::new(
                        std::io::ErrorKind::Other,
                        format!("Error mapping region: {}", e),
                    )
                })
        } else {
            Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("Region not found for 0x{:x}", gpa),
            ))
        }
    }

    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        self.client
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("Error unmapping region: {}", e),
                )
            })
    }
}