1 // Copyright © 2021 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 use crate::vfio::{UserMemoryRegion, Vfio, VfioCommon, VfioError, VFIO_COMMON_ID}; 7 use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError}; 8 use crate::{PciBdf, PciDevice, PciDeviceError, PciSubclass}; 9 use hypervisor::HypervisorVmError; 10 use std::any::Any; 11 use std::os::unix::prelude::AsRawFd; 12 use std::ptr::null_mut; 13 use std::sync::{Arc, Barrier, Mutex}; 14 use std::u32; 15 use thiserror::Error; 16 use vfio_bindings::bindings::vfio::*; 17 use vfio_ioctls::VfioIrq; 18 use vfio_user::{Client, Error as VfioUserError}; 19 use vm_allocator::{AddressAllocator, SystemAllocator}; 20 use vm_device::dma_mapping::ExternalDmaMapping; 21 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig}; 22 use vm_device::{BusDevice, Resource}; 23 use vm_memory::bitmap::AtomicBitmap; 24 use vm_memory::{ 25 Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, 26 }; 27 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 28 use vmm_sys_util::eventfd::EventFd; 29 30 pub struct VfioUserPciDevice { 31 id: String, 32 vm: Arc<dyn hypervisor::Vm>, 33 client: Arc<Mutex<Client>>, 34 common: VfioCommon, 35 memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>, 36 } 37 38 #[derive(Error, Debug)] 39 pub enum VfioUserPciDeviceError { 40 #[error("Client error: {0}")] 41 Client(#[source] VfioUserError), 42 #[error("Failed to map VFIO PCI region into guest: {0}")] 43 MapRegionGuest(#[source] HypervisorVmError), 44 #[error("Failed to DMA map: {0}")] 45 DmaMap(#[source] VfioUserError), 46 #[error("Failed to DMA unmap: {0}")] 47 DmaUnmap(#[source] VfioUserError), 48 #[error("Failed to initialize legacy interrupts: {0}")] 49 InitializeLegacyInterrupts(#[source] VfioPciError), 50 #[error("Failed to create VfioCommon: {0}")] 51 CreateVfioCommon(#[source] VfioPciError), 52 } 53 54 #[derive(Copy, Clone)] 55 enum PciVfioUserSubclass { 56 VfioUserSubclass = 0xff, 57 } 58 59 impl PciSubclass for PciVfioUserSubclass { 60 fn get_register_value(&self) -> u8 { 61 *self as u8 62 } 63 } 64 65 impl VfioUserPciDevice { 66 #[allow(clippy::too_many_arguments)] 67 pub fn new( 68 id: String, 69 vm: &Arc<dyn hypervisor::Vm>, 70 client: Arc<Mutex<Client>>, 71 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 72 legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, 73 bdf: PciBdf, 74 memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>, 75 snapshot: Option<Snapshot>, 76 ) -> Result<Self, VfioUserPciDeviceError> { 77 let resettable = client.lock().unwrap().resettable(); 78 if resettable { 79 client 80 .lock() 81 .unwrap() 82 .reset() 83 .map_err(VfioUserPciDeviceError::Client)?; 84 } 85 86 let vfio_wrapper = VfioUserClientWrapper { 87 client: client.clone(), 88 }; 89 90 let common = VfioCommon::new( 91 msi_interrupt_manager, 92 legacy_interrupt_group, 93 Arc::new(vfio_wrapper) as Arc<dyn Vfio>, 94 &PciVfioUserSubclass::VfioUserSubclass, 95 bdf, 96 vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID), 97 ) 98 .map_err(VfioUserPciDeviceError::CreateVfioCommon)?; 99 100 Ok(Self { 101 id, 102 vm: vm.clone(), 103 client, 104 common, 105 memory_slot, 106 }) 107 } 108 109 pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { 110 for mmio_region in &mut self.common.mmio_regions { 111 let region_flags = self 112 .client 113 .lock() 114 .unwrap() 115 .region(mmio_region.index) 116 .unwrap() 117 .flags; 118 let file_offset = self 119 .client 120 .lock() 121 .unwrap() 122 .region(mmio_region.index) 123 .unwrap() 124 .file_offset 125 .clone(); 126 127 let sparse_areas = self 128 .client 129 .lock() 130 .unwrap() 131 .region(mmio_region.index) 132 .unwrap() 133 .sparse_areas 134 .clone(); 135 136 if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { 137 let mut prot = 0; 138 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 { 139 prot |= libc::PROT_READ; 140 } 141 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 { 142 prot |= libc::PROT_WRITE; 143 } 144 145 let mmaps = if sparse_areas.is_empty() { 146 vec![vfio_region_sparse_mmap_area { 147 offset: 0, 148 size: mmio_region.length, 149 }] 150 } else { 151 sparse_areas 152 }; 153 154 for s in mmaps.iter() { 155 // SAFETY: FFI call with correct arguments 156 let host_addr = unsafe { 157 libc::mmap( 158 null_mut(), 159 s.size as usize, 160 prot, 161 libc::MAP_SHARED, 162 file_offset.as_ref().unwrap().file().as_raw_fd(), 163 file_offset.as_ref().unwrap().start() as libc::off_t 164 + s.offset as libc::off_t, 165 ) 166 }; 167 168 if host_addr == libc::MAP_FAILED { 169 error!( 170 "Could not mmap regions, error:{}", 171 std::io::Error::last_os_error() 172 ); 173 continue; 174 } 175 176 let user_memory_region = UserMemoryRegion { 177 slot: (self.memory_slot)(), 178 start: mmio_region.start.0 + s.offset, 179 size: s.size, 180 host_addr: host_addr as u64, 181 }; 182 183 mmio_region.user_memory_regions.push(user_memory_region); 184 185 let mem_region = self.vm.make_user_memory_region( 186 user_memory_region.slot, 187 user_memory_region.start, 188 user_memory_region.size, 189 user_memory_region.host_addr, 190 false, 191 false, 192 ); 193 194 self.vm 195 .create_user_memory_region(mem_region) 196 .map_err(VfioUserPciDeviceError::MapRegionGuest)?; 197 } 198 } 199 } 200 201 Ok(()) 202 } 203 204 pub fn unmap_mmio_regions(&mut self) { 205 for mmio_region in self.common.mmio_regions.iter() { 206 for user_memory_region in mmio_region.user_memory_regions.iter() { 207 // Remove region 208 let r = self.vm.make_user_memory_region( 209 user_memory_region.slot, 210 user_memory_region.start, 211 user_memory_region.size, 212 user_memory_region.host_addr, 213 false, 214 false, 215 ); 216 217 if let Err(e) = self.vm.remove_user_memory_region(r) { 218 error!("Could not remove the userspace memory region: {}", e); 219 } 220 221 // Remove mmaps 222 // SAFETY: FFI call with correct arguments 223 let ret = unsafe { 224 libc::munmap( 225 user_memory_region.host_addr as *mut libc::c_void, 226 user_memory_region.size as usize, 227 ) 228 }; 229 if ret != 0 { 230 error!( 231 "Could not unmap region {}, error:{}", 232 mmio_region.index, 233 std::io::Error::last_os_error() 234 ); 235 } 236 } 237 } 238 } 239 240 pub fn dma_map( 241 &mut self, 242 region: &GuestRegionMmap<AtomicBitmap>, 243 ) -> Result<(), VfioUserPciDeviceError> { 244 let (fd, offset) = match region.file_offset() { 245 Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()), 246 None => return Ok(()), 247 }; 248 249 self.client 250 .lock() 251 .unwrap() 252 .dma_map(offset, region.start_addr().raw_value(), region.len(), fd) 253 .map_err(VfioUserPciDeviceError::DmaMap) 254 } 255 256 pub fn dma_unmap( 257 &mut self, 258 region: &GuestRegionMmap<AtomicBitmap>, 259 ) -> Result<(), VfioUserPciDeviceError> { 260 self.client 261 .lock() 262 .unwrap() 263 .dma_unmap(region.start_addr().raw_value(), region.len()) 264 .map_err(VfioUserPciDeviceError::DmaUnmap) 265 } 266 } 267 268 impl BusDevice for VfioUserPciDevice { 269 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 270 self.read_bar(base, offset, data) 271 } 272 273 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 274 self.write_bar(base, offset, data) 275 } 276 } 277 278 #[repr(u32)] 279 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] 280 #[allow(dead_code)] 281 enum Regions { 282 Bar0, 283 Bar1, 284 Bar2, 285 Bar3, 286 Bar4, 287 Bar5, 288 Rom, 289 Config, 290 Vga, 291 Migration, 292 } 293 294 struct VfioUserClientWrapper { 295 client: Arc<Mutex<Client>>, 296 } 297 298 impl Vfio for VfioUserClientWrapper { 299 fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) { 300 self.client 301 .lock() 302 .unwrap() 303 .region_read(index, offset, data) 304 .ok(); 305 } 306 307 fn region_write(&self, index: u32, offset: u64, data: &[u8]) { 308 self.client 309 .lock() 310 .unwrap() 311 .region_write(index, offset, data) 312 .ok(); 313 } 314 315 fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> { 316 self.client 317 .lock() 318 .unwrap() 319 .get_irq_info(irq_index) 320 .ok() 321 .map(|i| VfioIrq { 322 index: i.index, 323 flags: i.flags, 324 count: i.count, 325 }) 326 } 327 328 fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> { 329 info!( 330 "Enabling IRQ {:x} number of fds = {:?}", 331 irq_index, 332 event_fds.len() 333 ); 334 let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect(); 335 336 // Batch into blocks of 16 fds as sendmsg() has a size limit 337 let mut sent_fds = 0; 338 let num_fds = event_fds.len() as u32; 339 while sent_fds < num_fds { 340 let remaining_fds = num_fds - sent_fds; 341 let count = if remaining_fds > 16 { 342 16 343 } else { 344 remaining_fds 345 }; 346 347 self.client 348 .lock() 349 .unwrap() 350 .set_irqs( 351 irq_index, 352 VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER, 353 sent_fds, 354 count, 355 &fds[sent_fds as usize..(sent_fds + count) as usize], 356 ) 357 .map_err(VfioError::VfioUser)?; 358 359 sent_fds += count; 360 } 361 362 Ok(()) 363 } 364 365 fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> { 366 info!("Disabling IRQ {:x}", irq_index); 367 self.client 368 .lock() 369 .unwrap() 370 .set_irqs( 371 irq_index, 372 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, 373 0, 374 0, 375 &[], 376 ) 377 .map_err(VfioError::VfioUser) 378 } 379 380 fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> { 381 info!("Unmasking IRQ {:x}", irq_index); 382 self.client 383 .lock() 384 .unwrap() 385 .set_irqs( 386 irq_index, 387 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, 388 0, 389 1, 390 &[], 391 ) 392 .map_err(VfioError::VfioUser) 393 } 394 } 395 396 impl PciDevice for VfioUserPciDevice { 397 fn allocate_bars( 398 &mut self, 399 allocator: &Arc<Mutex<SystemAllocator>>, 400 mmio_allocator: &mut AddressAllocator, 401 resources: Option<Vec<Resource>>, 402 ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> { 403 self.common 404 .allocate_bars(allocator, mmio_allocator, resources) 405 } 406 407 fn free_bars( 408 &mut self, 409 allocator: &mut SystemAllocator, 410 mmio_allocator: &mut AddressAllocator, 411 ) -> Result<(), PciDeviceError> { 412 self.common.free_bars(allocator, mmio_allocator) 413 } 414 415 fn as_any(&mut self) -> &mut dyn Any { 416 self 417 } 418 419 fn detect_bar_reprogramming( 420 &mut self, 421 reg_idx: usize, 422 data: &[u8], 423 ) -> Option<BarReprogrammingParams> { 424 self.common 425 .configuration 426 .detect_bar_reprogramming(reg_idx, data) 427 } 428 429 fn write_config_register( 430 &mut self, 431 reg_idx: usize, 432 offset: u64, 433 data: &[u8], 434 ) -> Option<Arc<Barrier>> { 435 self.common.write_config_register(reg_idx, offset, data) 436 } 437 438 fn read_config_register(&mut self, reg_idx: usize) -> u32 { 439 self.common.read_config_register(reg_idx) 440 } 441 442 fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { 443 self.common.read_bar(base, offset, data) 444 } 445 446 fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 447 self.common.write_bar(base, offset, data) 448 } 449 450 fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> { 451 info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base); 452 for mmio_region in self.common.mmio_regions.iter_mut() { 453 if mmio_region.start.raw_value() == old_base { 454 mmio_region.start = GuestAddress(new_base); 455 456 for user_memory_region in mmio_region.user_memory_regions.iter_mut() { 457 // Remove old region 458 let old_region = self.vm.make_user_memory_region( 459 user_memory_region.slot, 460 user_memory_region.start, 461 user_memory_region.size, 462 user_memory_region.host_addr, 463 false, 464 false, 465 ); 466 467 self.vm 468 .remove_user_memory_region(old_region) 469 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; 470 471 // Update the user memory region with the correct start address. 472 if new_base > old_base { 473 user_memory_region.start += new_base - old_base; 474 } else { 475 user_memory_region.start -= old_base - new_base; 476 } 477 478 // Insert new region 479 let new_region = self.vm.make_user_memory_region( 480 user_memory_region.slot, 481 user_memory_region.start, 482 user_memory_region.size, 483 user_memory_region.host_addr, 484 false, 485 false, 486 ); 487 488 self.vm 489 .create_user_memory_region(new_region) 490 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; 491 } 492 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base); 493 } 494 } 495 496 Ok(()) 497 } 498 499 fn id(&self) -> Option<String> { 500 Some(self.id.clone()) 501 } 502 } 503 504 impl Drop for VfioUserPciDevice { 505 fn drop(&mut self) { 506 self.unmap_mmio_regions(); 507 508 if let Some(msix) = &self.common.interrupt.msix { 509 if msix.bar.enabled() { 510 self.common.disable_msix(); 511 } 512 } 513 514 if let Some(msi) = &self.common.interrupt.msi { 515 if msi.cfg.enabled() { 516 self.common.disable_msi() 517 } 518 } 519 520 if self.common.interrupt.intx_in_use() { 521 self.common.disable_intx(); 522 } 523 524 if let Err(e) = self.client.lock().unwrap().shutdown() { 525 error!("Failed shutting down vfio-user client: {}", e); 526 } 527 } 528 } 529 530 impl Pausable for VfioUserPciDevice {} 531 532 impl Snapshottable for VfioUserPciDevice { 533 fn id(&self) -> String { 534 self.id.clone() 535 } 536 537 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 538 let mut vfio_pci_dev_snapshot = Snapshot::default(); 539 540 // Snapshot VfioCommon 541 vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?); 542 543 Ok(vfio_pci_dev_snapshot) 544 } 545 } 546 impl Transportable for VfioUserPciDevice {} 547 impl Migratable for VfioUserPciDevice {} 548 549 pub struct VfioUserDmaMapping<M: GuestAddressSpace> { 550 client: Arc<Mutex<Client>>, 551 memory: Arc<M>, 552 } 553 554 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> { 555 pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self { 556 Self { client, memory } 557 } 558 } 559 560 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> { 561 fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> { 562 let mem = self.memory.memory(); 563 let guest_addr = GuestAddress(gpa); 564 let region = mem.find_region(guest_addr); 565 566 if let Some(region) = region { 567 let file_offset = region.file_offset().unwrap(); 568 let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap() 569 + file_offset.start(); 570 571 self.client 572 .lock() 573 .unwrap() 574 .dma_map(offset, iova, size, file_offset.file().as_raw_fd()) 575 .map_err(|e| { 576 std::io::Error::new( 577 std::io::ErrorKind::Other, 578 format!("Error mapping region: {e}"), 579 ) 580 }) 581 } else { 582 Err(std::io::Error::new( 583 std::io::ErrorKind::Other, 584 format!("Region not found for 0x{gpa:x}"), 585 )) 586 } 587 } 588 589 fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> { 590 self.client 591 .lock() 592 .unwrap() 593 .dma_unmap(iova, size) 594 .map_err(|e| { 595 std::io::Error::new( 596 std::io::ErrorKind::Other, 597 format!("Error unmapping region: {e}"), 598 ) 599 }) 600 } 601 } 602