1db5b4763SSamuel Ortiz // Copyright © 2019 Intel Corporation 2db5b4763SSamuel Ortiz // 3db5b4763SSamuel Ortiz // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4db5b4763SSamuel Ortiz // 5db5b4763SSamuel Ortiz 6de21c9baSSebastien Boeuf use std::any::Any; 7e45e3df6SSebastien Boeuf use std::collections::{BTreeMap, HashMap}; 8cdfc1773SRob Bradford use std::io; 9c93d5361SSebastien Boeuf use std::os::unix::io::AsRawFd; 10297b41d6SBo Chen use std::path::PathBuf; 11b746dd71SChao Peng use std::ptr::null_mut; 129ef1187fSRob Bradford use std::sync::{Arc, Barrier, Mutex}; 1388a9f799SRob Bradford 1488a9f799SRob Bradford use anyhow::anyhow; 1588a9f799SRob Bradford use byteorder::{ByteOrder, LittleEndian}; 1688a9f799SRob Bradford use hypervisor::HypervisorVmError; 1788a9f799SRob Bradford use libc::{sysconf, _SC_PAGESIZE}; 1888a9f799SRob Bradford use serde::{Deserialize, Serialize}; 19cdfc1773SRob Bradford use thiserror::Error; 20db5b4763SSamuel Ortiz use vfio_bindings::bindings::vfio::*; 211108bd19SSebastien Boeuf use vfio_ioctls::{ 221108bd19SSebastien Boeuf VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea, 231108bd19SSebastien Boeuf }; 24a7187168SJianyong Wu use vm_allocator::page_size::{ 25a7187168SJianyong Wu align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned, 26a7187168SJianyong Wu }; 2781f8a27eSRob Bradford use vm_allocator::{AddressAllocator, MemorySlotAllocator, SystemAllocator}; 28a5e2460dSAndrew Carp use vm_device::dma_mapping::ExternalDmaMapping; 29da2b3c92SSamuel Ortiz use vm_device::interrupt::{ 30da2b3c92SSamuel Ortiz InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig, 31da2b3c92SSamuel Ortiz }; 326e084572SSebastien Boeuf use vm_device::{BusDevice, Resource}; 33a5e2460dSAndrew Carp use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize}; 3410ab87d6SRob Bradford use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 359caad739SRob Bradford use vmm_sys_util::eventfd::EventFd; 36c93d5361SSebastien Boeuf 3788a9f799SRob Bradford use crate::msi::{MsiConfigState, MSI_CONFIG_ID}; 3888a9f799SRob Bradford use crate::msix::MsixConfigState; 3988a9f799SRob Bradford use crate::{ 4088a9f799SRob Bradford msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig, 4188a9f799SRob Bradford PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId, 4288a9f799SRob Bradford PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId, 4388a9f799SRob Bradford PciHeaderType, PciSubclass, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, PCI_CONFIGURATION_ID, 4488a9f799SRob Bradford }; 4588a9f799SRob Bradford 46cc3706afSSebastien Boeuf pub(crate) const VFIO_COMMON_ID: &str = "vfio_common"; 47cc3706afSSebastien Boeuf 48cdfc1773SRob Bradford #[derive(Debug, Error)] 49c93d5361SSebastien Boeuf pub enum VfioPciError { 50*a3692144SPhilipp Schuster #[error("Failed to create user memory region")] 511108bd19SSebastien Boeuf CreateUserMemoryRegion(#[source] HypervisorVmError), 52297b41d6SBo Chen #[error("Failed to DMA map: {0} for device {1} (guest BDF: {2})")] 53297b41d6SBo Chen DmaMap(#[source] vfio_ioctls::VfioError, PathBuf, PciBdf), 54297b41d6SBo Chen #[error("Failed to DMA unmap: {0} for device {1} (guest BDF: {2})")] 55297b41d6SBo Chen DmaUnmap(#[source] vfio_ioctls::VfioError, PathBuf, PciBdf), 56*a3692144SPhilipp Schuster #[error("Failed to enable INTx")] 57cdfc1773SRob Bradford EnableIntx(#[source] VfioError), 58*a3692144SPhilipp Schuster #[error("Failed to enable MSI")] 59cdfc1773SRob Bradford EnableMsi(#[source] VfioError), 60*a3692144SPhilipp Schuster #[error("Failed to enable MSI-x")] 61cdfc1773SRob Bradford EnableMsix(#[source] VfioError), 621108bd19SSebastien Boeuf #[error("Failed to mmap the area")] 631108bd19SSebastien Boeuf MmapArea, 64cdfc1773SRob Bradford #[error("Failed to notifier's eventfd")] 6527515a6eSSebastien Boeuf MissingNotifier, 661108bd19SSebastien Boeuf #[error("Invalid region alignment")] 671108bd19SSebastien Boeuf RegionAlignment, 681108bd19SSebastien Boeuf #[error("Invalid region size")] 691108bd19SSebastien Boeuf RegionSize, 70*a3692144SPhilipp Schuster #[error("Failed to retrieve MsiConfigState")] 71cc3706afSSebastien Boeuf RetrieveMsiConfigState(#[source] anyhow::Error), 72*a3692144SPhilipp Schuster #[error("Failed to retrieve MsixConfigState")] 73cc3706afSSebastien Boeuf RetrieveMsixConfigState(#[source] anyhow::Error), 74*a3692144SPhilipp Schuster #[error("Failed to retrieve PciConfigurationState")] 75cc3706afSSebastien Boeuf RetrievePciConfigurationState(#[source] anyhow::Error), 76*a3692144SPhilipp Schuster #[error("Failed to retrieve VfioCommonState")] 77cc3706afSSebastien Boeuf RetrieveVfioCommonState(#[source] anyhow::Error), 78c93d5361SSebastien Boeuf } 79db5b4763SSamuel Ortiz 80db5b4763SSamuel Ortiz #[derive(Copy, Clone)] 81db5b4763SSamuel Ortiz enum PciVfioSubclass { 82db5b4763SSamuel Ortiz VfioSubclass = 0xff, 83db5b4763SSamuel Ortiz } 84db5b4763SSamuel Ortiz 85db5b4763SSamuel Ortiz impl PciSubclass for PciVfioSubclass { get_register_value(&self) -> u886db5b4763SSamuel Ortiz fn get_register_value(&self) -> u8 { 87db5b4763SSamuel Ortiz *self as u8 88db5b4763SSamuel Ortiz } 89db5b4763SSamuel Ortiz } 90db5b4763SSamuel Ortiz 9120f01161SSebastien Boeuf enum InterruptUpdateAction { 9220f01161SSebastien Boeuf EnableMsi, 9320f01161SSebastien Boeuf DisableMsi, 9420f01161SSebastien Boeuf EnableMsix, 9520f01161SSebastien Boeuf DisableMsix, 9620f01161SSebastien Boeuf } 9720f01161SSebastien Boeuf 9810ab87d6SRob Bradford #[derive(Serialize, Deserialize)] 9949069d84SSebastien Boeuf struct IntxState { 10049069d84SSebastien Boeuf enabled: bool, 10149069d84SSebastien Boeuf } 10249069d84SSebastien Boeuf 103ec1f7189SRob Bradford pub(crate) struct VfioIntx { 104dcc646f5SSebastien Boeuf interrupt_source_group: Arc<dyn InterruptSourceGroup>, 10519167e76SSebastien Boeuf enabled: bool, 10619167e76SSebastien Boeuf } 10719167e76SSebastien Boeuf 10810ab87d6SRob Bradford #[derive(Serialize, Deserialize)] 10949069d84SSebastien Boeuf struct MsiState { 11049069d84SSebastien Boeuf cap: MsiCap, 11149069d84SSebastien Boeuf cap_offset: u32, 11249069d84SSebastien Boeuf } 11349069d84SSebastien Boeuf 114ec1f7189SRob Bradford pub(crate) struct VfioMsi { 115ec1f7189SRob Bradford pub(crate) cfg: MsiConfig, 11620f01161SSebastien Boeuf cap_offset: u32, 117dcc646f5SSebastien Boeuf interrupt_source_group: Arc<dyn InterruptSourceGroup>, 11820f01161SSebastien Boeuf } 11920f01161SSebastien Boeuf 12020f01161SSebastien Boeuf impl VfioMsi { update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>12120f01161SSebastien Boeuf fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> { 122f3c38701SSebastien Boeuf let old_enabled = self.cfg.enabled(); 12320f01161SSebastien Boeuf 124f3c38701SSebastien Boeuf self.cfg.update(offset, data); 12520f01161SSebastien Boeuf 126f3c38701SSebastien Boeuf let new_enabled = self.cfg.enabled(); 12720f01161SSebastien Boeuf 12820f01161SSebastien Boeuf if !old_enabled && new_enabled { 12920f01161SSebastien Boeuf return Some(InterruptUpdateAction::EnableMsi); 13020f01161SSebastien Boeuf } 13120f01161SSebastien Boeuf 13220f01161SSebastien Boeuf if old_enabled && !new_enabled { 13320f01161SSebastien Boeuf return Some(InterruptUpdateAction::DisableMsi); 13420f01161SSebastien Boeuf } 13520f01161SSebastien Boeuf 13620f01161SSebastien Boeuf None 13720f01161SSebastien Boeuf } 13820f01161SSebastien Boeuf } 13920f01161SSebastien Boeuf 14010ab87d6SRob Bradford #[derive(Serialize, Deserialize)] 14149069d84SSebastien Boeuf struct MsixState { 14249069d84SSebastien Boeuf cap: MsixCap, 14349069d84SSebastien Boeuf cap_offset: u32, 14449069d84SSebastien Boeuf bdf: u32, 14549069d84SSebastien Boeuf } 14649069d84SSebastien Boeuf 147ec1f7189SRob Bradford pub(crate) struct VfioMsix { 148ec1f7189SRob Bradford pub(crate) bar: MsixConfig, 14920f01161SSebastien Boeuf cap: MsixCap, 15020f01161SSebastien Boeuf cap_offset: u32, 151dcc646f5SSebastien Boeuf interrupt_source_group: Arc<dyn InterruptSourceGroup>, 15220f01161SSebastien Boeuf } 15320f01161SSebastien Boeuf 15420f01161SSebastien Boeuf impl VfioMsix { update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>15520f01161SSebastien Boeuf fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> { 1563fe362e3SSebastien Boeuf let old_enabled = self.bar.enabled(); 15720f01161SSebastien Boeuf 15820f01161SSebastien Boeuf // Update "Message Control" word 15920f01161SSebastien Boeuf if offset == 2 && data.len() == 2 { 1603fe362e3SSebastien Boeuf self.bar.set_msg_ctl(LittleEndian::read_u16(data)); 16120f01161SSebastien Boeuf } 16220f01161SSebastien Boeuf 1633fe362e3SSebastien Boeuf let new_enabled = self.bar.enabled(); 16420f01161SSebastien Boeuf 16520f01161SSebastien Boeuf if !old_enabled && new_enabled { 16620f01161SSebastien Boeuf return Some(InterruptUpdateAction::EnableMsix); 16720f01161SSebastien Boeuf } 16820f01161SSebastien Boeuf 16920f01161SSebastien Boeuf if old_enabled && !new_enabled { 17020f01161SSebastien Boeuf return Some(InterruptUpdateAction::DisableMsix); 17120f01161SSebastien Boeuf } 17220f01161SSebastien Boeuf 17320f01161SSebastien Boeuf None 17420f01161SSebastien Boeuf } 17520f01161SSebastien Boeuf table_accessed(&self, bar_index: u32, offset: u64) -> bool17620f01161SSebastien Boeuf fn table_accessed(&self, bar_index: u32, offset: u64) -> bool { 17720f01161SSebastien Boeuf let table_offset: u64 = u64::from(self.cap.table_offset()); 17820f01161SSebastien Boeuf let table_size: u64 = u64::from(self.cap.table_size()) * (MSIX_TABLE_ENTRY_SIZE as u64); 17920f01161SSebastien Boeuf let table_bir: u32 = self.cap.table_bir(); 18020f01161SSebastien Boeuf 18120f01161SSebastien Boeuf bar_index == table_bir && offset >= table_offset && offset < table_offset + table_size 18220f01161SSebastien Boeuf } 18320f01161SSebastien Boeuf } 18420f01161SSebastien Boeuf 185ec1f7189SRob Bradford pub(crate) struct Interrupt { 186ec1f7189SRob Bradford pub(crate) intx: Option<VfioIntx>, 187ec1f7189SRob Bradford pub(crate) msi: Option<VfioMsi>, 188ec1f7189SRob Bradford pub(crate) msix: Option<VfioMsix>, 18920f01161SSebastien Boeuf } 19020f01161SSebastien Boeuf 19120f01161SSebastien Boeuf impl Interrupt { update_msi(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>19220f01161SSebastien Boeuf fn update_msi(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> { 19320f01161SSebastien Boeuf if let Some(ref mut msi) = &mut self.msi { 19420f01161SSebastien Boeuf let action = msi.update(offset, data); 19520f01161SSebastien Boeuf return action; 19620f01161SSebastien Boeuf } 19720f01161SSebastien Boeuf 19820f01161SSebastien Boeuf None 19920f01161SSebastien Boeuf } 20020f01161SSebastien Boeuf update_msix(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>20120f01161SSebastien Boeuf fn update_msix(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> { 20220f01161SSebastien Boeuf if let Some(ref mut msix) = &mut self.msix { 20320f01161SSebastien Boeuf let action = msix.update(offset, data); 20420f01161SSebastien Boeuf return action; 20520f01161SSebastien Boeuf } 20620f01161SSebastien Boeuf 20720f01161SSebastien Boeuf None 20820f01161SSebastien Boeuf } 20920f01161SSebastien Boeuf accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)>210827229d8SRob Bradford fn accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)> { 21120f01161SSebastien Boeuf if let Some(msi) = &self.msi { 21220f01161SSebastien Boeuf if offset >= u64::from(msi.cap_offset) 213f3c38701SSebastien Boeuf && offset < u64::from(msi.cap_offset) + msi.cfg.size() 21420f01161SSebastien Boeuf { 21520f01161SSebastien Boeuf return Some(( 216827229d8SRob Bradford PciCapabilityId::MessageSignalledInterrupts, 21720f01161SSebastien Boeuf u64::from(msi.cap_offset), 21820f01161SSebastien Boeuf )); 21920f01161SSebastien Boeuf } 22020f01161SSebastien Boeuf } 22120f01161SSebastien Boeuf 22220f01161SSebastien Boeuf if let Some(msix) = &self.msix { 22320f01161SSebastien Boeuf if offset == u64::from(msix.cap_offset) { 224827229d8SRob Bradford return Some((PciCapabilityId::MsiX, u64::from(msix.cap_offset))); 22520f01161SSebastien Boeuf } 22620f01161SSebastien Boeuf } 22720f01161SSebastien Boeuf 22820f01161SSebastien Boeuf None 22920f01161SSebastien Boeuf } 23020f01161SSebastien Boeuf msix_table_accessed(&self, bar_index: u32, offset: u64) -> bool23120f01161SSebastien Boeuf fn msix_table_accessed(&self, bar_index: u32, offset: u64) -> bool { 23220f01161SSebastien Boeuf if let Some(msix) = &self.msix { 23320f01161SSebastien Boeuf return msix.table_accessed(bar_index, offset); 23420f01161SSebastien Boeuf } 23520f01161SSebastien Boeuf 23620f01161SSebastien Boeuf false 23720f01161SSebastien Boeuf } 23820f01161SSebastien Boeuf msix_write_table(&mut self, offset: u64, data: &[u8])23920f01161SSebastien Boeuf fn msix_write_table(&mut self, offset: u64, data: &[u8]) { 24020f01161SSebastien Boeuf if let Some(ref mut msix) = &mut self.msix { 24149ef201cSSebastien Boeuf let offset = offset - u64::from(msix.cap.table_offset()); 24220f01161SSebastien Boeuf msix.bar.write_table(offset, data) 24320f01161SSebastien Boeuf } 24420f01161SSebastien Boeuf } 24520f01161SSebastien Boeuf msix_read_table(&self, offset: u64, data: &mut [u8])24620f01161SSebastien Boeuf fn msix_read_table(&self, offset: u64, data: &mut [u8]) { 24720f01161SSebastien Boeuf if let Some(msix) = &self.msix { 24849ef201cSSebastien Boeuf let offset = offset - u64::from(msix.cap.table_offset()); 24920f01161SSebastien Boeuf msix.bar.read_table(offset, data) 25020f01161SSebastien Boeuf } 25120f01161SSebastien Boeuf } 25219167e76SSebastien Boeuf intx_in_use(&self) -> bool253ec1f7189SRob Bradford pub(crate) fn intx_in_use(&self) -> bool { 25419167e76SSebastien Boeuf if let Some(intx) = &self.intx { 25519167e76SSebastien Boeuf return intx.enabled; 25619167e76SSebastien Boeuf } 25719167e76SSebastien Boeuf 25819167e76SSebastien Boeuf false 25919167e76SSebastien Boeuf } 26020f01161SSebastien Boeuf } 26120f01161SSebastien Boeuf 262db5b4763SSamuel Ortiz #[derive(Copy, Clone)] 263ed5f2544SSebastien Boeuf pub struct UserMemoryRegion { 264bf39146cSBo Chen pub slot: u32, 265bf39146cSBo Chen pub start: u64, 266bf39146cSBo Chen pub size: u64, 267bf39146cSBo Chen pub host_addr: u64, 268ed5f2544SSebastien Boeuf } 269ed5f2544SSebastien Boeuf 270ed5f2544SSebastien Boeuf #[derive(Clone)] 271593a958fSRob Bradford pub struct MmioRegion { 272593a958fSRob Bradford pub start: GuestAddress, 273593a958fSRob Bradford pub length: GuestUsize, 274ec1f7189SRob Bradford pub(crate) type_: PciBarRegionType, 275ec1f7189SRob Bradford pub(crate) index: u32, 276ed5f2544SSebastien Boeuf pub(crate) user_memory_regions: Vec<UserMemoryRegion>, 277db5b4763SSamuel Ortiz } 278045964deSAndrew Carp 279045964deSAndrew Carp trait MmioRegionRange { check_range(&self, guest_addr: u64, size: u64) -> bool280045964deSAndrew Carp fn check_range(&self, guest_addr: u64, size: u64) -> bool; find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>281045964deSAndrew Carp fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>; 282045964deSAndrew Carp } 283045964deSAndrew Carp 284045964deSAndrew Carp impl MmioRegionRange for Vec<MmioRegion> { 285045964deSAndrew Carp // Check if a guest address is within the range of mmio regions check_range(&self, guest_addr: u64, size: u64) -> bool286045964deSAndrew Carp fn check_range(&self, guest_addr: u64, size: u64) -> bool { 287045964deSAndrew Carp for region in self.iter() { 288045964deSAndrew Carp let Some(guest_addr_end) = guest_addr.checked_add(size) else { 289045964deSAndrew Carp return false; 290045964deSAndrew Carp }; 291045964deSAndrew Carp let Some(region_end) = region.start.raw_value().checked_add(region.length) else { 292045964deSAndrew Carp return false; 293045964deSAndrew Carp }; 294045964deSAndrew Carp if guest_addr >= region.start.raw_value() && guest_addr_end <= region_end { 295045964deSAndrew Carp return true; 296045964deSAndrew Carp } 297045964deSAndrew Carp } 298045964deSAndrew Carp false 299045964deSAndrew Carp } 300045964deSAndrew Carp 301045964deSAndrew Carp // Locate the user region address for a guest address within all mmio regions find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>302045964deSAndrew Carp fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error> { 303045964deSAndrew Carp for region in self.iter() { 304045964deSAndrew Carp for user_region in region.user_memory_regions.iter() { 305045964deSAndrew Carp if guest_addr >= user_region.start 306045964deSAndrew Carp && guest_addr < user_region.start + user_region.size 307045964deSAndrew Carp { 308045964deSAndrew Carp return Ok(user_region.host_addr + (guest_addr - user_region.start)); 309045964deSAndrew Carp } 310045964deSAndrew Carp } 311045964deSAndrew Carp } 312045964deSAndrew Carp 313ea4693a0SJinank Jain Err(io::Error::other(format!( 314ea4693a0SJinank Jain "unable to find user address: 0x{guest_addr:x}" 315ea4693a0SJinank Jain ))) 316045964deSAndrew Carp } 317045964deSAndrew Carp } 318045964deSAndrew Carp 319cdfc1773SRob Bradford #[derive(Debug, Error)] 320cdfc1773SRob Bradford pub enum VfioError { 321*a3692144SPhilipp Schuster #[error("Kernel VFIO error")] 322cdfc1773SRob Bradford KernelVfio(#[source] vfio_ioctls::VfioError), 323*a3692144SPhilipp Schuster #[error("VFIO user error")] 3249254b74cSRob Bradford VfioUser(#[source] vfio_user::Error), 325cdfc1773SRob Bradford } 326db5b4763SSamuel Ortiz 3274a99d3dbSSebastien Boeuf pub(crate) trait Vfio: Send + Sync { read_config_byte(&self, offset: u32) -> u8328a0e48a87SRob Bradford fn read_config_byte(&self, offset: u32) -> u8 { 329a0e48a87SRob Bradford let mut data: [u8; 1] = [0]; 330a0e48a87SRob Bradford self.read_config(offset, &mut data); 331a0e48a87SRob Bradford data[0] 332a0e48a87SRob Bradford } 333a0e48a87SRob Bradford read_config_word(&self, offset: u32) -> u16334a0e48a87SRob Bradford fn read_config_word(&self, offset: u32) -> u16 { 335a0e48a87SRob Bradford let mut data: [u8; 2] = [0, 0]; 336a0e48a87SRob Bradford self.read_config(offset, &mut data); 337a0e48a87SRob Bradford u16::from_le_bytes(data) 338a0e48a87SRob Bradford } 339a0e48a87SRob Bradford read_config_dword(&self, offset: u32) -> u32340a0e48a87SRob Bradford fn read_config_dword(&self, offset: u32) -> u32 { 341a0e48a87SRob Bradford let mut data: [u8; 4] = [0, 0, 0, 0]; 342a0e48a87SRob Bradford self.read_config(offset, &mut data); 343a0e48a87SRob Bradford u32::from_le_bytes(data) 344a0e48a87SRob Bradford } 345a0e48a87SRob Bradford write_config_dword(&self, offset: u32, buf: u32)346a0e48a87SRob Bradford fn write_config_dword(&self, offset: u32, buf: u32) { 347a0e48a87SRob Bradford let data: [u8; 4] = buf.to_le_bytes(); 348a0e48a87SRob Bradford self.write_config(offset, &data) 349a0e48a87SRob Bradford } 350a0e48a87SRob Bradford read_config(&self, offset: u32, data: &mut [u8])35160d05451SRob Bradford fn read_config(&self, offset: u32, data: &mut [u8]) { 35260d05451SRob Bradford self.region_read(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data.as_mut()); 35360d05451SRob Bradford } 35460d05451SRob Bradford write_config(&self, offset: u32, data: &[u8])35560d05451SRob Bradford fn write_config(&self, offset: u32, data: &[u8]) { 35660d05451SRob Bradford self.region_write(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data) 35760d05451SRob Bradford } 35860d05451SRob Bradford enable_msi(&self, fds: Vec<&EventFd>) -> Result<(), VfioError>359cdfc1773SRob Bradford fn enable_msi(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> { 360ecc8382fSRob Bradford self.enable_irq(VFIO_PCI_MSI_IRQ_INDEX, fds) 361ecc8382fSRob Bradford } 362ecc8382fSRob Bradford disable_msi(&self) -> Result<(), VfioError>363cdfc1773SRob Bradford fn disable_msi(&self) -> Result<(), VfioError> { 364ecc8382fSRob Bradford self.disable_irq(VFIO_PCI_MSI_IRQ_INDEX) 365ecc8382fSRob Bradford } 366ecc8382fSRob Bradford enable_msix(&self, fds: Vec<&EventFd>) -> Result<(), VfioError>367cdfc1773SRob Bradford fn enable_msix(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> { 368ecc8382fSRob Bradford self.enable_irq(VFIO_PCI_MSIX_IRQ_INDEX, fds) 369ecc8382fSRob Bradford } 370ecc8382fSRob Bradford disable_msix(&self) -> Result<(), VfioError>371cdfc1773SRob Bradford fn disable_msix(&self) -> Result<(), VfioError> { 372ecc8382fSRob Bradford self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX) 373ecc8382fSRob Bradford } 374ecc8382fSRob Bradford region_read(&self, _index: u32, _offset: u64, _data: &mut [u8])37560d05451SRob Bradford fn region_read(&self, _index: u32, _offset: u64, _data: &mut [u8]) { 376349dbb9aSRob Bradford unimplemented!() 377349dbb9aSRob Bradford } 378349dbb9aSRob Bradford region_write(&self, _index: u32, _offset: u64, _data: &[u8])37960d05451SRob Bradford fn region_write(&self, _index: u32, _offset: u64, _data: &[u8]) { 380349dbb9aSRob Bradford unimplemented!() 381349dbb9aSRob Bradford } 382521a11a1SRob Bradford get_irq_info(&self, _irq_index: u32) -> Option<VfioIrq>38351ceae91SRob Bradford fn get_irq_info(&self, _irq_index: u32) -> Option<VfioIrq> { 384521a11a1SRob Bradford unimplemented!() 385521a11a1SRob Bradford } 386ecc8382fSRob Bradford enable_irq(&self, _irq_index: u32, _event_fds: Vec<&EventFd>) -> Result<(), VfioError>387cdfc1773SRob Bradford fn enable_irq(&self, _irq_index: u32, _event_fds: Vec<&EventFd>) -> Result<(), VfioError> { 388ecc8382fSRob Bradford unimplemented!() 389ecc8382fSRob Bradford } 390ecc8382fSRob Bradford disable_irq(&self, _irq_index: u32) -> Result<(), VfioError>391cdfc1773SRob Bradford fn disable_irq(&self, _irq_index: u32) -> Result<(), VfioError> { 392ecc8382fSRob Bradford unimplemented!() 393ecc8382fSRob Bradford } 394a5f4d795SRob Bradford unmask_irq(&self, _irq_index: u32) -> Result<(), VfioError>395cdfc1773SRob Bradford fn unmask_irq(&self, _irq_index: u32) -> Result<(), VfioError> { 396a5f4d795SRob Bradford unimplemented!() 397a5f4d795SRob Bradford } 398349dbb9aSRob Bradford } 399349dbb9aSRob Bradford 400dc35dac3SRob Bradford struct VfioDeviceWrapper { 401db5b4763SSamuel Ortiz device: Arc<VfioDevice>, 402db5b4763SSamuel Ortiz } 403db5b4763SSamuel Ortiz 404dc35dac3SRob Bradford impl VfioDeviceWrapper { new(device: Arc<VfioDevice>) -> Self405db5b4763SSamuel Ortiz fn new(device: Arc<VfioDevice>) -> Self { 406349dbb9aSRob Bradford Self { device } 407349dbb9aSRob Bradford } 408db5b4763SSamuel Ortiz } 409db5b4763SSamuel Ortiz 410dc35dac3SRob Bradford impl Vfio for VfioDeviceWrapper { region_read(&self, index: u32, offset: u64, data: &mut [u8])41160d05451SRob Bradford fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) { 41260d05451SRob Bradford self.device.region_read(index, data, offset) 413db5b4763SSamuel Ortiz } 414db5b4763SSamuel Ortiz region_write(&self, index: u32, offset: u64, data: &[u8])41560d05451SRob Bradford fn region_write(&self, index: u32, offset: u64, data: &[u8]) { 41660d05451SRob Bradford self.device.region_write(index, data, offset) 417db5b4763SSamuel Ortiz } 418521a11a1SRob Bradford get_irq_info(&self, irq_index: u32) -> Option<VfioIrq>41951ceae91SRob Bradford fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> { 42051ceae91SRob Bradford self.device.get_irq_info(irq_index).copied() 421521a11a1SRob Bradford } 422ecc8382fSRob Bradford enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError>423cdfc1773SRob Bradford fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> { 424cdfc1773SRob Bradford self.device 425cdfc1773SRob Bradford .enable_irq(irq_index, event_fds) 426cdfc1773SRob Bradford .map_err(VfioError::KernelVfio) 427ecc8382fSRob Bradford } 428ecc8382fSRob Bradford disable_irq(&self, irq_index: u32) -> Result<(), VfioError>429cdfc1773SRob Bradford fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> { 430cdfc1773SRob Bradford self.device 431cdfc1773SRob Bradford .disable_irq(irq_index) 432cdfc1773SRob Bradford .map_err(VfioError::KernelVfio) 433ecc8382fSRob Bradford } 434a5f4d795SRob Bradford unmask_irq(&self, irq_index: u32) -> Result<(), VfioError>435cdfc1773SRob Bradford fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> { 436cdfc1773SRob Bradford self.device 437cdfc1773SRob Bradford .unmask_irq(irq_index) 438cdfc1773SRob Bradford .map_err(VfioError::KernelVfio) 439a5f4d795SRob Bradford } 440db5b4763SSamuel Ortiz } 441db5b4763SSamuel Ortiz 44210ab87d6SRob Bradford #[derive(Serialize, Deserialize)] 44349069d84SSebastien Boeuf struct VfioCommonState { 44449069d84SSebastien Boeuf intx_state: Option<IntxState>, 44549069d84SSebastien Boeuf msi_state: Option<MsiState>, 44649069d84SSebastien Boeuf msix_state: Option<MsixState>, 44749069d84SSebastien Boeuf } 44849069d84SSebastien Boeuf 449e45e3df6SSebastien Boeuf pub(crate) struct ConfigPatch { 450e45e3df6SSebastien Boeuf mask: u32, 451e45e3df6SSebastien Boeuf patch: u32, 452e45e3df6SSebastien Boeuf } 453e45e3df6SSebastien Boeuf 454ec1f7189SRob Bradford pub(crate) struct VfioCommon { 455ec1f7189SRob Bradford pub(crate) configuration: PciConfiguration, 456ec1f7189SRob Bradford pub(crate) mmio_regions: Vec<MmioRegion>, 457ec1f7189SRob Bradford pub(crate) interrupt: Interrupt, 458eb6daa2fSSebastien Boeuf pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 459e6aa792cSSebastien Boeuf pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, 4604a99d3dbSSebastien Boeuf pub(crate) vfio_wrapper: Arc<dyn Vfio>, 461e45e3df6SSebastien Boeuf pub(crate) patches: HashMap<usize, ConfigPatch>, 462b750c332SThomas Barrett x_nv_gpudirect_clique: Option<u8>, 463d27ea34aSRob Bradford } 464d27ea34aSRob Bradford 46522275c34SRob Bradford impl VfioCommon { new( msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, vfio_wrapper: Arc<dyn Vfio>, subclass: &dyn PciSubclass, bdf: PciBdf, snapshot: Option<Snapshot>, x_nv_gpudirect_clique: Option<u8>, ) -> Result<Self, VfioPciError>466d6bf1f5eSSebastien Boeuf pub(crate) fn new( 467d6bf1f5eSSebastien Boeuf msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 468d6bf1f5eSSebastien Boeuf legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, 469d6bf1f5eSSebastien Boeuf vfio_wrapper: Arc<dyn Vfio>, 470d6bf1f5eSSebastien Boeuf subclass: &dyn PciSubclass, 471d6bf1f5eSSebastien Boeuf bdf: PciBdf, 472cc3706afSSebastien Boeuf snapshot: Option<Snapshot>, 473b750c332SThomas Barrett x_nv_gpudirect_clique: Option<u8>, 474d6bf1f5eSSebastien Boeuf ) -> Result<Self, VfioPciError> { 475cc3706afSSebastien Boeuf let pci_configuration_state = 47610ab87d6SRob Bradford vm_migration::state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID).map_err(|e| { 477cc3706afSSebastien Boeuf VfioPciError::RetrievePciConfigurationState(anyhow!( 478cc3706afSSebastien Boeuf "Failed to get PciConfigurationState from Snapshot: {}", 479cc3706afSSebastien Boeuf e 480cc3706afSSebastien Boeuf )) 481cc3706afSSebastien Boeuf })?; 482cc3706afSSebastien Boeuf 483d6bf1f5eSSebastien Boeuf let configuration = PciConfiguration::new( 484d6bf1f5eSSebastien Boeuf 0, 485d6bf1f5eSSebastien Boeuf 0, 486d6bf1f5eSSebastien Boeuf 0, 487d6bf1f5eSSebastien Boeuf PciClassCode::Other, 488d6bf1f5eSSebastien Boeuf subclass, 489d6bf1f5eSSebastien Boeuf None, 490d6bf1f5eSSebastien Boeuf PciHeaderType::Device, 491d6bf1f5eSSebastien Boeuf 0, 492d6bf1f5eSSebastien Boeuf 0, 493d6bf1f5eSSebastien Boeuf None, 494cc3706afSSebastien Boeuf pci_configuration_state, 495d6bf1f5eSSebastien Boeuf ); 496d6bf1f5eSSebastien Boeuf 497d6bf1f5eSSebastien Boeuf let mut vfio_common = VfioCommon { 498d6bf1f5eSSebastien Boeuf mmio_regions: Vec::new(), 499d6bf1f5eSSebastien Boeuf configuration, 500d6bf1f5eSSebastien Boeuf interrupt: Interrupt { 501d6bf1f5eSSebastien Boeuf intx: None, 502d6bf1f5eSSebastien Boeuf msi: None, 503d6bf1f5eSSebastien Boeuf msix: None, 504d6bf1f5eSSebastien Boeuf }, 505d6bf1f5eSSebastien Boeuf msi_interrupt_manager, 506d6bf1f5eSSebastien Boeuf legacy_interrupt_group, 507d6bf1f5eSSebastien Boeuf vfio_wrapper, 508d6bf1f5eSSebastien Boeuf patches: HashMap::new(), 509b750c332SThomas Barrett x_nv_gpudirect_clique, 510d6bf1f5eSSebastien Boeuf }; 511d6bf1f5eSSebastien Boeuf 512cc3706afSSebastien Boeuf let state: Option<VfioCommonState> = snapshot 513cc3706afSSebastien Boeuf .as_ref() 51410ab87d6SRob Bradford .map(|s| s.to_state()) 515cc3706afSSebastien Boeuf .transpose() 516cc3706afSSebastien Boeuf .map_err(|e| { 517cc3706afSSebastien Boeuf VfioPciError::RetrieveVfioCommonState(anyhow!( 518cc3706afSSebastien Boeuf "Failed to get VfioCommonState from Snapshot: {}", 519cc3706afSSebastien Boeuf e 520cc3706afSSebastien Boeuf )) 521cc3706afSSebastien Boeuf })?; 52210ab87d6SRob Bradford let msi_state = 52310ab87d6SRob Bradford vm_migration::state_from_id(snapshot.as_ref(), MSI_CONFIG_ID).map_err(|e| { 524cc3706afSSebastien Boeuf VfioPciError::RetrieveMsiConfigState(anyhow!( 525cc3706afSSebastien Boeuf "Failed to get MsiConfigState from Snapshot: {}", 526cc3706afSSebastien Boeuf e 527cc3706afSSebastien Boeuf )) 528cc3706afSSebastien Boeuf })?; 52910ab87d6SRob Bradford let msix_state = 53010ab87d6SRob Bradford vm_migration::state_from_id(snapshot.as_ref(), MSIX_CONFIG_ID).map_err(|e| { 531cc3706afSSebastien Boeuf VfioPciError::RetrieveMsixConfigState(anyhow!( 532cc3706afSSebastien Boeuf "Failed to get MsixConfigState from Snapshot: {}", 533cc3706afSSebastien Boeuf e 534cc3706afSSebastien Boeuf )) 535cc3706afSSebastien Boeuf })?; 536cc3706afSSebastien Boeuf 537cc3706afSSebastien Boeuf if let Some(state) = state.as_ref() { 538cc3706afSSebastien Boeuf vfio_common.set_state(state, msi_state, msix_state)?; 539cc3706afSSebastien Boeuf } else { 540d6bf1f5eSSebastien Boeuf vfio_common.parse_capabilities(bdf); 541d6bf1f5eSSebastien Boeuf vfio_common.initialize_legacy_interrupt()?; 542d6bf1f5eSSebastien Boeuf } 543d6bf1f5eSSebastien Boeuf 544d6bf1f5eSSebastien Boeuf Ok(vfio_common) 545d6bf1f5eSSebastien Boeuf } 546d6bf1f5eSSebastien Boeuf 5477bf0cc1eSPhilipp Schuster /// In case msix table offset is not page size aligned, we need do some fixup to achieve it. 5487bf0cc1eSPhilipp Schuster /// Because we don't want the MMIO RW region and trap region overlap each other. fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64549a7187168SJianyong Wu fn fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64 { 550363b4780SRob Bradford if let Some(msix) = self.interrupt.msix.as_mut() { 551a7187168SJianyong Wu let msix_cap = &mut msix.cap; 552a7187168SJianyong Wu 553a7187168SJianyong Wu // Suppose table_bir equals to pba_bir here. Am I right? 554a7187168SJianyong Wu let (table_offset, table_size) = msix_cap.table_range(); 555a7187168SJianyong Wu if is_page_size_aligned(table_offset) || msix_cap.table_bir() != bar_id { 556a7187168SJianyong Wu return region_size; 557a7187168SJianyong Wu } 558a7187168SJianyong Wu 559a7187168SJianyong Wu let (pba_offset, pba_size) = msix_cap.pba_range(); 560a7187168SJianyong Wu let msix_sz = align_page_size_up(table_size + pba_size); 561a7187168SJianyong Wu // Expand region to hold RW and trap region which both page size aligned 562a7187168SJianyong Wu let size = std::cmp::max(region_size * 2, msix_sz * 2); 563a7187168SJianyong Wu // let table starts from the middle of the region 564a7187168SJianyong Wu msix_cap.table_set_offset((size / 2) as u32); 565a7187168SJianyong Wu msix_cap.pba_set_offset((size / 2 + pba_offset - table_offset) as u32); 566a7187168SJianyong Wu 567a7187168SJianyong Wu size 568363b4780SRob Bradford } else { 569363b4780SRob Bradford // MSI-X not supported for this device 570363b4780SRob Bradford region_size 571363b4780SRob Bradford } 572a7187168SJianyong Wu } 573a7187168SJianyong Wu 57445b01d59SThomas Barrett // The `allocator` argument is unused on `aarch64` 57545b01d59SThomas Barrett #[allow(unused_variables)] allocate_bars( &mut self, allocator: &Arc<Mutex<SystemAllocator>>, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, resources: Option<Vec<Resource>>, ) -> Result<Vec<PciBarConfiguration>, PciDeviceError>576ec1f7189SRob Bradford pub(crate) fn allocate_bars( 57722275c34SRob Bradford &mut self, 5789ef1187fSRob Bradford allocator: &Arc<Mutex<SystemAllocator>>, 57945b01d59SThomas Barrett mmio32_allocator: &mut AddressAllocator, 58045b01d59SThomas Barrett mmio64_allocator: &mut AddressAllocator, 5816175cc09SSebastien Boeuf resources: Option<Vec<Resource>>, 58289218b6dSSebastien Boeuf ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> { 58389218b6dSSebastien Boeuf let mut bars = Vec::new(); 584a9ec0f33SBo Chen let mut bar_id = VFIO_PCI_BAR0_REGION_INDEX; 58522275c34SRob Bradford 58622275c34SRob Bradford // Going through all regular regions to compute the BAR size. 58722275c34SRob Bradford // We're not saving the BAR address to restore it, because we 58822275c34SRob Bradford // are going to allocate a guest address for each BAR and write 58922275c34SRob Bradford // that new address back. 59022275c34SRob Bradford while bar_id < VFIO_PCI_CONFIG_REGION_INDEX { 59137521ddfSSebastien Boeuf let mut region_size: u64 = 0; 59237521ddfSSebastien Boeuf let mut region_type = PciBarRegionType::Memory32BitRegion; 593868d1f69SSteven Dake let mut prefetchable = PciBarPrefetchable::NotPrefetchable; 59437521ddfSSebastien Boeuf let mut flags: u32 = 0; 59522275c34SRob Bradford 59611e9f433SSebastien Boeuf let mut restored_bar_addr = None; 59711e9f433SSebastien Boeuf if let Some(resources) = &resources { 59811e9f433SSebastien Boeuf for resource in resources { 59937521ddfSSebastien Boeuf if let Resource::PciBar { 60037521ddfSSebastien Boeuf index, 60137521ddfSSebastien Boeuf base, 60237521ddfSSebastien Boeuf size, 60337521ddfSSebastien Boeuf type_, 60437521ddfSSebastien Boeuf .. 60537521ddfSSebastien Boeuf } = resource 60637521ddfSSebastien Boeuf { 60711e9f433SSebastien Boeuf if *index == bar_id as usize { 60811e9f433SSebastien Boeuf restored_bar_addr = Some(GuestAddress(*base)); 60937521ddfSSebastien Boeuf region_size = *size; 61037521ddfSSebastien Boeuf region_type = PciBarRegionType::from(*type_); 61111e9f433SSebastien Boeuf break; 6126175cc09SSebastien Boeuf } 61311e9f433SSebastien Boeuf } 61411e9f433SSebastien Boeuf } 61537521ddfSSebastien Boeuf if restored_bar_addr.is_none() { 61637521ddfSSebastien Boeuf bar_id += 1; 61737521ddfSSebastien Boeuf continue; 61811e9f433SSebastien Boeuf } 61937521ddfSSebastien Boeuf } else { 62022275c34SRob Bradford let bar_offset = if bar_id == VFIO_PCI_ROM_REGION_INDEX { 62122275c34SRob Bradford (PCI_ROM_EXP_BAR_INDEX * 4) as u32 62222275c34SRob Bradford } else { 62322275c34SRob Bradford PCI_CONFIG_BAR_OFFSET + bar_id * 4 62422275c34SRob Bradford }; 62522275c34SRob Bradford 62622275c34SRob Bradford // First read flags 6274a99d3dbSSebastien Boeuf flags = self.vfio_wrapper.read_config_dword(bar_offset); 62822275c34SRob Bradford 62922275c34SRob Bradford // Is this an IO BAR? 63022275c34SRob Bradford let io_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX { 63122275c34SRob Bradford matches!(flags & PCI_CONFIG_IO_BAR, PCI_CONFIG_IO_BAR) 63222275c34SRob Bradford } else { 63322275c34SRob Bradford false 63422275c34SRob Bradford }; 63522275c34SRob Bradford 63622275c34SRob Bradford // Is this a 64-bit BAR? 63722275c34SRob Bradford let is_64bit_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX { 63822275c34SRob Bradford matches!( 63922275c34SRob Bradford flags & PCI_CONFIG_MEMORY_BAR_64BIT, 64022275c34SRob Bradford PCI_CONFIG_MEMORY_BAR_64BIT 64122275c34SRob Bradford ) 64222275c34SRob Bradford } else { 64322275c34SRob Bradford false 64422275c34SRob Bradford }; 64522275c34SRob Bradford 646868d1f69SSteven Dake if matches!( 647868d1f69SSteven Dake flags & PCI_CONFIG_BAR_PREFETCHABLE, 648868d1f69SSteven Dake PCI_CONFIG_BAR_PREFETCHABLE 649868d1f69SSteven Dake ) { 650868d1f69SSteven Dake prefetchable = PciBarPrefetchable::Prefetchable 651868d1f69SSteven Dake }; 652868d1f69SSteven Dake 65322275c34SRob Bradford // To get size write all 1s 6544a99d3dbSSebastien Boeuf self.vfio_wrapper 6554a99d3dbSSebastien Boeuf .write_config_dword(bar_offset, 0xffff_ffff); 65622275c34SRob Bradford 65722275c34SRob Bradford // And read back BAR value. The device will write zeros for bits it doesn't care about 6584a99d3dbSSebastien Boeuf let mut lower = self.vfio_wrapper.read_config_dword(bar_offset); 65922275c34SRob Bradford 66022275c34SRob Bradford if io_bar { 66122275c34SRob Bradford // Mask flag bits (lowest 2 for I/O bars) 66222275c34SRob Bradford lower &= !0b11; 66322275c34SRob Bradford 66422275c34SRob Bradford // BAR is not enabled 66522275c34SRob Bradford if lower == 0 { 66622275c34SRob Bradford bar_id += 1; 66722275c34SRob Bradford continue; 66822275c34SRob Bradford } 66922275c34SRob Bradford 670cf68f03aSHenry Wang // IO BAR 671cf68f03aSHenry Wang region_type = PciBarRegionType::IoRegion; 672cf68f03aSHenry Wang 67322275c34SRob Bradford // Invert bits and add 1 to calculate size 67422275c34SRob Bradford region_size = (!lower + 1) as u64; 67522275c34SRob Bradford } else if is_64bit_bar { 67622275c34SRob Bradford // 64 bits Memory BAR 67722275c34SRob Bradford region_type = PciBarRegionType::Memory64BitRegion; 67822275c34SRob Bradford 67922275c34SRob Bradford // Query size of upper BAR of 64-bit BAR 68022275c34SRob Bradford let upper_offset: u32 = PCI_CONFIG_BAR_OFFSET + (bar_id + 1) * 4; 6814a99d3dbSSebastien Boeuf self.vfio_wrapper 6824a99d3dbSSebastien Boeuf .write_config_dword(upper_offset, 0xffff_ffff); 6834a99d3dbSSebastien Boeuf let upper = self.vfio_wrapper.read_config_dword(upper_offset); 68422275c34SRob Bradford 685b57cc3d7SRob Bradford let mut combined_size = (u64::from(upper) << 32) | u64::from(lower); 68622275c34SRob Bradford 68722275c34SRob Bradford // Mask out flag bits (lowest 4 for memory bars) 68822275c34SRob Bradford combined_size &= !0b1111; 68922275c34SRob Bradford 69022275c34SRob Bradford // BAR is not enabled 69122275c34SRob Bradford if combined_size == 0 { 69222275c34SRob Bradford bar_id += 1; 69322275c34SRob Bradford continue; 69422275c34SRob Bradford } 69522275c34SRob Bradford 69622275c34SRob Bradford // Invert and add 1 to to find size 697a9ec0f33SBo Chen region_size = !combined_size + 1; 69822275c34SRob Bradford } else { 69937521ddfSSebastien Boeuf region_type = PciBarRegionType::Memory32BitRegion; 70037521ddfSSebastien Boeuf 70122275c34SRob Bradford // Mask out flag bits (lowest 4 for memory bars) 70222275c34SRob Bradford lower &= !0b1111; 70322275c34SRob Bradford 70422275c34SRob Bradford if lower == 0 { 70522275c34SRob Bradford bar_id += 1; 70622275c34SRob Bradford continue; 70722275c34SRob Bradford } 70822275c34SRob Bradford 70922275c34SRob Bradford // Invert and add 1 to to find size 71022275c34SRob Bradford region_size = (!lower + 1) as u64; 71137521ddfSSebastien Boeuf } 71237521ddfSSebastien Boeuf } 71322275c34SRob Bradford 71437521ddfSSebastien Boeuf let bar_addr = match region_type { 71537521ddfSSebastien Boeuf PciBarRegionType::IoRegion => { 71637521ddfSSebastien Boeuf // The address needs to be 4 bytes aligned. 71737521ddfSSebastien Boeuf allocator 7189ef1187fSRob Bradford .lock() 7199ef1187fSRob Bradford .unwrap() 72037521ddfSSebastien Boeuf .allocate_io_addresses(restored_bar_addr, region_size, Some(0x4)) 72137521ddfSSebastien Boeuf .ok_or(PciDeviceError::IoAllocationFailed(region_size))? 72222275c34SRob Bradford } 72337521ddfSSebastien Boeuf PciBarRegionType::Memory32BitRegion => { 72437521ddfSSebastien Boeuf // BAR allocation must be naturally aligned 72545b01d59SThomas Barrett mmio32_allocator 72645b01d59SThomas Barrett .allocate(restored_bar_addr, region_size, Some(region_size)) 72737521ddfSSebastien Boeuf .ok_or(PciDeviceError::IoAllocationFailed(region_size))? 72837521ddfSSebastien Boeuf } 72937521ddfSSebastien Boeuf PciBarRegionType::Memory64BitRegion => { 730a7187168SJianyong Wu // We need do some fixup to keep MMIO RW region and msix cap region page size 731a7187168SJianyong Wu // aligned. 732a7187168SJianyong Wu region_size = self.fixup_msix_region(bar_id, region_size); 73345b01d59SThomas Barrett mmio64_allocator 734eca75dcfSJianyong Wu .allocate( 735eca75dcfSJianyong Wu restored_bar_addr, 736eca75dcfSJianyong Wu region_size, 737c9f94be7SThomas Barrett Some(std::cmp::max( 738eca75dcfSJianyong Wu // SAFETY: FFI call. Trivially safe. 739c9f94be7SThomas Barrett unsafe { sysconf(_SC_PAGESIZE) as GuestUsize }, 740c9f94be7SThomas Barrett region_size, 741c9f94be7SThomas Barrett )), 742eca75dcfSJianyong Wu ) 74337521ddfSSebastien Boeuf .ok_or(PciDeviceError::IoAllocationFailed(region_size))? 74437521ddfSSebastien Boeuf } 74537521ddfSSebastien Boeuf }; 74622275c34SRob Bradford 74722275c34SRob Bradford // We can now build our BAR configuration block. 74889218b6dSSebastien Boeuf let bar = PciBarConfiguration::default() 749da95c0d7SSebastien Boeuf .set_index(bar_id as usize) 75022275c34SRob Bradford .set_address(bar_addr.raw_value()) 75122275c34SRob Bradford .set_size(region_size) 752868d1f69SSteven Dake .set_region_type(region_type) 753868d1f69SSteven Dake .set_prefetchable(prefetchable); 75422275c34SRob Bradford 75522275c34SRob Bradford if bar_id == VFIO_PCI_ROM_REGION_INDEX { 75622275c34SRob Bradford self.configuration 75789218b6dSSebastien Boeuf .add_pci_rom_bar(&bar, flags & 0x1) 75822275c34SRob Bradford .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?; 75922275c34SRob Bradford } else { 76022275c34SRob Bradford self.configuration 76189218b6dSSebastien Boeuf .add_pci_bar(&bar) 76222275c34SRob Bradford .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?; 76322275c34SRob Bradford } 76422275c34SRob Bradford 76589218b6dSSebastien Boeuf bars.push(bar); 76622275c34SRob Bradford self.mmio_regions.push(MmioRegion { 76722275c34SRob Bradford start: bar_addr, 76822275c34SRob Bradford length: region_size, 76922275c34SRob Bradford type_: region_type, 770a9ec0f33SBo Chen index: bar_id, 771ed5f2544SSebastien Boeuf user_memory_regions: Vec::new(), 77222275c34SRob Bradford }); 77322275c34SRob Bradford 77422275c34SRob Bradford bar_id += 1; 77537521ddfSSebastien Boeuf if region_type == PciBarRegionType::Memory64BitRegion { 77622275c34SRob Bradford bar_id += 1; 77722275c34SRob Bradford } 77822275c34SRob Bradford } 77922275c34SRob Bradford 78089218b6dSSebastien Boeuf Ok(bars) 78122275c34SRob Bradford } 78222275c34SRob Bradford 78345b01d59SThomas Barrett // The `allocator` argument is unused on `aarch64` 78445b01d59SThomas Barrett #[allow(unused_variables)] free_bars( &mut self, allocator: &mut SystemAllocator, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, ) -> Result<(), PciDeviceError>785ec1f7189SRob Bradford pub(crate) fn free_bars( 78622275c34SRob Bradford &mut self, 78722275c34SRob Bradford allocator: &mut SystemAllocator, 78845b01d59SThomas Barrett mmio32_allocator: &mut AddressAllocator, 78945b01d59SThomas Barrett mmio64_allocator: &mut AddressAllocator, 790cdfc1773SRob Bradford ) -> Result<(), PciDeviceError> { 79122275c34SRob Bradford for region in self.mmio_regions.iter() { 79222275c34SRob Bradford match region.type_ { 79322275c34SRob Bradford PciBarRegionType::IoRegion => { 79422275c34SRob Bradford allocator.free_io_addresses(region.start, region.length); 79522275c34SRob Bradford } 79622275c34SRob Bradford PciBarRegionType::Memory32BitRegion => { 79745b01d59SThomas Barrett mmio32_allocator.free(region.start, region.length); 79822275c34SRob Bradford } 79922275c34SRob Bradford PciBarRegionType::Memory64BitRegion => { 80045b01d59SThomas Barrett mmio64_allocator.free(region.start, region.length); 80122275c34SRob Bradford } 80222275c34SRob Bradford } 80322275c34SRob Bradford } 80422275c34SRob Bradford Ok(()) 80522275c34SRob Bradford } 8062a76a589SRob Bradford parse_msix_capabilities(&mut self, cap: u8) -> MsixCap807de764456SJulian Stecklina fn parse_msix_capabilities(&mut self, cap: u8) -> MsixCap { 8084a99d3dbSSebastien Boeuf let msg_ctl = self.vfio_wrapper.read_config_word((cap + 2).into()); 8092a76a589SRob Bradford 8104a99d3dbSSebastien Boeuf let table = self.vfio_wrapper.read_config_dword((cap + 4).into()); 8112a76a589SRob Bradford 8124a99d3dbSSebastien Boeuf let pba = self.vfio_wrapper.read_config_dword((cap + 8).into()); 8132a76a589SRob Bradford 814f767e97fSSebastien Boeuf MsixCap { 8152a76a589SRob Bradford msg_ctl, 8162a76a589SRob Bradford table, 8172a76a589SRob Bradford pba, 818f767e97fSSebastien Boeuf } 819f767e97fSSebastien Boeuf } 8202a76a589SRob Bradford initialize_msix( &mut self, msix_cap: MsixCap, cap_offset: u32, bdf: PciBdf, state: Option<MsixConfigState>, )821de764456SJulian Stecklina fn initialize_msix( 822cc3706afSSebastien Boeuf &mut self, 823cc3706afSSebastien Boeuf msix_cap: MsixCap, 824cc3706afSSebastien Boeuf cap_offset: u32, 825cc3706afSSebastien Boeuf bdf: PciBdf, 826cc3706afSSebastien Boeuf state: Option<MsixConfigState>, 827cc3706afSSebastien Boeuf ) { 828eb6daa2fSSebastien Boeuf let interrupt_source_group = self 829eb6daa2fSSebastien Boeuf .msi_interrupt_manager 8302a76a589SRob Bradford .create_group(MsiIrqGroupConfig { 8312a76a589SRob Bradford base: 0, 8322a76a589SRob Bradford count: msix_cap.table_size() as InterruptIndex, 8332a76a589SRob Bradford }) 8342a76a589SRob Bradford .unwrap(); 8352a76a589SRob Bradford 8361db77185SMichael Zhao let msix_config = MsixConfig::new( 8371db77185SMichael Zhao msix_cap.table_size(), 8381db77185SMichael Zhao interrupt_source_group.clone(), 8391db77185SMichael Zhao bdf.into(), 840cc3706afSSebastien Boeuf state, 841eae80438SSebastien Boeuf ) 842eae80438SSebastien Boeuf .unwrap(); 8432a76a589SRob Bradford 8442a76a589SRob Bradford self.interrupt.msix = Some(VfioMsix { 8452a76a589SRob Bradford bar: msix_config, 8462a76a589SRob Bradford cap: msix_cap, 847f767e97fSSebastien Boeuf cap_offset, 8482a76a589SRob Bradford interrupt_source_group, 8492a76a589SRob Bradford }); 8502a76a589SRob Bradford } 8512a76a589SRob Bradford parse_msi_capabilities(&mut self, cap: u8) -> u16852de764456SJulian Stecklina fn parse_msi_capabilities(&mut self, cap: u8) -> u16 { 8534a99d3dbSSebastien Boeuf self.vfio_wrapper.read_config_word((cap + 2).into()) 854f767e97fSSebastien Boeuf } 8552a76a589SRob Bradford initialize_msi(&mut self, msg_ctl: u16, cap_offset: u32, state: Option<MsiConfigState>)856de764456SJulian Stecklina fn initialize_msi(&mut self, msg_ctl: u16, cap_offset: u32, state: Option<MsiConfigState>) { 857eb6daa2fSSebastien Boeuf let interrupt_source_group = self 858eb6daa2fSSebastien Boeuf .msi_interrupt_manager 8592a76a589SRob Bradford .create_group(MsiIrqGroupConfig { 8602a76a589SRob Bradford base: 0, 8612a76a589SRob Bradford count: msi_num_enabled_vectors(msg_ctl) as InterruptIndex, 8622a76a589SRob Bradford }) 8632a76a589SRob Bradford .unwrap(); 8642a76a589SRob Bradford 865cc3706afSSebastien Boeuf let msi_config = MsiConfig::new(msg_ctl, interrupt_source_group.clone(), state).unwrap(); 8662a76a589SRob Bradford 8672a76a589SRob Bradford self.interrupt.msi = Some(VfioMsi { 8682a76a589SRob Bradford cfg: msi_config, 869f767e97fSSebastien Boeuf cap_offset, 8702a76a589SRob Bradford interrupt_source_group, 8712a76a589SRob Bradford }); 8722a76a589SRob Bradford } 873521a11a1SRob Bradford 87456ca26e7SJulian Stecklina /// Returns true, if the device claims to have a PCI capability list. has_capabilities(&self) -> bool875de764456SJulian Stecklina fn has_capabilities(&self) -> bool { 87656ca26e7SJulian Stecklina let status = self.vfio_wrapper.read_config_word(PCI_CONFIG_STATUS_OFFSET); 87756ca26e7SJulian Stecklina status & PCI_CONFIG_STATUS_CAPABILITIES_LIST != 0 87856ca26e7SJulian Stecklina } 87956ca26e7SJulian Stecklina get_msix_cap_idx(&self) -> Option<usize>880de764456SJulian Stecklina fn get_msix_cap_idx(&self) -> Option<usize> { 88156ca26e7SJulian Stecklina if !self.has_capabilities() { 88256ca26e7SJulian Stecklina return None; 88356ca26e7SJulian Stecklina } 88456ca26e7SJulian Stecklina 885a7187168SJianyong Wu let mut cap_next = self 886a7187168SJianyong Wu .vfio_wrapper 887a0065452SJulian Stecklina .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET) 888a0065452SJulian Stecklina & PCI_CONFIG_CAPABILITY_PTR_MASK; 889a7187168SJianyong Wu 890a7187168SJianyong Wu while cap_next != 0 { 891a7187168SJianyong Wu let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into()); 892a7187168SJianyong Wu if PciCapabilityId::from(cap_id) == PciCapabilityId::MsiX { 893a7187168SJianyong Wu return Some(cap_next as usize); 894a7187168SJianyong Wu } else { 89500955568SJulian Stecklina let cap_ptr = self.vfio_wrapper.read_config_byte((cap_next + 1).into()) 896a0065452SJulian Stecklina & PCI_CONFIG_CAPABILITY_PTR_MASK; 89700955568SJulian Stecklina 89800955568SJulian Stecklina // See parse_capabilities below for an explanation. 89900955568SJulian Stecklina if cap_ptr != cap_next { 90000955568SJulian Stecklina cap_next = cap_ptr; 90100955568SJulian Stecklina } else { 90200955568SJulian Stecklina break; 90300955568SJulian Stecklina } 904a7187168SJianyong Wu } 905a7187168SJianyong Wu } 906a7187168SJianyong Wu 907a7187168SJianyong Wu None 908a7187168SJianyong Wu } 909a7187168SJianyong Wu parse_capabilities(&mut self, bdf: PciBdf)910de764456SJulian Stecklina fn parse_capabilities(&mut self, bdf: PciBdf) { 91156ca26e7SJulian Stecklina if !self.has_capabilities() { 91256ca26e7SJulian Stecklina return; 91356ca26e7SJulian Stecklina } 91456ca26e7SJulian Stecklina 915b750c332SThomas Barrett let mut cap_iter = self 9164a99d3dbSSebastien Boeuf .vfio_wrapper 917a0065452SJulian Stecklina .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET) 918a0065452SJulian Stecklina & PCI_CONFIG_CAPABILITY_PTR_MASK; 919521a11a1SRob Bradford 920e45e3df6SSebastien Boeuf let mut pci_express_cap_found = false; 921e45e3df6SSebastien Boeuf let mut power_management_cap_found = false; 922e45e3df6SSebastien Boeuf 923b750c332SThomas Barrett while cap_iter != 0 { 924b750c332SThomas Barrett let cap_id = self.vfio_wrapper.read_config_byte(cap_iter.into()); 925521a11a1SRob Bradford 926521a11a1SRob Bradford match PciCapabilityId::from(cap_id) { 927521a11a1SRob Bradford PciCapabilityId::MessageSignalledInterrupts => { 9284a99d3dbSSebastien Boeuf if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSI_IRQ_INDEX) { 929521a11a1SRob Bradford if irq_info.count > 0 { 930521a11a1SRob Bradford // Parse capability only if the VFIO device 931521a11a1SRob Bradford // supports MSI. 932b750c332SThomas Barrett let msg_ctl = self.parse_msi_capabilities(cap_iter); 933b750c332SThomas Barrett self.initialize_msi(msg_ctl, cap_iter as u32, None); 934521a11a1SRob Bradford } 935521a11a1SRob Bradford } 936521a11a1SRob Bradford } 937521a11a1SRob Bradford PciCapabilityId::MsiX => { 9384a99d3dbSSebastien Boeuf if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSIX_IRQ_INDEX) 9394a99d3dbSSebastien Boeuf { 940521a11a1SRob Bradford if irq_info.count > 0 { 941521a11a1SRob Bradford // Parse capability only if the VFIO device 942521a11a1SRob Bradford // supports MSI-X. 943b750c332SThomas Barrett let msix_cap = self.parse_msix_capabilities(cap_iter); 944b750c332SThomas Barrett self.initialize_msix(msix_cap, cap_iter as u32, bdf, None); 945521a11a1SRob Bradford } 946521a11a1SRob Bradford } 947521a11a1SRob Bradford } 948e45e3df6SSebastien Boeuf PciCapabilityId::PciExpress => pci_express_cap_found = true, 949e45e3df6SSebastien Boeuf PciCapabilityId::PowerManagement => power_management_cap_found = true, 950521a11a1SRob Bradford _ => {} 951521a11a1SRob Bradford }; 952521a11a1SRob Bradford 953a0065452SJulian Stecklina let cap_next = self.vfio_wrapper.read_config_byte((cap_iter + 1).into()) 954a0065452SJulian Stecklina & PCI_CONFIG_CAPABILITY_PTR_MASK; 95500955568SJulian Stecklina 95600955568SJulian Stecklina // Break out of the loop, if we either find the end or we have a broken device. This 95700955568SJulian Stecklina // doesn't handle all cases where a device might send us in a loop here, but it 95800955568SJulian Stecklina // handles case of a device returning 0xFF instead of implementing a real 95900955568SJulian Stecklina // capabilities list. 96000955568SJulian Stecklina if cap_next == 0 || cap_next == cap_iter { 961b750c332SThomas Barrett break; 962b750c332SThomas Barrett } 963b750c332SThomas Barrett 964b750c332SThomas Barrett cap_iter = cap_next; 965b750c332SThomas Barrett } 966b750c332SThomas Barrett 967b750c332SThomas Barrett if let Some(clique_id) = self.x_nv_gpudirect_clique { 968b750c332SThomas Barrett self.add_nv_gpudirect_clique_cap(cap_iter, clique_id); 969521a11a1SRob Bradford } 970e45e3df6SSebastien Boeuf 971e45e3df6SSebastien Boeuf if pci_express_cap_found && power_management_cap_found { 972e45e3df6SSebastien Boeuf self.parse_extended_capabilities(); 973e45e3df6SSebastien Boeuf } 974e45e3df6SSebastien Boeuf } 975e45e3df6SSebastien Boeuf add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8)976b750c332SThomas Barrett fn add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8) { 977b750c332SThomas Barrett // Turing, Ampere, Hopper, and Lovelace GPUs have dedicated space 978b750c332SThomas Barrett // at 0xD4 for this capability. 979b750c332SThomas Barrett let cap_offset = 0xd4u32; 980b750c332SThomas Barrett 981b750c332SThomas Barrett let reg_idx = (cap_iter / 4) as usize; 982b750c332SThomas Barrett self.patches.insert( 983b750c332SThomas Barrett reg_idx, 984b750c332SThomas Barrett ConfigPatch { 985b750c332SThomas Barrett mask: 0x0000_ff00, 986b750c332SThomas Barrett patch: cap_offset << 8, 987b750c332SThomas Barrett }, 988b750c332SThomas Barrett ); 989b750c332SThomas Barrett 990b750c332SThomas Barrett let reg_idx = (cap_offset / 4) as usize; 991b750c332SThomas Barrett self.patches.insert( 992b750c332SThomas Barrett reg_idx, 993b750c332SThomas Barrett ConfigPatch { 994b750c332SThomas Barrett mask: 0xffff_ffff, 995b750c332SThomas Barrett patch: 0x50080009u32, 996b750c332SThomas Barrett }, 997b750c332SThomas Barrett ); 998b750c332SThomas Barrett self.patches.insert( 999b750c332SThomas Barrett reg_idx + 1, 1000b750c332SThomas Barrett ConfigPatch { 1001b750c332SThomas Barrett mask: 0xffff_ffff, 1002b57cc3d7SRob Bradford patch: (u32::from(clique_id) << 19) | 0x5032, 1003b750c332SThomas Barrett }, 1004b750c332SThomas Barrett ); 1005b750c332SThomas Barrett } 1006b750c332SThomas Barrett parse_extended_capabilities(&mut self)1007e45e3df6SSebastien Boeuf fn parse_extended_capabilities(&mut self) { 1008e45e3df6SSebastien Boeuf let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET; 1009e45e3df6SSebastien Boeuf 1010e45e3df6SSebastien Boeuf loop { 1011e45e3df6SSebastien Boeuf let ext_cap_hdr = self.vfio_wrapper.read_config_dword(current_offset); 1012e45e3df6SSebastien Boeuf 1013e45e3df6SSebastien Boeuf let cap_id: u16 = (ext_cap_hdr & 0xffff) as u16; 1014e45e3df6SSebastien Boeuf let cap_next: u16 = ((ext_cap_hdr >> 20) & 0xfff) as u16; 1015e45e3df6SSebastien Boeuf 1016e45e3df6SSebastien Boeuf match PciExpressCapabilityId::from(cap_id) { 10177bf0cc1eSPhilipp Schuster PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation 1018e45e3df6SSebastien Boeuf | PciExpressCapabilityId::ResizeableBar 1019e45e3df6SSebastien Boeuf | PciExpressCapabilityId::SingleRootIoVirtualization => { 1020e45e3df6SSebastien Boeuf let reg_idx = (current_offset / 4) as usize; 1021e45e3df6SSebastien Boeuf self.patches.insert( 1022e45e3df6SSebastien Boeuf reg_idx, 1023e45e3df6SSebastien Boeuf ConfigPatch { 1024e45e3df6SSebastien Boeuf mask: 0x0000_ffff, 1025e45e3df6SSebastien Boeuf patch: PciExpressCapabilityId::NullCapability as u32, 1026e45e3df6SSebastien Boeuf }, 1027e45e3df6SSebastien Boeuf ); 1028e45e3df6SSebastien Boeuf } 1029e45e3df6SSebastien Boeuf _ => {} 1030e45e3df6SSebastien Boeuf } 1031e45e3df6SSebastien Boeuf 1032e45e3df6SSebastien Boeuf if cap_next == 0 { 1033e45e3df6SSebastien Boeuf break; 1034e45e3df6SSebastien Boeuf } 1035e45e3df6SSebastien Boeuf 1036e45e3df6SSebastien Boeuf current_offset = cap_next.into(); 1037e45e3df6SSebastien Boeuf } 1038521a11a1SRob Bradford } 1039ecc8382fSRob Bradford enable_intx(&mut self) -> Result<(), VfioPciError>10404a99d3dbSSebastien Boeuf pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> { 1041ecc8382fSRob Bradford if let Some(intx) = &mut self.interrupt.intx { 1042ecc8382fSRob Bradford if !intx.enabled { 1043ecc8382fSRob Bradford if let Some(eventfd) = intx.interrupt_source_group.notifier(0) { 10444a99d3dbSSebastien Boeuf self.vfio_wrapper 1045ecc8382fSRob Bradford .enable_irq(VFIO_PCI_INTX_IRQ_INDEX, vec![&eventfd]) 1046ecc8382fSRob Bradford .map_err(VfioPciError::EnableIntx)?; 1047ecc8382fSRob Bradford 1048ecc8382fSRob Bradford intx.enabled = true; 1049ecc8382fSRob Bradford } else { 1050ecc8382fSRob Bradford return Err(VfioPciError::MissingNotifier); 1051ecc8382fSRob Bradford } 1052ecc8382fSRob Bradford } 1053ecc8382fSRob Bradford } 1054ecc8382fSRob Bradford 1055ecc8382fSRob Bradford Ok(()) 1056ecc8382fSRob Bradford } 1057ecc8382fSRob Bradford disable_intx(&mut self)10584a99d3dbSSebastien Boeuf pub(crate) fn disable_intx(&mut self) { 1059ecc8382fSRob Bradford if let Some(intx) = &mut self.interrupt.intx { 1060ecc8382fSRob Bradford if intx.enabled { 10614a99d3dbSSebastien Boeuf if let Err(e) = self.vfio_wrapper.disable_irq(VFIO_PCI_INTX_IRQ_INDEX) { 1062ecc8382fSRob Bradford error!("Could not disable INTx: {}", e); 1063ecc8382fSRob Bradford } else { 1064ecc8382fSRob Bradford intx.enabled = false; 1065ecc8382fSRob Bradford } 1066ecc8382fSRob Bradford } 1067ecc8382fSRob Bradford } 1068ecc8382fSRob Bradford } 1069ecc8382fSRob Bradford enable_msi(&self) -> Result<(), VfioPciError>10704a99d3dbSSebastien Boeuf pub(crate) fn enable_msi(&self) -> Result<(), VfioPciError> { 1071ecc8382fSRob Bradford if let Some(msi) = &self.interrupt.msi { 1072ecc8382fSRob Bradford let mut irq_fds: Vec<EventFd> = Vec::new(); 1073ecc8382fSRob Bradford for i in 0..msi.cfg.num_enabled_vectors() { 1074ecc8382fSRob Bradford if let Some(eventfd) = msi.interrupt_source_group.notifier(i as InterruptIndex) { 1075ecc8382fSRob Bradford irq_fds.push(eventfd); 1076ecc8382fSRob Bradford } else { 1077ecc8382fSRob Bradford return Err(VfioPciError::MissingNotifier); 1078ecc8382fSRob Bradford } 1079ecc8382fSRob Bradford } 1080ecc8382fSRob Bradford 10814a99d3dbSSebastien Boeuf self.vfio_wrapper 1082ecc8382fSRob Bradford .enable_msi(irq_fds.iter().collect()) 1083ecc8382fSRob Bradford .map_err(VfioPciError::EnableMsi)?; 1084ecc8382fSRob Bradford } 1085ecc8382fSRob Bradford 1086ecc8382fSRob Bradford Ok(()) 1087ecc8382fSRob Bradford } 1088ecc8382fSRob Bradford disable_msi(&self)10894a99d3dbSSebastien Boeuf pub(crate) fn disable_msi(&self) { 10904a99d3dbSSebastien Boeuf if let Err(e) = self.vfio_wrapper.disable_msi() { 1091ecc8382fSRob Bradford error!("Could not disable MSI: {}", e); 1092ecc8382fSRob Bradford } 1093ecc8382fSRob Bradford } 1094ecc8382fSRob Bradford enable_msix(&self) -> Result<(), VfioPciError>10954a99d3dbSSebastien Boeuf pub(crate) fn enable_msix(&self) -> Result<(), VfioPciError> { 1096ecc8382fSRob Bradford if let Some(msix) = &self.interrupt.msix { 1097ecc8382fSRob Bradford let mut irq_fds: Vec<EventFd> = Vec::new(); 1098ecc8382fSRob Bradford for i in 0..msix.bar.table_entries.len() { 1099ecc8382fSRob Bradford if let Some(eventfd) = msix.interrupt_source_group.notifier(i as InterruptIndex) { 1100ecc8382fSRob Bradford irq_fds.push(eventfd); 1101ecc8382fSRob Bradford } else { 1102ecc8382fSRob Bradford return Err(VfioPciError::MissingNotifier); 1103ecc8382fSRob Bradford } 1104ecc8382fSRob Bradford } 1105ecc8382fSRob Bradford 11064a99d3dbSSebastien Boeuf self.vfio_wrapper 1107ecc8382fSRob Bradford .enable_msix(irq_fds.iter().collect()) 1108ecc8382fSRob Bradford .map_err(VfioPciError::EnableMsix)?; 1109ecc8382fSRob Bradford } 1110ecc8382fSRob Bradford 1111ecc8382fSRob Bradford Ok(()) 1112ecc8382fSRob Bradford } 1113ecc8382fSRob Bradford disable_msix(&self)11144a99d3dbSSebastien Boeuf pub(crate) fn disable_msix(&self) { 11154a99d3dbSSebastien Boeuf if let Err(e) = self.vfio_wrapper.disable_msix() { 1116ecc8382fSRob Bradford error!("Could not disable MSI-X: {}", e); 1117ecc8382fSRob Bradford } 1118ecc8382fSRob Bradford } 1119ecc8382fSRob Bradford initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError>1120de764456SJulian Stecklina fn initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError> { 11214a99d3dbSSebastien Boeuf if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_INTX_IRQ_INDEX) { 1122ecc8382fSRob Bradford if irq_info.count == 0 { 1123ecc8382fSRob Bradford // A count of 0 means the INTx IRQ is not supported, therefore 1124ecc8382fSRob Bradford // it shouldn't be initialized. 1125ecc8382fSRob Bradford return Ok(()); 1126ecc8382fSRob Bradford } 1127ecc8382fSRob Bradford } 1128ecc8382fSRob Bradford 1129e6aa792cSSebastien Boeuf if let Some(interrupt_source_group) = self.legacy_interrupt_group.clone() { 1130ecc8382fSRob Bradford self.interrupt.intx = Some(VfioIntx { 1131ecc8382fSRob Bradford interrupt_source_group, 1132ecc8382fSRob Bradford enabled: false, 1133ecc8382fSRob Bradford }); 1134ecc8382fSRob Bradford 11354a99d3dbSSebastien Boeuf self.enable_intx()?; 1136ecc8382fSRob Bradford } 1137ecc8382fSRob Bradford 1138ecc8382fSRob Bradford Ok(()) 1139ecc8382fSRob Bradford } 1140ecc8382fSRob Bradford update_msi_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError>1141de764456SJulian Stecklina fn update_msi_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError> { 1142ecc8382fSRob Bradford match self.interrupt.update_msi(offset, data) { 1143ecc8382fSRob Bradford Some(InterruptUpdateAction::EnableMsi) => { 1144ecc8382fSRob Bradford // Disable INTx before we can enable MSI 11454a99d3dbSSebastien Boeuf self.disable_intx(); 11464a99d3dbSSebastien Boeuf self.enable_msi()?; 1147ecc8382fSRob Bradford } 1148ecc8382fSRob Bradford Some(InterruptUpdateAction::DisableMsi) => { 1149ecc8382fSRob Bradford // Fallback onto INTx when disabling MSI 11504a99d3dbSSebastien Boeuf self.disable_msi(); 11514a99d3dbSSebastien Boeuf self.enable_intx()?; 1152ecc8382fSRob Bradford } 1153ecc8382fSRob Bradford _ => {} 1154ecc8382fSRob Bradford } 1155ecc8382fSRob Bradford 1156ecc8382fSRob Bradford Ok(()) 1157ecc8382fSRob Bradford } 1158ecc8382fSRob Bradford update_msix_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError>1159de764456SJulian Stecklina fn update_msix_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError> { 1160ecc8382fSRob Bradford match self.interrupt.update_msix(offset, data) { 1161ecc8382fSRob Bradford Some(InterruptUpdateAction::EnableMsix) => { 1162ecc8382fSRob Bradford // Disable INTx before we can enable MSI-X 11634a99d3dbSSebastien Boeuf self.disable_intx(); 11644a99d3dbSSebastien Boeuf self.enable_msix()?; 1165ecc8382fSRob Bradford } 1166ecc8382fSRob Bradford Some(InterruptUpdateAction::DisableMsix) => { 1167ecc8382fSRob Bradford // Fallback onto INTx when disabling MSI-X 11684a99d3dbSSebastien Boeuf self.disable_msix(); 11694a99d3dbSSebastien Boeuf self.enable_intx()?; 1170ecc8382fSRob Bradford } 1171ecc8382fSRob Bradford _ => {} 1172ecc8382fSRob Bradford } 1173ecc8382fSRob Bradford 1174ecc8382fSRob Bradford Ok(()) 1175ecc8382fSRob Bradford } 11762ff19345SRob Bradford find_region(&self, addr: u64) -> Option<MmioRegion>1177de764456SJulian Stecklina fn find_region(&self, addr: u64) -> Option<MmioRegion> { 11782ff19345SRob Bradford for region in self.mmio_regions.iter() { 11792ff19345SRob Bradford if addr >= region.start.raw_value() 11802ff19345SRob Bradford && addr < region.start.unchecked_add(region.length).raw_value() 11812ff19345SRob Bradford { 1182ed5f2544SSebastien Boeuf return Some(region.clone()); 11832ff19345SRob Bradford } 11842ff19345SRob Bradford } 11852ff19345SRob Bradford None 11862ff19345SRob Bradford } 1187a5f4d795SRob Bradford read_bar(&mut self, base: u64, offset: u64, data: &mut [u8])11884a99d3dbSSebastien Boeuf pub(crate) fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { 1189a5f4d795SRob Bradford let addr = base + offset; 1190a5f4d795SRob Bradford if let Some(region) = self.find_region(addr) { 1191a5f4d795SRob Bradford let offset = addr - region.start.raw_value(); 1192a5f4d795SRob Bradford 1193a5f4d795SRob Bradford if self.interrupt.msix_table_accessed(region.index, offset) { 1194a5f4d795SRob Bradford self.interrupt.msix_read_table(offset, data); 1195a5f4d795SRob Bradford } else { 11964a99d3dbSSebastien Boeuf self.vfio_wrapper.region_read(region.index, offset, data); 1197a5f4d795SRob Bradford } 1198a5f4d795SRob Bradford } 1199a5f4d795SRob Bradford 1200a5f4d795SRob Bradford // INTx EOI 1201a5f4d795SRob Bradford // The guest reading from the BAR potentially means the interrupt has 1202a5f4d795SRob Bradford // been received and can be acknowledged. 1203a5f4d795SRob Bradford if self.interrupt.intx_in_use() { 12044a99d3dbSSebastien Boeuf if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) { 1205a5f4d795SRob Bradford error!("Failed unmasking INTx IRQ: {}", e); 1206a5f4d795SRob Bradford } 1207a5f4d795SRob Bradford } 1208a5f4d795SRob Bradford } 1209a5f4d795SRob Bradford write_bar( &mut self, base: u64, offset: u64, data: &[u8], ) -> Option<Arc<Barrier>>1210a5f4d795SRob Bradford pub(crate) fn write_bar( 1211a5f4d795SRob Bradford &mut self, 1212a5f4d795SRob Bradford base: u64, 1213a5f4d795SRob Bradford offset: u64, 1214a5f4d795SRob Bradford data: &[u8], 1215a5f4d795SRob Bradford ) -> Option<Arc<Barrier>> { 1216a5f4d795SRob Bradford let addr = base + offset; 1217a5f4d795SRob Bradford if let Some(region) = self.find_region(addr) { 1218a5f4d795SRob Bradford let offset = addr - region.start.raw_value(); 1219a5f4d795SRob Bradford 1220a5f4d795SRob Bradford // If the MSI-X table is written to, we need to update our cache. 1221a5f4d795SRob Bradford if self.interrupt.msix_table_accessed(region.index, offset) { 1222a5f4d795SRob Bradford self.interrupt.msix_write_table(offset, data); 1223a5f4d795SRob Bradford } else { 12244a99d3dbSSebastien Boeuf self.vfio_wrapper.region_write(region.index, offset, data); 1225a5f4d795SRob Bradford } 1226a5f4d795SRob Bradford } 1227a5f4d795SRob Bradford 1228a5f4d795SRob Bradford // INTx EOI 1229a5f4d795SRob Bradford // The guest writing to the BAR potentially means the interrupt has 1230a5f4d795SRob Bradford // been received and can be acknowledged. 1231a5f4d795SRob Bradford if self.interrupt.intx_in_use() { 12324a99d3dbSSebastien Boeuf if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) { 1233a5f4d795SRob Bradford error!("Failed unmasking INTx IRQ: {}", e); 1234a5f4d795SRob Bradford } 1235a5f4d795SRob Bradford } 1236a5f4d795SRob Bradford 1237a5f4d795SRob Bradford None 1238a5f4d795SRob Bradford } 12391997152eSRob Bradford write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>)12401997152eSRob Bradford pub(crate) fn write_config_register( 12411997152eSRob Bradford &mut self, 12421997152eSRob Bradford reg_idx: usize, 12431997152eSRob Bradford offset: u64, 12441997152eSRob Bradford data: &[u8], 1245aaf86ef2SBo Chen ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>) { 12461997152eSRob Bradford // When the guest wants to write to a BAR, we trap it into 12471997152eSRob Bradford // our local configuration space. We're not reprogramming 12481997152eSRob Bradford // VFIO device. 12491997152eSRob Bradford if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(®_idx) 12501997152eSRob Bradford || reg_idx == PCI_ROM_EXP_BAR_INDEX 12511997152eSRob Bradford { 12521997152eSRob Bradford // We keep our local cache updated with the BARs. 12531997152eSRob Bradford // We'll read it back from there when the guest is asking 12541997152eSRob Bradford // for BARs (see read_config_register()). 1255cb52cf91SBo Chen return ( 12561997152eSRob Bradford self.configuration 1257cb52cf91SBo Chen .write_config_register(reg_idx, offset, data), 1258cb52cf91SBo Chen None, 1259cb52cf91SBo Chen ); 12601997152eSRob Bradford } 12611997152eSRob Bradford 12621997152eSRob Bradford let reg = (reg_idx * PCI_CONFIG_REGISTER_SIZE) as u64; 12631997152eSRob Bradford 12641997152eSRob Bradford // If the MSI or MSI-X capabilities are accessed, we need to 12651997152eSRob Bradford // update our local cache accordingly. 12661997152eSRob Bradford // Depending on how the capabilities are modified, this could 12671997152eSRob Bradford // trigger a VFIO MSI or MSI-X toggle. 12681997152eSRob Bradford if let Some((cap_id, cap_base)) = self.interrupt.accessed(reg) { 12691997152eSRob Bradford let cap_offset: u64 = reg - cap_base + offset; 12701997152eSRob Bradford match cap_id { 12711997152eSRob Bradford PciCapabilityId::MessageSignalledInterrupts => { 12724a99d3dbSSebastien Boeuf if let Err(e) = self.update_msi_capabilities(cap_offset, data) { 12731997152eSRob Bradford error!("Could not update MSI capabilities: {}", e); 12741997152eSRob Bradford } 12751997152eSRob Bradford } 12761997152eSRob Bradford PciCapabilityId::MsiX => { 12774a99d3dbSSebastien Boeuf if let Err(e) = self.update_msix_capabilities(cap_offset, data) { 12781997152eSRob Bradford error!("Could not update MSI-X capabilities: {}", e); 12791997152eSRob Bradford } 12801997152eSRob Bradford } 12811997152eSRob Bradford _ => {} 12821997152eSRob Bradford } 12831997152eSRob Bradford } 12841997152eSRob Bradford 12851997152eSRob Bradford // Make sure to write to the device's PCI config space after MSI/MSI-X 12861997152eSRob Bradford // interrupts have been enabled/disabled. In case of MSI, when the 12871997152eSRob Bradford // interrupts are enabled through VFIO (using VFIO_DEVICE_SET_IRQS), 12881997152eSRob Bradford // the MSI Enable bit in the MSI capability structure found in the PCI 12891997152eSRob Bradford // config space is disabled by default. That's why when the guest is 12901997152eSRob Bradford // enabling this bit, we first need to enable the MSI interrupts with 12911997152eSRob Bradford // VFIO through VFIO_DEVICE_SET_IRQS ioctl, and only after we can write 12921997152eSRob Bradford // to the device region to update the MSI Enable bit. 12934a99d3dbSSebastien Boeuf self.vfio_wrapper.write_config((reg + offset) as u32, data); 12941997152eSRob Bradford 12958da7c13eSBo Chen // Return pending BAR repgrogramming if MSE bit is set 12968da7c13eSBo Chen let mut ret_param = self.configuration.pending_bar_reprogram(); 12978da7c13eSBo Chen if !ret_param.is_empty() { 12988da7c13eSBo Chen if self.read_config_register(crate::configuration::COMMAND_REG) 12998da7c13eSBo Chen & crate::configuration::COMMAND_REG_MEMORY_SPACE_MASK 13008da7c13eSBo Chen == crate::configuration::COMMAND_REG_MEMORY_SPACE_MASK 13018da7c13eSBo Chen { 13028da7c13eSBo Chen info!("BAR reprogramming parameter is returned: {:x?}", ret_param); 13038da7c13eSBo Chen self.configuration.clear_pending_bar_reprogram(); 13048da7c13eSBo Chen } else { 13058da7c13eSBo Chen info!( 13068da7c13eSBo Chen "MSE bit is disabled. No BAR reprogramming parameter is returned: {:x?}", 13078da7c13eSBo Chen ret_param 13088da7c13eSBo Chen ); 13098da7c13eSBo Chen 13108da7c13eSBo Chen ret_param = Vec::new(); 13118da7c13eSBo Chen } 13128da7c13eSBo Chen } 13138da7c13eSBo Chen 13148da7c13eSBo Chen (ret_param, None) 13151997152eSRob Bradford } 13161997152eSRob Bradford read_config_register(&mut self, reg_idx: usize) -> u3213174a99d3dbSSebastien Boeuf pub(crate) fn read_config_register(&mut self, reg_idx: usize) -> u32 { 13181997152eSRob Bradford // When reading the BARs, we trap it and return what comes 13191997152eSRob Bradford // from our local configuration space. We want the guest to 13201997152eSRob Bradford // use that and not the VFIO device BARs as it does not map 13211997152eSRob Bradford // with the guest address space. 13221997152eSRob Bradford if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(®_idx) 13231997152eSRob Bradford || reg_idx == PCI_ROM_EXP_BAR_INDEX 13241997152eSRob Bradford { 13251997152eSRob Bradford return self.configuration.read_reg(reg_idx); 13261997152eSRob Bradford } 13271997152eSRob Bradford 1328a7187168SJianyong Wu if let Some(id) = self.get_msix_cap_idx() { 1329a7187168SJianyong Wu let msix = self.interrupt.msix.as_mut().unwrap(); 1330a7187168SJianyong Wu if reg_idx * 4 == id + 4 { 1331a7187168SJianyong Wu return msix.cap.table; 1332a7187168SJianyong Wu } else if reg_idx * 4 == id + 8 { 1333a7187168SJianyong Wu return msix.cap.pba; 1334a7187168SJianyong Wu } 1335a7187168SJianyong Wu } 1336a7187168SJianyong Wu 13371997152eSRob Bradford // Since we don't support passing multi-functions devices, we should 13381997152eSRob Bradford // mask the multi-function bit, bit 7 of the Header Type byte on the 13391997152eSRob Bradford // register 3. 13401997152eSRob Bradford let mask = if reg_idx == PCI_HEADER_TYPE_REG_INDEX { 13411997152eSRob Bradford 0xff7f_ffff 13421997152eSRob Bradford } else { 13431997152eSRob Bradford 0xffff_ffff 13441997152eSRob Bradford }; 13451997152eSRob Bradford 13461997152eSRob Bradford // The config register read comes from the VFIO device itself. 1347e45e3df6SSebastien Boeuf let mut value = self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask; 1348e45e3df6SSebastien Boeuf 1349e45e3df6SSebastien Boeuf if let Some(config_patch) = self.patches.get(®_idx) { 1350e45e3df6SSebastien Boeuf value = (value & !config_patch.mask) | config_patch.patch; 1351e45e3df6SSebastien Boeuf } 1352e45e3df6SSebastien Boeuf 1353e45e3df6SSebastien Boeuf value 13541997152eSRob Bradford } 135549069d84SSebastien Boeuf state(&self) -> VfioCommonState135649069d84SSebastien Boeuf fn state(&self) -> VfioCommonState { 135749069d84SSebastien Boeuf let intx_state = self.interrupt.intx.as_ref().map(|intx| IntxState { 135849069d84SSebastien Boeuf enabled: intx.enabled, 135949069d84SSebastien Boeuf }); 136049069d84SSebastien Boeuf 136149069d84SSebastien Boeuf let msi_state = self.interrupt.msi.as_ref().map(|msi| MsiState { 136249069d84SSebastien Boeuf cap: msi.cfg.cap, 136349069d84SSebastien Boeuf cap_offset: msi.cap_offset, 136449069d84SSebastien Boeuf }); 136549069d84SSebastien Boeuf 136649069d84SSebastien Boeuf let msix_state = self.interrupt.msix.as_ref().map(|msix| MsixState { 136749069d84SSebastien Boeuf cap: msix.cap, 136849069d84SSebastien Boeuf cap_offset: msix.cap_offset, 136949069d84SSebastien Boeuf bdf: msix.bar.devid, 137049069d84SSebastien Boeuf }); 137149069d84SSebastien Boeuf 137249069d84SSebastien Boeuf VfioCommonState { 137349069d84SSebastien Boeuf intx_state, 137449069d84SSebastien Boeuf msi_state, 137549069d84SSebastien Boeuf msix_state, 137649069d84SSebastien Boeuf } 137749069d84SSebastien Boeuf } 137849069d84SSebastien Boeuf set_state( &mut self, state: &VfioCommonState, msi_state: Option<MsiConfigState>, msix_state: Option<MsixConfigState>, ) -> Result<(), VfioPciError>1379cc3706afSSebastien Boeuf fn set_state( 1380cc3706afSSebastien Boeuf &mut self, 1381cc3706afSSebastien Boeuf state: &VfioCommonState, 1382cc3706afSSebastien Boeuf msi_state: Option<MsiConfigState>, 1383cc3706afSSebastien Boeuf msix_state: Option<MsixConfigState>, 1384cc3706afSSebastien Boeuf ) -> Result<(), VfioPciError> { 138549069d84SSebastien Boeuf if let (Some(intx), Some(interrupt_source_group)) = 138649069d84SSebastien Boeuf (&state.intx_state, self.legacy_interrupt_group.clone()) 138749069d84SSebastien Boeuf { 138849069d84SSebastien Boeuf self.interrupt.intx = Some(VfioIntx { 138949069d84SSebastien Boeuf interrupt_source_group, 139049069d84SSebastien Boeuf enabled: false, 139149069d84SSebastien Boeuf }); 139249069d84SSebastien Boeuf 139349069d84SSebastien Boeuf if intx.enabled { 139449069d84SSebastien Boeuf self.enable_intx()?; 139549069d84SSebastien Boeuf } 139649069d84SSebastien Boeuf } 139749069d84SSebastien Boeuf 139849069d84SSebastien Boeuf if let Some(msi) = &state.msi_state { 1399cc3706afSSebastien Boeuf self.initialize_msi(msi.cap.msg_ctl, msi.cap_offset, msi_state); 140049069d84SSebastien Boeuf } 140149069d84SSebastien Boeuf 140249069d84SSebastien Boeuf if let Some(msix) = &state.msix_state { 1403cc3706afSSebastien Boeuf self.initialize_msix(msix.cap, msix.cap_offset, msix.bdf.into(), msix_state); 140449069d84SSebastien Boeuf } 140549069d84SSebastien Boeuf 140649069d84SSebastien Boeuf Ok(()) 140749069d84SSebastien Boeuf } 140849069d84SSebastien Boeuf } 140949069d84SSebastien Boeuf 141049069d84SSebastien Boeuf impl Pausable for VfioCommon {} 141149069d84SSebastien Boeuf 141249069d84SSebastien Boeuf impl Snapshottable for VfioCommon { id(&self) -> String141349069d84SSebastien Boeuf fn id(&self) -> String { 1414cc3706afSSebastien Boeuf String::from(VFIO_COMMON_ID) 141549069d84SSebastien Boeuf } 141649069d84SSebastien Boeuf snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>141749069d84SSebastien Boeuf fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 141810ab87d6SRob Bradford let mut vfio_common_snapshot = Snapshot::new_from_state(&self.state())?; 141949069d84SSebastien Boeuf 142049069d84SSebastien Boeuf // Snapshot PciConfiguration 1421748018acSSebastien Boeuf vfio_common_snapshot.add_snapshot(self.configuration.id(), self.configuration.snapshot()?); 142249069d84SSebastien Boeuf 142349069d84SSebastien Boeuf // Snapshot MSI 142449069d84SSebastien Boeuf if let Some(msi) = &mut self.interrupt.msi { 1425748018acSSebastien Boeuf vfio_common_snapshot.add_snapshot(msi.cfg.id(), msi.cfg.snapshot()?); 142649069d84SSebastien Boeuf } 142749069d84SSebastien Boeuf 142849069d84SSebastien Boeuf // Snapshot MSI-X 142949069d84SSebastien Boeuf if let Some(msix) = &mut self.interrupt.msix { 1430748018acSSebastien Boeuf vfio_common_snapshot.add_snapshot(msix.bar.id(), msix.bar.snapshot()?); 143149069d84SSebastien Boeuf } 143249069d84SSebastien Boeuf 143349069d84SSebastien Boeuf Ok(vfio_common_snapshot) 143449069d84SSebastien Boeuf } 143522275c34SRob Bradford } 143622275c34SRob Bradford 1437db5b4763SSamuel Ortiz /// VfioPciDevice represents a VFIO PCI device. 1438db5b4763SSamuel Ortiz /// This structure implements the BusDevice and PciDevice traits. 1439db5b4763SSamuel Ortiz /// 1440db5b4763SSamuel Ortiz /// A VfioPciDevice is bound to a VfioDevice and is also a PCI device. 1441db5b4763SSamuel Ortiz /// The VMM creates a VfioDevice, then assigns it to a VfioPciDevice, 1442db5b4763SSamuel Ortiz /// which then gets added to the PCI bus. 1443db5b4763SSamuel Ortiz pub struct VfioPciDevice { 14445264d545SSebastien Boeuf id: String, 1445e5552a53SWei Liu vm: Arc<dyn hypervisor::Vm>, 1446db5b4763SSamuel Ortiz device: Arc<VfioDevice>, 1447a0a89b13SSebastien Boeuf container: Arc<VfioContainer>, 1448d27ea34aSRob Bradford common: VfioCommon, 1449a0a89b13SSebastien Boeuf iommu_attached: bool, 145081f8a27eSRob Bradford memory_slot_allocator: MemorySlotAllocator, 145181eca69bSArvind Vasudev bdf: PciBdf, 1452297b41d6SBo Chen device_path: PathBuf, 1453db5b4763SSamuel Ortiz } 1454db5b4763SSamuel Ortiz 1455db5b4763SSamuel Ortiz impl VfioPciDevice { 1456db5b4763SSamuel Ortiz /// Constructs a new Vfio Pci device for the given Vfio device 14575264d545SSebastien Boeuf #[allow(clippy::too_many_arguments)] new( id: String, vm: &Arc<dyn hypervisor::Vm>, device: VfioDevice, container: Arc<VfioContainer>, msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, iommu_attached: bool, bdf: PciBdf, memory_slot_allocator: MemorySlotAllocator, snapshot: Option<Snapshot>, x_nv_gpudirect_clique: Option<u8>, device_path: PathBuf, ) -> Result<Self, VfioPciError>1458db5b4763SSamuel Ortiz pub fn new( 14595264d545SSebastien Boeuf id: String, 1460e5552a53SWei Liu vm: &Arc<dyn hypervisor::Vm>, 1461db5b4763SSamuel Ortiz device: VfioDevice, 1462a0a89b13SSebastien Boeuf container: Arc<VfioContainer>, 1463eb6daa2fSSebastien Boeuf msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 1464dcc646f5SSebastien Boeuf legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, 1465a0a89b13SSebastien Boeuf iommu_attached: bool, 14661db77185SMichael Zhao bdf: PciBdf, 146781f8a27eSRob Bradford memory_slot_allocator: MemorySlotAllocator, 1468cc3706afSSebastien Boeuf snapshot: Option<Snapshot>, 1469b750c332SThomas Barrett x_nv_gpudirect_clique: Option<u8>, 1470297b41d6SBo Chen device_path: PathBuf, 1471cdfc1773SRob Bradford ) -> Result<Self, VfioPciError> { 1472db5b4763SSamuel Ortiz let device = Arc::new(device); 1473db5b4763SSamuel Ortiz device.reset(); 1474db5b4763SSamuel Ortiz 1475dc35dac3SRob Bradford let vfio_wrapper = VfioDeviceWrapper::new(Arc::clone(&device)); 1476db5b4763SSamuel Ortiz 1477d6bf1f5eSSebastien Boeuf let common = VfioCommon::new( 1478eb6daa2fSSebastien Boeuf msi_interrupt_manager, 1479e6aa792cSSebastien Boeuf legacy_interrupt_group, 1480d6bf1f5eSSebastien Boeuf Arc::new(vfio_wrapper) as Arc<dyn Vfio>, 1481d6bf1f5eSSebastien Boeuf &PciVfioSubclass::VfioSubclass, 1482d6bf1f5eSSebastien Boeuf bdf, 1483cc3706afSSebastien Boeuf vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID), 1484b750c332SThomas Barrett x_nv_gpudirect_clique, 1485d6bf1f5eSSebastien Boeuf )?; 1486521a11a1SRob Bradford 1487ecc8382fSRob Bradford let vfio_pci_device = VfioPciDevice { 14885264d545SSebastien Boeuf id, 1489521a11a1SRob Bradford vm: vm.clone(), 1490521a11a1SRob Bradford device, 1491521a11a1SRob Bradford container, 1492521a11a1SRob Bradford common, 1493521a11a1SRob Bradford iommu_attached, 149481f8a27eSRob Bradford memory_slot_allocator, 149581eca69bSArvind Vasudev bdf, 1496297b41d6SBo Chen device_path: device_path.clone(), 1497521a11a1SRob Bradford }; 149819167e76SSebastien Boeuf 1499db5b4763SSamuel Ortiz Ok(vfio_pci_device) 1500db5b4763SSamuel Ortiz } 1501db5b4763SSamuel Ortiz iommu_attached(&self) -> bool1502933d41cfSSebastien Boeuf pub fn iommu_attached(&self) -> bool { 1503933d41cfSSebastien Boeuf self.iommu_attached 1504933d41cfSSebastien Boeuf } 1505933d41cfSSebastien Boeuf generate_sparse_areas( caps: &[VfioRegionInfoCap], region_index: u32, region_start: u64, region_size: u64, vfio_msix: Option<&VfioMsix>, ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError>15061108bd19SSebastien Boeuf fn generate_sparse_areas( 15071108bd19SSebastien Boeuf caps: &[VfioRegionInfoCap], 1508ed5f2544SSebastien Boeuf region_index: u32, 1509ed5f2544SSebastien Boeuf region_start: u64, 1510ed5f2544SSebastien Boeuf region_size: u64, 1511ed5f2544SSebastien Boeuf vfio_msix: Option<&VfioMsix>, 15121108bd19SSebastien Boeuf ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError> { 15131108bd19SSebastien Boeuf for cap in caps { 15141108bd19SSebastien Boeuf match cap { 15151108bd19SSebastien Boeuf VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()), 15161108bd19SSebastien Boeuf VfioRegionInfoCap::MsixMappable => { 1517a7187168SJianyong Wu if !is_4k_aligned(region_start) { 1518ed5f2544SSebastien Boeuf error!( 1519ed5f2544SSebastien Boeuf "Region start address 0x{:x} must be at least aligned on 4KiB", 1520ed5f2544SSebastien Boeuf region_start 1521ed5f2544SSebastien Boeuf ); 15221108bd19SSebastien Boeuf return Err(VfioPciError::RegionAlignment); 1523ed5f2544SSebastien Boeuf } 1524a7187168SJianyong Wu if !is_4k_multiple(region_size) { 1525ed5f2544SSebastien Boeuf error!( 1526ed5f2544SSebastien Boeuf "Region size 0x{:x} must be at least a multiple of 4KiB", 1527ed5f2544SSebastien Boeuf region_size 1528ed5f2544SSebastien Boeuf ); 15291108bd19SSebastien Boeuf return Err(VfioPciError::RegionSize); 1530ed5f2544SSebastien Boeuf } 1531ed5f2544SSebastien Boeuf 15321108bd19SSebastien Boeuf // In case the region contains the MSI-X vectors table or 15331108bd19SSebastien Boeuf // the MSI-X PBA table, we must calculate the subregions 15341108bd19SSebastien Boeuf // around them, leading to a list of sparse areas. 15351108bd19SSebastien Boeuf // We want to make sure we will still trap MMIO accesses 1536a7187168SJianyong Wu // to these MSI-X specific ranges. If these region don't align 15377bf0cc1eSPhilipp Schuster // with pagesize, we can achieve it by enlarging its range. 15381108bd19SSebastien Boeuf // 1539ed5f2544SSebastien Boeuf // Using a BtreeMap as the list provided through the iterator is sorted 1540ed5f2544SSebastien Boeuf // by key. This ensures proper split of the whole region. 1541ed5f2544SSebastien Boeuf let mut inter_ranges = BTreeMap::new(); 1542ed5f2544SSebastien Boeuf if let Some(msix) = vfio_msix { 1543ed5f2544SSebastien Boeuf if region_index == msix.cap.table_bir() { 1544ed5f2544SSebastien Boeuf let (offset, size) = msix.cap.table_range(); 1545a7187168SJianyong Wu let offset = align_page_size_down(offset); 1546a7187168SJianyong Wu let size = align_page_size_up(size); 15471108bd19SSebastien Boeuf inter_ranges.insert(offset, size); 1548ed5f2544SSebastien Boeuf } 1549ed5f2544SSebastien Boeuf if region_index == msix.cap.pba_bir() { 1550ed5f2544SSebastien Boeuf let (offset, size) = msix.cap.pba_range(); 1551a7187168SJianyong Wu let offset = align_page_size_down(offset); 1552a7187168SJianyong Wu let size = align_page_size_up(size); 15531108bd19SSebastien Boeuf inter_ranges.insert(offset, size); 1554ed5f2544SSebastien Boeuf } 1555ed5f2544SSebastien Boeuf } 1556ed5f2544SSebastien Boeuf 15571108bd19SSebastien Boeuf let mut sparse_areas = Vec::new(); 15581108bd19SSebastien Boeuf let mut current_offset = 0; 15591108bd19SSebastien Boeuf for (range_offset, range_size) in inter_ranges { 15601108bd19SSebastien Boeuf if range_offset > current_offset { 15611108bd19SSebastien Boeuf sparse_areas.push(VfioRegionSparseMmapArea { 15621108bd19SSebastien Boeuf offset: current_offset, 15631108bd19SSebastien Boeuf size: range_offset - current_offset, 1564ed5f2544SSebastien Boeuf }); 1565ed5f2544SSebastien Boeuf } 1566a7187168SJianyong Wu current_offset = align_page_size_down(range_offset + range_size); 1567ed5f2544SSebastien Boeuf } 1568ed5f2544SSebastien Boeuf 15691108bd19SSebastien Boeuf if region_size > current_offset { 15701108bd19SSebastien Boeuf sparse_areas.push(VfioRegionSparseMmapArea { 1571a7187168SJianyong Wu offset: current_offset, 1572a7187168SJianyong Wu size: region_size - current_offset, 1573ed5f2544SSebastien Boeuf }); 1574ed5f2544SSebastien Boeuf } 1575ed5f2544SSebastien Boeuf 15761108bd19SSebastien Boeuf return Ok(sparse_areas); 15771108bd19SSebastien Boeuf } 15781108bd19SSebastien Boeuf _ => {} 15791108bd19SSebastien Boeuf } 15801108bd19SSebastien Boeuf } 15811108bd19SSebastien Boeuf 15821108bd19SSebastien Boeuf // In case no relevant capabilities have been found, create a single 15831108bd19SSebastien Boeuf // sparse area corresponding to the entire MMIO region. 15841108bd19SSebastien Boeuf Ok(vec![VfioRegionSparseMmapArea { 15851108bd19SSebastien Boeuf offset: 0, 15861108bd19SSebastien Boeuf size: region_size, 15871108bd19SSebastien Boeuf }]) 1588ed5f2544SSebastien Boeuf } 1589ed5f2544SSebastien Boeuf 1590b746dd71SChao Peng /// Map MMIO regions into the guest, and avoid VM exits when the guest tries 1591b746dd71SChao Peng /// to reach those regions. 1592d92d7978SSebastien Boeuf /// 1593d92d7978SSebastien Boeuf /// # Arguments 1594d92d7978SSebastien Boeuf /// 1595571c3685SWei Liu /// * `vm` - The VM object. It is used to set the VFIO MMIO regions 1596571c3685SWei Liu /// as user memory regions. 1597571c3685SWei Liu /// * `mem_slot` - The closure to return a memory slot. map_mmio_regions(&mut self) -> Result<(), VfioPciError>159881ba70a4SSebastien Boeuf pub fn map_mmio_regions(&mut self) -> Result<(), VfioPciError> { 1599b746dd71SChao Peng let fd = self.device.as_raw_fd(); 1600b746dd71SChao Peng 1601d27ea34aSRob Bradford for region in self.common.mmio_regions.iter_mut() { 1602b746dd71SChao Peng let region_flags = self.device.get_region_flags(region.index); 1603b746dd71SChao Peng if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { 1604b746dd71SChao Peng let mut prot = 0; 1605b746dd71SChao Peng if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 { 1606b746dd71SChao Peng prot |= libc::PROT_READ; 1607b746dd71SChao Peng } 1608b746dd71SChao Peng if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 { 1609b746dd71SChao Peng prot |= libc::PROT_WRITE; 1610b746dd71SChao Peng } 1611ed5f2544SSebastien Boeuf 1612b11a8a5aSSebastien Boeuf // Retrieve the list of capabilities found on the region 1613b11a8a5aSSebastien Boeuf let caps = if region_flags & VFIO_REGION_INFO_FLAG_CAPS != 0 { 1614b11a8a5aSSebastien Boeuf self.device.get_region_caps(region.index) 1615b11a8a5aSSebastien Boeuf } else { 1616b11a8a5aSSebastien Boeuf Vec::new() 1617b11a8a5aSSebastien Boeuf }; 1618b11a8a5aSSebastien Boeuf 1619b11a8a5aSSebastien Boeuf // Don't try to mmap the region if it contains MSI-X table or 1620b11a8a5aSSebastien Boeuf // MSI-X PBA subregion, and if we couldn't find MSIX_MAPPABLE 1621b11a8a5aSSebastien Boeuf // in the list of supported capabilities. 1622b11a8a5aSSebastien Boeuf if let Some(msix) = self.common.interrupt.msix.as_ref() { 1623b11a8a5aSSebastien Boeuf if (region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir()) 1624b11a8a5aSSebastien Boeuf && !caps.contains(&VfioRegionInfoCap::MsixMappable) 1625b11a8a5aSSebastien Boeuf { 1626b11a8a5aSSebastien Boeuf continue; 1627b11a8a5aSSebastien Boeuf } 1628b11a8a5aSSebastien Boeuf } 1629b11a8a5aSSebastien Boeuf 1630b11a8a5aSSebastien Boeuf let mmap_size = self.device.get_region_size(region.index); 16311108bd19SSebastien Boeuf let mmap_offset = self.device.get_region_offset(region.index); 1632b746dd71SChao Peng 16331108bd19SSebastien Boeuf let sparse_areas = Self::generate_sparse_areas( 16341108bd19SSebastien Boeuf &caps, 16351108bd19SSebastien Boeuf region.index, 16361108bd19SSebastien Boeuf region.start.0, 16371108bd19SSebastien Boeuf mmap_size, 16381108bd19SSebastien Boeuf self.common.interrupt.msix.as_ref(), 16391108bd19SSebastien Boeuf )?; 16401108bd19SSebastien Boeuf 16411108bd19SSebastien Boeuf for area in sparse_areas.iter() { 1642c5bd8cabSWei Liu // SAFETY: FFI call with correct arguments 1643b746dd71SChao Peng let host_addr = unsafe { 1644b746dd71SChao Peng libc::mmap( 1645b746dd71SChao Peng null_mut(), 16461108bd19SSebastien Boeuf area.size as usize, 1647b746dd71SChao Peng prot, 1648b746dd71SChao Peng libc::MAP_SHARED, 1649b746dd71SChao Peng fd, 16501108bd19SSebastien Boeuf mmap_offset as libc::off_t + area.offset as libc::off_t, 1651b746dd71SChao Peng ) 1652b746dd71SChao Peng }; 1653b746dd71SChao Peng 1654a64ba04eSJinank Jain if std::ptr::eq(host_addr, libc::MAP_FAILED) { 1655b11a8a5aSSebastien Boeuf error!( 16561108bd19SSebastien Boeuf "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}", 16571108bd19SSebastien Boeuf area.offset, 16581108bd19SSebastien Boeuf area.size, 16591108bd19SSebastien Boeuf std::io::Error::last_os_error() 1660b746dd71SChao Peng ); 16611108bd19SSebastien Boeuf return Err(VfioPciError::MmapArea); 1662b746dd71SChao Peng } 1663b746dd71SChao Peng 1664a7187168SJianyong Wu if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) { 1665a7187168SJianyong Wu warn!( 1666a7187168SJianyong Wu "Could not mmap sparse area that is not page size aligned (offset = 0x{:x}, size = 0x{:x})", 1667a7187168SJianyong Wu area.offset, 1668a7187168SJianyong Wu area.size, 1669a7187168SJianyong Wu ); 1670a7187168SJianyong Wu return Ok(()); 1671a7187168SJianyong Wu } 1672a7187168SJianyong Wu 16731108bd19SSebastien Boeuf let user_memory_region = UserMemoryRegion { 167481f8a27eSRob Bradford slot: self.memory_slot_allocator.next_memory_slot(), 16751108bd19SSebastien Boeuf start: region.start.0 + area.offset, 16761108bd19SSebastien Boeuf size: area.size, 16771108bd19SSebastien Boeuf host_addr: host_addr as u64, 16781108bd19SSebastien Boeuf }; 16791108bd19SSebastien Boeuf 168006f57abdSBo Chen region.user_memory_regions.push(user_memory_region); 168106f57abdSBo Chen 168281ba70a4SSebastien Boeuf let mem_region = self.vm.make_user_memory_region( 1683ed5f2544SSebastien Boeuf user_memory_region.slot, 1684ed5f2544SSebastien Boeuf user_memory_region.start, 1685ed5f2544SSebastien Boeuf user_memory_region.size, 1686ed5f2544SSebastien Boeuf user_memory_region.host_addr, 1687053ea5dcSMuminul Islam false, 16888baa244eSRob Bradford false, 1689053ea5dcSMuminul Islam ); 1690b746dd71SChao Peng 169181ba70a4SSebastien Boeuf self.vm 169281ba70a4SSebastien Boeuf .create_user_memory_region(mem_region) 16931108bd19SSebastien Boeuf .map_err(VfioPciError::CreateUserMemoryRegion)?; 1694f0c1f8d0SThomas Barrett 1695f0c1f8d0SThomas Barrett if !self.iommu_attached { 1696f0c1f8d0SThomas Barrett self.container 1697f0c1f8d0SThomas Barrett .vfio_dma_map( 1698f0c1f8d0SThomas Barrett user_memory_region.start, 1699f0c1f8d0SThomas Barrett user_memory_region.size, 1700f0c1f8d0SThomas Barrett user_memory_region.host_addr, 1701f0c1f8d0SThomas Barrett ) 1702297b41d6SBo Chen .map_err(|e| { 1703297b41d6SBo Chen VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf) 1704297b41d6SBo Chen })?; 1705f0c1f8d0SThomas Barrett } 1706ed5f2544SSebastien Boeuf } 1707b746dd71SChao Peng } 1708b746dd71SChao Peng } 1709b746dd71SChao Peng 1710320fea0eSSebastien Boeuf Ok(()) 1711b746dd71SChao Peng } 17124de04e84SWu Zongyong unmap_mmio_regions(&mut self)17134de04e84SWu Zongyong pub fn unmap_mmio_regions(&mut self) { 1714d27ea34aSRob Bradford for region in self.common.mmio_regions.iter() { 1715ed5f2544SSebastien Boeuf for user_memory_region in region.user_memory_regions.iter() { 1716f0c1f8d0SThomas Barrett // Unmap from vfio container 1717f0c1f8d0SThomas Barrett if !self.iommu_attached { 1718f0c1f8d0SThomas Barrett if let Err(e) = self 1719f0c1f8d0SThomas Barrett .container 1720f0c1f8d0SThomas Barrett .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) 17211307d31eSBo Chen .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf)) 1722f0c1f8d0SThomas Barrett { 17231307d31eSBo Chen error!( 17241307d31eSBo Chen "Could not unmap mmio region from vfio container: \ 17251307d31eSBo Chen iova 0x{:x}, size 0x{:x}: {}, ", 17261307d31eSBo Chen user_memory_region.start, user_memory_region.size, e 17271307d31eSBo Chen ); 1728f0c1f8d0SThomas Barrett } 1729f0c1f8d0SThomas Barrett } 1730f0c1f8d0SThomas Barrett 1731e6849699SWei Liu // Remove region 1732e6849699SWei Liu let r = self.vm.make_user_memory_region( 1733ed5f2544SSebastien Boeuf user_memory_region.slot, 1734ed5f2544SSebastien Boeuf user_memory_region.start, 1735ed5f2544SSebastien Boeuf user_memory_region.size, 1736ed5f2544SSebastien Boeuf user_memory_region.host_addr, 1737053ea5dcSMuminul Islam false, 17388baa244eSRob Bradford false, 1739053ea5dcSMuminul Islam ); 1740e4dee57eSMuminul Islam 17411f2915bfSWei Liu if let Err(e) = self.vm.remove_user_memory_region(r) { 1742e6849699SWei Liu error!("Could not remove the userspace memory region: {}", e); 1743f0dff8b5SSebastien Boeuf } 1744f0dff8b5SSebastien Boeuf 17450d6cef45SRob Bradford self.memory_slot_allocator 17460d6cef45SRob Bradford .free_memory_slot(user_memory_region.slot); 17470d6cef45SRob Bradford 1748c5bd8cabSWei Liu // SAFETY: FFI call with correct arguments 17491108bd19SSebastien Boeuf let ret = unsafe { 17501108bd19SSebastien Boeuf libc::munmap( 17511108bd19SSebastien Boeuf user_memory_region.host_addr as *mut libc::c_void, 17521108bd19SSebastien Boeuf user_memory_region.size as usize, 17531108bd19SSebastien Boeuf ) 17541108bd19SSebastien Boeuf }; 17554de04e84SWu Zongyong if ret != 0 { 17564de04e84SWu Zongyong error!( 1757f0dff8b5SSebastien Boeuf "Could not unmap region {}, error:{}", 1758f0dff8b5SSebastien Boeuf region.index, 17594de04e84SWu Zongyong io::Error::last_os_error() 17604de04e84SWu Zongyong ); 17614de04e84SWu Zongyong } 17624de04e84SWu Zongyong } 17634de04e84SWu Zongyong } 17644de04e84SWu Zongyong } 1765e4a034aeSSebastien Boeuf dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioPciError>1766cdfc1773SRob Bradford pub fn dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioPciError> { 1767a0a89b13SSebastien Boeuf if !self.iommu_attached { 1768a0a89b13SSebastien Boeuf self.container 1769080ea318SSebastien Boeuf .vfio_dma_map(iova, size, user_addr) 1770297b41d6SBo Chen .map_err(|e| VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf))?; 1771080ea318SSebastien Boeuf } 1772080ea318SSebastien Boeuf 1773080ea318SSebastien Boeuf Ok(()) 1774080ea318SSebastien Boeuf } 1775080ea318SSebastien Boeuf dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioPciError>1776cdfc1773SRob Bradford pub fn dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioPciError> { 1777080ea318SSebastien Boeuf if !self.iommu_attached { 1778080ea318SSebastien Boeuf self.container 1779080ea318SSebastien Boeuf .vfio_dma_unmap(iova, size) 1780297b41d6SBo Chen .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf))?; 1781a0a89b13SSebastien Boeuf } 1782a0a89b13SSebastien Boeuf 1783a0a89b13SSebastien Boeuf Ok(()) 1784e4a034aeSSebastien Boeuf } 1785593a958fSRob Bradford mmio_regions(&self) -> Vec<MmioRegion>1786593a958fSRob Bradford pub fn mmio_regions(&self) -> Vec<MmioRegion> { 1787d27ea34aSRob Bradford self.common.mmio_regions.clone() 1788593a958fSRob Bradford } 1789db5b4763SSamuel Ortiz } 1790db5b4763SSamuel Ortiz 1791db5b4763SSamuel Ortiz impl Drop for VfioPciDevice { drop(&mut self)1792db5b4763SSamuel Ortiz fn drop(&mut self) { 17934de04e84SWu Zongyong self.unmap_mmio_regions(); 17944de04e84SWu Zongyong 1795d27ea34aSRob Bradford if let Some(msix) = &self.common.interrupt.msix { 179619167e76SSebastien Boeuf if msix.bar.enabled() { 17974a99d3dbSSebastien Boeuf self.common.disable_msix(); 1798d7dc1a92SWu Zongyong } 179920f01161SSebastien Boeuf } 180020f01161SSebastien Boeuf 1801d27ea34aSRob Bradford if let Some(msi) = &self.common.interrupt.msi { 180219167e76SSebastien Boeuf if msi.cfg.enabled() { 18034a99d3dbSSebastien Boeuf self.common.disable_msi() 1804d7dc1a92SWu Zongyong } 180520f01161SSebastien Boeuf } 180620f01161SSebastien Boeuf 1807d27ea34aSRob Bradford if self.common.interrupt.intx_in_use() { 18084a99d3dbSSebastien Boeuf self.common.disable_intx(); 180919167e76SSebastien Boeuf } 1810db5b4763SSamuel Ortiz } 1811db5b4763SSamuel Ortiz } 1812db5b4763SSamuel Ortiz 1813db5b4763SSamuel Ortiz impl BusDevice for VfioPciDevice { read(&mut self, base: u64, offset: u64, data: &mut [u8])1814db5b4763SSamuel Ortiz fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 1815db5b4763SSamuel Ortiz self.read_bar(base, offset, data) 1816db5b4763SSamuel Ortiz } 1817db5b4763SSamuel Ortiz write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>>18181fc6d50fSRob Bradford fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 18197cc729c7SRob Bradford self.write_bar(base, offset, data) 1820db5b4763SSamuel Ortiz } 1821db5b4763SSamuel Ortiz } 1822db5b4763SSamuel Ortiz 182356ca26e7SJulian Stecklina // Offset of the 16-bit status register in the PCI configuration space. 182456ca26e7SJulian Stecklina const PCI_CONFIG_STATUS_OFFSET: u32 = 0x06; 182556ca26e7SJulian Stecklina // Status bit indicating the presence of a capabilities list. 182656ca26e7SJulian Stecklina const PCI_CONFIG_STATUS_CAPABILITIES_LIST: u16 = 1 << 4; 1827db5b4763SSamuel Ortiz // First BAR offset in the PCI config space. 1828db5b4763SSamuel Ortiz const PCI_CONFIG_BAR_OFFSET: u32 = 0x10; 1829db5b4763SSamuel Ortiz // Capability register offset in the PCI config space. 1830db5b4763SSamuel Ortiz const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34; 1831a0065452SJulian Stecklina // The valid bits for the capabilities pointer. 1832a0065452SJulian Stecklina const PCI_CONFIG_CAPABILITY_PTR_MASK: u8 = !0b11; 1833e45e3df6SSebastien Boeuf // Extended capabilities register offset in the PCI config space. 1834e45e3df6SSebastien Boeuf const PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET: u32 = 0x100; 1835db5b4763SSamuel Ortiz // IO BAR when first BAR bit is 1. 1836db5b4763SSamuel Ortiz const PCI_CONFIG_IO_BAR: u32 = 0x1; 1837db5b4763SSamuel Ortiz // 64-bit memory bar flag. 1838db5b4763SSamuel Ortiz const PCI_CONFIG_MEMORY_BAR_64BIT: u32 = 0x4; 1839868d1f69SSteven Dake // Prefetchable BAR bit 1840868d1f69SSteven Dake const PCI_CONFIG_BAR_PREFETCHABLE: u32 = 0x8; 1841db5b4763SSamuel Ortiz // PCI config register size (4 bytes). 1842db5b4763SSamuel Ortiz const PCI_CONFIG_REGISTER_SIZE: usize = 4; 1843db5b4763SSamuel Ortiz // Number of BARs for a PCI device 1844db5b4763SSamuel Ortiz const BAR_NUMS: usize = 6; 1845347f8a03SSebastien Boeuf // PCI Header Type register index 1846347f8a03SSebastien Boeuf const PCI_HEADER_TYPE_REG_INDEX: usize = 3; 1847347f8a03SSebastien Boeuf // First BAR register index 1848347f8a03SSebastien Boeuf const PCI_CONFIG_BAR0_INDEX: usize = 4; 18492f802880SSebastien Boeuf // PCI ROM expansion BAR register index 18502f802880SSebastien Boeuf const PCI_ROM_EXP_BAR_INDEX: usize = 12; 1851db5b4763SSamuel Ortiz 1852db5b4763SSamuel Ortiz impl PciDevice for VfioPciDevice { allocate_bars( &mut self, allocator: &Arc<Mutex<SystemAllocator>>, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, resources: Option<Vec<Resource>>, ) -> Result<Vec<PciBarConfiguration>, PciDeviceError>1853db5b4763SSamuel Ortiz fn allocate_bars( 1854db5b4763SSamuel Ortiz &mut self, 18559ef1187fSRob Bradford allocator: &Arc<Mutex<SystemAllocator>>, 185645b01d59SThomas Barrett mmio32_allocator: &mut AddressAllocator, 185745b01d59SThomas Barrett mmio64_allocator: &mut AddressAllocator, 18586e084572SSebastien Boeuf resources: Option<Vec<Resource>>, 185989218b6dSSebastien Boeuf ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> { 1860cd9d1cf8SRob Bradford self.common 186145b01d59SThomas Barrett .allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources) 1862db5b4763SSamuel Ortiz } 1863db5b4763SSamuel Ortiz free_bars( &mut self, allocator: &mut SystemAllocator, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, ) -> Result<(), PciDeviceError>1864cd9d1cf8SRob Bradford fn free_bars( 1865cd9d1cf8SRob Bradford &mut self, 1866cd9d1cf8SRob Bradford allocator: &mut SystemAllocator, 186745b01d59SThomas Barrett mmio32_allocator: &mut AddressAllocator, 186845b01d59SThomas Barrett mmio64_allocator: &mut AddressAllocator, 1869cd9d1cf8SRob Bradford ) -> Result<(), PciDeviceError> { 187045b01d59SThomas Barrett self.common 187145b01d59SThomas Barrett .free_bars(allocator, mmio32_allocator, mmio64_allocator) 187234d1f435SSebastien Boeuf } 187334d1f435SSebastien Boeuf write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>)18747cc729c7SRob Bradford fn write_config_register( 18757cc729c7SRob Bradford &mut self, 18767cc729c7SRob Bradford reg_idx: usize, 18777cc729c7SRob Bradford offset: u64, 18787cc729c7SRob Bradford data: &[u8], 1879aaf86ef2SBo Chen ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>) { 18804a99d3dbSSebastien Boeuf self.common.write_config_register(reg_idx, offset, data) 1881db5b4763SSamuel Ortiz } 1882db5b4763SSamuel Ortiz read_config_register(&mut self, reg_idx: usize) -> u321883db9f9b78SSebastien Boeuf fn read_config_register(&mut self, reg_idx: usize) -> u32 { 18844a99d3dbSSebastien Boeuf self.common.read_config_register(reg_idx) 1885db5b4763SSamuel Ortiz } 1886db5b4763SSamuel Ortiz read_bar(&mut self, base: u64, offset: u64, data: &mut [u8])1887db5b4763SSamuel Ortiz fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { 18884a99d3dbSSebastien Boeuf self.common.read_bar(base, offset, data) 1889c93d5361SSebastien Boeuf } 1890db5b4763SSamuel Ortiz write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>>18917cc729c7SRob Bradford fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 18924a99d3dbSSebastien Boeuf self.common.write_bar(base, offset, data) 1893db5b4763SSamuel Ortiz } 1894e536f880SSebastien Boeuf move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), io::Error>1895cdfc1773SRob Bradford fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), io::Error> { 1896d27ea34aSRob Bradford for region in self.common.mmio_regions.iter_mut() { 1897e536f880SSebastien Boeuf if region.start.raw_value() == old_base { 1898e536f880SSebastien Boeuf region.start = GuestAddress(new_base); 1899e536f880SSebastien Boeuf 1900ed5f2544SSebastien Boeuf for user_memory_region in region.user_memory_regions.iter_mut() { 19012f218274SBo Chen // Unmap the old MMIO region from vfio container 19022f218274SBo Chen if !self.iommu_attached { 19032f218274SBo Chen if let Err(e) = self 19042f218274SBo Chen .container 19052f218274SBo Chen .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) 19062f218274SBo Chen .map_err(|e| { 19072f218274SBo Chen VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf) 19082f218274SBo Chen }) 19092f218274SBo Chen { 19102f218274SBo Chen error!( 19112f218274SBo Chen "Could not unmap mmio region from vfio container: \ 19122f218274SBo Chen iova 0x{:x}, size 0x{:x}: {}, ", 19132f218274SBo Chen user_memory_region.start, user_memory_region.size, e 19142f218274SBo Chen ); 19152f218274SBo Chen } 19162f218274SBo Chen } 19172f218274SBo Chen 1918e6849699SWei Liu // Remove old region 1919053ea5dcSMuminul Islam let old_mem_region = self.vm.make_user_memory_region( 1920ed5f2544SSebastien Boeuf user_memory_region.slot, 1921ed5f2544SSebastien Boeuf user_memory_region.start, 1922ed5f2544SSebastien Boeuf user_memory_region.size, 1923ed5f2544SSebastien Boeuf user_memory_region.host_addr, 1924053ea5dcSMuminul Islam false, 19258baa244eSRob Bradford false, 1926053ea5dcSMuminul Islam ); 1927e4dee57eSMuminul Islam 1928e5552a53SWei Liu self.vm 19291f2915bfSWei Liu .remove_user_memory_region(old_mem_region) 1930ea4693a0SJinank Jain .map_err(io::Error::other)?; 1931e536f880SSebastien Boeuf 1932ed5f2544SSebastien Boeuf // Update the user memory region with the correct start address. 1933ed5f2544SSebastien Boeuf if new_base > old_base { 1934ed5f2544SSebastien Boeuf user_memory_region.start += new_base - old_base; 1935ed5f2544SSebastien Boeuf } else { 1936ed5f2544SSebastien Boeuf user_memory_region.start -= old_base - new_base; 1937ed5f2544SSebastien Boeuf } 1938ed5f2544SSebastien Boeuf 1939e6849699SWei Liu // Insert new region 1940053ea5dcSMuminul Islam let new_mem_region = self.vm.make_user_memory_region( 1941ed5f2544SSebastien Boeuf user_memory_region.slot, 1942ed5f2544SSebastien Boeuf user_memory_region.start, 1943ed5f2544SSebastien Boeuf user_memory_region.size, 1944ed5f2544SSebastien Boeuf user_memory_region.host_addr, 1945053ea5dcSMuminul Islam false, 19468baa244eSRob Bradford false, 1947053ea5dcSMuminul Islam ); 1948e4dee57eSMuminul Islam 1949e5552a53SWei Liu self.vm 19501f2915bfSWei Liu .create_user_memory_region(new_mem_region) 1951ea4693a0SJinank Jain .map_err(io::Error::other)?; 19522f218274SBo Chen 19532f218274SBo Chen // Map the moved mmio region to vfio container 19542f218274SBo Chen if !self.iommu_attached { 19552f218274SBo Chen self.container 19562f218274SBo Chen .vfio_dma_map( 19572f218274SBo Chen user_memory_region.start, 19582f218274SBo Chen user_memory_region.size, 19592f218274SBo Chen user_memory_region.host_addr, 19602f218274SBo Chen ) 19612f218274SBo Chen .map_err(|e| { 19622f218274SBo Chen VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf) 19632f218274SBo Chen }) 19642f218274SBo Chen .map_err(|e| { 19652f218274SBo Chen io::Error::other(format!( 19662f218274SBo Chen "Could not map mmio region to vfio container: \ 19672f218274SBo Chen iova 0x{:x}, size 0x{:x}: {}, ", 19682f218274SBo Chen user_memory_region.start, user_memory_region.size, e 19692f218274SBo Chen )) 19702f218274SBo Chen })?; 19712f218274SBo Chen } 1972e536f880SSebastien Boeuf } 1973e536f880SSebastien Boeuf } 1974e536f880SSebastien Boeuf } 1975d6c68e47SSebastien Boeuf 1976d6c68e47SSebastien Boeuf Ok(()) 1977e536f880SSebastien Boeuf } 1978de21c9baSSebastien Boeuf as_any_mut(&mut self) -> &mut dyn Any1979d99f2942SWei Liu fn as_any_mut(&mut self) -> &mut dyn Any { 1980de21c9baSSebastien Boeuf self 1981de21c9baSSebastien Boeuf } 19825264d545SSebastien Boeuf id(&self) -> Option<String>19835264d545SSebastien Boeuf fn id(&self) -> Option<String> { 19845264d545SSebastien Boeuf Some(self.id.clone()) 19855264d545SSebastien Boeuf } 1986c93d5361SSebastien Boeuf } 19873b59e570SSebastien Boeuf 19883b59e570SSebastien Boeuf impl Pausable for VfioPciDevice {} 19893b59e570SSebastien Boeuf 19903b59e570SSebastien Boeuf impl Snapshottable for VfioPciDevice { id(&self) -> String19913b59e570SSebastien Boeuf fn id(&self) -> String { 19923b59e570SSebastien Boeuf self.id.clone() 19933b59e570SSebastien Boeuf } 19943b59e570SSebastien Boeuf snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>19953b59e570SSebastien Boeuf fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1996748018acSSebastien Boeuf let mut vfio_pci_dev_snapshot = Snapshot::default(); 19973b59e570SSebastien Boeuf 19983b59e570SSebastien Boeuf // Snapshot VfioCommon 1999748018acSSebastien Boeuf vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?); 20003b59e570SSebastien Boeuf 20013b59e570SSebastien Boeuf Ok(vfio_pci_dev_snapshot) 20023b59e570SSebastien Boeuf } 20033b59e570SSebastien Boeuf } 20043b59e570SSebastien Boeuf impl Transportable for VfioPciDevice {} 20053b59e570SSebastien Boeuf impl Migratable for VfioPciDevice {} 2006a5e2460dSAndrew Carp 2007a5e2460dSAndrew Carp /// This structure implements the ExternalDmaMapping trait. It is meant to 2008a5e2460dSAndrew Carp /// be used when the caller tries to provide a way to update the mappings 2009a5e2460dSAndrew Carp /// associated with a specific VFIO container. 2010a5e2460dSAndrew Carp pub struct VfioDmaMapping<M: GuestAddressSpace> { 2011a5e2460dSAndrew Carp container: Arc<VfioContainer>, 2012a5e2460dSAndrew Carp memory: Arc<M>, 2013045964deSAndrew Carp mmio_regions: Arc<Mutex<Vec<MmioRegion>>>, 2014a5e2460dSAndrew Carp } 2015a5e2460dSAndrew Carp 2016a5e2460dSAndrew Carp impl<M: GuestAddressSpace> VfioDmaMapping<M> { 2017a5e2460dSAndrew Carp /// Create a DmaMapping object. 2018a5e2460dSAndrew Carp /// # Parameters 2019a5e2460dSAndrew Carp /// * `container`: VFIO container object. 2020045964deSAndrew Carp /// * `memory`: guest memory to mmap. 2021045964deSAndrew Carp /// * `mmio_regions`: mmio_regions to mmap. new( container: Arc<VfioContainer>, memory: Arc<M>, mmio_regions: Arc<Mutex<Vec<MmioRegion>>>, ) -> Self2022045964deSAndrew Carp pub fn new( 2023045964deSAndrew Carp container: Arc<VfioContainer>, 2024045964deSAndrew Carp memory: Arc<M>, 2025045964deSAndrew Carp mmio_regions: Arc<Mutex<Vec<MmioRegion>>>, 2026045964deSAndrew Carp ) -> Self { 2027045964deSAndrew Carp VfioDmaMapping { 2028045964deSAndrew Carp container, 2029045964deSAndrew Carp memory, 2030045964deSAndrew Carp mmio_regions, 2031045964deSAndrew Carp } 2032a5e2460dSAndrew Carp } 2033a5e2460dSAndrew Carp } 2034a5e2460dSAndrew Carp 2035a5e2460dSAndrew Carp impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioDmaMapping<M> { map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error>2036a5e2460dSAndrew Carp fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> { 2037a5e2460dSAndrew Carp let mem = self.memory.memory(); 2038a5e2460dSAndrew Carp let guest_addr = GuestAddress(gpa); 2039a5e2460dSAndrew Carp let user_addr = if mem.check_range(guest_addr, size as usize) { 2040045964deSAndrew Carp match mem.get_host_address(guest_addr) { 2041045964deSAndrew Carp Ok(t) => t as u64, 2042045964deSAndrew Carp Err(e) => { 2043ea4693a0SJinank Jain return Err(io::Error::other( 2044045964deSAndrew Carp format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}") 2045045964deSAndrew Carp )); 2046045964deSAndrew Carp } 2047045964deSAndrew Carp } 2048045964deSAndrew Carp } else if self.mmio_regions.lock().unwrap().check_range(gpa, size) { 2049045964deSAndrew Carp self.mmio_regions.lock().unwrap().find_user_address(gpa)? 2050a5e2460dSAndrew Carp } else { 2051ea4693a0SJinank Jain return Err(io::Error::other(format!( 2052ea4693a0SJinank Jain "failed to locate guest address 0x{gpa:x} in guest memory" 2053ea4693a0SJinank Jain ))); 2054a5e2460dSAndrew Carp }; 2055a5e2460dSAndrew Carp 2056a5e2460dSAndrew Carp self.container 2057a5e2460dSAndrew Carp .vfio_dma_map(iova, size, user_addr) 2058a5e2460dSAndrew Carp .map_err(|e| { 2059ea4693a0SJinank Jain io::Error::other(format!( 2060a5e2460dSAndrew Carp "failed to map memory for VFIO container, \ 2061a5e2460dSAndrew Carp iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}" 2062ea4693a0SJinank Jain )) 2063a5e2460dSAndrew Carp }) 2064a5e2460dSAndrew Carp } 2065a5e2460dSAndrew Carp unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error>2066a5e2460dSAndrew Carp fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error> { 2067a5e2460dSAndrew Carp self.container.vfio_dma_unmap(iova, size).map_err(|e| { 2068ea4693a0SJinank Jain io::Error::other(format!( 2069a5e2460dSAndrew Carp "failed to unmap memory for VFIO container, \ 2070a5e2460dSAndrew Carp iova 0x{iova:x}, size 0x{size:x}: {e:?}" 2071ea4693a0SJinank Jain )) 2072a5e2460dSAndrew Carp }) 2073a5e2460dSAndrew Carp } 2074a5e2460dSAndrew Carp } 2075