xref: /cloud-hypervisor/pci/src/vfio.rs (revision a3692144f0970f8d3f8ef3f8123d6a60ae982771)
1db5b4763SSamuel Ortiz // Copyright © 2019 Intel Corporation
2db5b4763SSamuel Ortiz //
3db5b4763SSamuel Ortiz // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4db5b4763SSamuel Ortiz //
5db5b4763SSamuel Ortiz 
6de21c9baSSebastien Boeuf use std::any::Any;
7e45e3df6SSebastien Boeuf use std::collections::{BTreeMap, HashMap};
8cdfc1773SRob Bradford use std::io;
9c93d5361SSebastien Boeuf use std::os::unix::io::AsRawFd;
10297b41d6SBo Chen use std::path::PathBuf;
11b746dd71SChao Peng use std::ptr::null_mut;
129ef1187fSRob Bradford use std::sync::{Arc, Barrier, Mutex};
1388a9f799SRob Bradford 
1488a9f799SRob Bradford use anyhow::anyhow;
1588a9f799SRob Bradford use byteorder::{ByteOrder, LittleEndian};
1688a9f799SRob Bradford use hypervisor::HypervisorVmError;
1788a9f799SRob Bradford use libc::{sysconf, _SC_PAGESIZE};
1888a9f799SRob Bradford use serde::{Deserialize, Serialize};
19cdfc1773SRob Bradford use thiserror::Error;
20db5b4763SSamuel Ortiz use vfio_bindings::bindings::vfio::*;
211108bd19SSebastien Boeuf use vfio_ioctls::{
221108bd19SSebastien Boeuf     VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea,
231108bd19SSebastien Boeuf };
24a7187168SJianyong Wu use vm_allocator::page_size::{
25a7187168SJianyong Wu     align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned,
26a7187168SJianyong Wu };
2781f8a27eSRob Bradford use vm_allocator::{AddressAllocator, MemorySlotAllocator, SystemAllocator};
28a5e2460dSAndrew Carp use vm_device::dma_mapping::ExternalDmaMapping;
29da2b3c92SSamuel Ortiz use vm_device::interrupt::{
30da2b3c92SSamuel Ortiz     InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
31da2b3c92SSamuel Ortiz };
326e084572SSebastien Boeuf use vm_device::{BusDevice, Resource};
33a5e2460dSAndrew Carp use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
3410ab87d6SRob Bradford use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
359caad739SRob Bradford use vmm_sys_util::eventfd::EventFd;
36c93d5361SSebastien Boeuf 
3788a9f799SRob Bradford use crate::msi::{MsiConfigState, MSI_CONFIG_ID};
3888a9f799SRob Bradford use crate::msix::MsixConfigState;
3988a9f799SRob Bradford use crate::{
4088a9f799SRob Bradford     msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig,
4188a9f799SRob Bradford     PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId,
4288a9f799SRob Bradford     PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId,
4388a9f799SRob Bradford     PciHeaderType, PciSubclass, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, PCI_CONFIGURATION_ID,
4488a9f799SRob Bradford };
4588a9f799SRob Bradford 
/// String identifier for the common VFIO device state.
// NOTE(review): presumably the snapshot id for `VfioCommonState` — confirm against its users.
pub(crate) const VFIO_COMMON_ID: &str = "vfio_common";
47cc3706afSSebastien Boeuf 
48cdfc1773SRob Bradford #[derive(Debug, Error)]
49c93d5361SSebastien Boeuf pub enum VfioPciError {
50*a3692144SPhilipp Schuster     #[error("Failed to create user memory region")]
511108bd19SSebastien Boeuf     CreateUserMemoryRegion(#[source] HypervisorVmError),
52297b41d6SBo Chen     #[error("Failed to DMA map: {0} for device {1} (guest BDF: {2})")]
53297b41d6SBo Chen     DmaMap(#[source] vfio_ioctls::VfioError, PathBuf, PciBdf),
54297b41d6SBo Chen     #[error("Failed to DMA unmap: {0} for device {1} (guest BDF: {2})")]
55297b41d6SBo Chen     DmaUnmap(#[source] vfio_ioctls::VfioError, PathBuf, PciBdf),
56*a3692144SPhilipp Schuster     #[error("Failed to enable INTx")]
57cdfc1773SRob Bradford     EnableIntx(#[source] VfioError),
58*a3692144SPhilipp Schuster     #[error("Failed to enable MSI")]
59cdfc1773SRob Bradford     EnableMsi(#[source] VfioError),
60*a3692144SPhilipp Schuster     #[error("Failed to enable MSI-x")]
61cdfc1773SRob Bradford     EnableMsix(#[source] VfioError),
621108bd19SSebastien Boeuf     #[error("Failed to mmap the area")]
631108bd19SSebastien Boeuf     MmapArea,
64cdfc1773SRob Bradford     #[error("Failed to notifier's eventfd")]
6527515a6eSSebastien Boeuf     MissingNotifier,
661108bd19SSebastien Boeuf     #[error("Invalid region alignment")]
671108bd19SSebastien Boeuf     RegionAlignment,
681108bd19SSebastien Boeuf     #[error("Invalid region size")]
691108bd19SSebastien Boeuf     RegionSize,
70*a3692144SPhilipp Schuster     #[error("Failed to retrieve MsiConfigState")]
71cc3706afSSebastien Boeuf     RetrieveMsiConfigState(#[source] anyhow::Error),
72*a3692144SPhilipp Schuster     #[error("Failed to retrieve MsixConfigState")]
73cc3706afSSebastien Boeuf     RetrieveMsixConfigState(#[source] anyhow::Error),
74*a3692144SPhilipp Schuster     #[error("Failed to retrieve PciConfigurationState")]
75cc3706afSSebastien Boeuf     RetrievePciConfigurationState(#[source] anyhow::Error),
76*a3692144SPhilipp Schuster     #[error("Failed to retrieve VfioCommonState")]
77cc3706afSSebastien Boeuf     RetrieveVfioCommonState(#[source] anyhow::Error),
78c93d5361SSebastien Boeuf }
79db5b4763SSamuel Ortiz 
80db5b4763SSamuel Ortiz #[derive(Copy, Clone)]
81db5b4763SSamuel Ortiz enum PciVfioSubclass {
82db5b4763SSamuel Ortiz     VfioSubclass = 0xff,
83db5b4763SSamuel Ortiz }
84db5b4763SSamuel Ortiz 
85db5b4763SSamuel Ortiz impl PciSubclass for PciVfioSubclass {
get_register_value(&self) -> u886db5b4763SSamuel Ortiz     fn get_register_value(&self) -> u8 {
87db5b4763SSamuel Ortiz         *self as u8
88db5b4763SSamuel Ortiz     }
89db5b4763SSamuel Ortiz }
90db5b4763SSamuel Ortiz 
/// Interrupt state transition detected after a capability update.
enum InterruptUpdateAction {
    /// MSI went from disabled to enabled.
    EnableMsi,
    /// MSI went from enabled to disabled.
    DisableMsi,
    /// MSI-X went from disabled to enabled.
    EnableMsix,
    /// MSI-X went from enabled to disabled.
    DisableMsix,
}
9720f01161SSebastien Boeuf 
9810ab87d6SRob Bradford #[derive(Serialize, Deserialize)]
9949069d84SSebastien Boeuf struct IntxState {
10049069d84SSebastien Boeuf     enabled: bool,
10149069d84SSebastien Boeuf }
10249069d84SSebastien Boeuf 
103ec1f7189SRob Bradford pub(crate) struct VfioIntx {
104dcc646f5SSebastien Boeuf     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
10519167e76SSebastien Boeuf     enabled: bool,
10619167e76SSebastien Boeuf }
10719167e76SSebastien Boeuf 
10810ab87d6SRob Bradford #[derive(Serialize, Deserialize)]
10949069d84SSebastien Boeuf struct MsiState {
11049069d84SSebastien Boeuf     cap: MsiCap,
11149069d84SSebastien Boeuf     cap_offset: u32,
11249069d84SSebastien Boeuf }
11349069d84SSebastien Boeuf 
114ec1f7189SRob Bradford pub(crate) struct VfioMsi {
115ec1f7189SRob Bradford     pub(crate) cfg: MsiConfig,
11620f01161SSebastien Boeuf     cap_offset: u32,
117dcc646f5SSebastien Boeuf     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
11820f01161SSebastien Boeuf }
11920f01161SSebastien Boeuf 
12020f01161SSebastien Boeuf impl VfioMsi {
update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>12120f01161SSebastien Boeuf     fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
122f3c38701SSebastien Boeuf         let old_enabled = self.cfg.enabled();
12320f01161SSebastien Boeuf 
124f3c38701SSebastien Boeuf         self.cfg.update(offset, data);
12520f01161SSebastien Boeuf 
126f3c38701SSebastien Boeuf         let new_enabled = self.cfg.enabled();
12720f01161SSebastien Boeuf 
12820f01161SSebastien Boeuf         if !old_enabled && new_enabled {
12920f01161SSebastien Boeuf             return Some(InterruptUpdateAction::EnableMsi);
13020f01161SSebastien Boeuf         }
13120f01161SSebastien Boeuf 
13220f01161SSebastien Boeuf         if old_enabled && !new_enabled {
13320f01161SSebastien Boeuf             return Some(InterruptUpdateAction::DisableMsi);
13420f01161SSebastien Boeuf         }
13520f01161SSebastien Boeuf 
13620f01161SSebastien Boeuf         None
13720f01161SSebastien Boeuf     }
13820f01161SSebastien Boeuf }
13920f01161SSebastien Boeuf 
14010ab87d6SRob Bradford #[derive(Serialize, Deserialize)]
14149069d84SSebastien Boeuf struct MsixState {
14249069d84SSebastien Boeuf     cap: MsixCap,
14349069d84SSebastien Boeuf     cap_offset: u32,
14449069d84SSebastien Boeuf     bdf: u32,
14549069d84SSebastien Boeuf }
14649069d84SSebastien Boeuf 
147ec1f7189SRob Bradford pub(crate) struct VfioMsix {
148ec1f7189SRob Bradford     pub(crate) bar: MsixConfig,
14920f01161SSebastien Boeuf     cap: MsixCap,
15020f01161SSebastien Boeuf     cap_offset: u32,
151dcc646f5SSebastien Boeuf     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
15220f01161SSebastien Boeuf }
15320f01161SSebastien Boeuf 
15420f01161SSebastien Boeuf impl VfioMsix {
update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>15520f01161SSebastien Boeuf     fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
1563fe362e3SSebastien Boeuf         let old_enabled = self.bar.enabled();
15720f01161SSebastien Boeuf 
15820f01161SSebastien Boeuf         // Update "Message Control" word
15920f01161SSebastien Boeuf         if offset == 2 && data.len() == 2 {
1603fe362e3SSebastien Boeuf             self.bar.set_msg_ctl(LittleEndian::read_u16(data));
16120f01161SSebastien Boeuf         }
16220f01161SSebastien Boeuf 
1633fe362e3SSebastien Boeuf         let new_enabled = self.bar.enabled();
16420f01161SSebastien Boeuf 
16520f01161SSebastien Boeuf         if !old_enabled && new_enabled {
16620f01161SSebastien Boeuf             return Some(InterruptUpdateAction::EnableMsix);
16720f01161SSebastien Boeuf         }
16820f01161SSebastien Boeuf 
16920f01161SSebastien Boeuf         if old_enabled && !new_enabled {
17020f01161SSebastien Boeuf             return Some(InterruptUpdateAction::DisableMsix);
17120f01161SSebastien Boeuf         }
17220f01161SSebastien Boeuf 
17320f01161SSebastien Boeuf         None
17420f01161SSebastien Boeuf     }
17520f01161SSebastien Boeuf 
table_accessed(&self, bar_index: u32, offset: u64) -> bool17620f01161SSebastien Boeuf     fn table_accessed(&self, bar_index: u32, offset: u64) -> bool {
17720f01161SSebastien Boeuf         let table_offset: u64 = u64::from(self.cap.table_offset());
17820f01161SSebastien Boeuf         let table_size: u64 = u64::from(self.cap.table_size()) * (MSIX_TABLE_ENTRY_SIZE as u64);
17920f01161SSebastien Boeuf         let table_bir: u32 = self.cap.table_bir();
18020f01161SSebastien Boeuf 
18120f01161SSebastien Boeuf         bar_index == table_bir && offset >= table_offset && offset < table_offset + table_size
18220f01161SSebastien Boeuf     }
18320f01161SSebastien Boeuf }
18420f01161SSebastien Boeuf 
185ec1f7189SRob Bradford pub(crate) struct Interrupt {
186ec1f7189SRob Bradford     pub(crate) intx: Option<VfioIntx>,
187ec1f7189SRob Bradford     pub(crate) msi: Option<VfioMsi>,
188ec1f7189SRob Bradford     pub(crate) msix: Option<VfioMsix>,
18920f01161SSebastien Boeuf }
19020f01161SSebastien Boeuf 
19120f01161SSebastien Boeuf impl Interrupt {
update_msi(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>19220f01161SSebastien Boeuf     fn update_msi(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
19320f01161SSebastien Boeuf         if let Some(ref mut msi) = &mut self.msi {
19420f01161SSebastien Boeuf             let action = msi.update(offset, data);
19520f01161SSebastien Boeuf             return action;
19620f01161SSebastien Boeuf         }
19720f01161SSebastien Boeuf 
19820f01161SSebastien Boeuf         None
19920f01161SSebastien Boeuf     }
20020f01161SSebastien Boeuf 
update_msix(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction>20120f01161SSebastien Boeuf     fn update_msix(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
20220f01161SSebastien Boeuf         if let Some(ref mut msix) = &mut self.msix {
20320f01161SSebastien Boeuf             let action = msix.update(offset, data);
20420f01161SSebastien Boeuf             return action;
20520f01161SSebastien Boeuf         }
20620f01161SSebastien Boeuf 
20720f01161SSebastien Boeuf         None
20820f01161SSebastien Boeuf     }
20920f01161SSebastien Boeuf 
accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)>210827229d8SRob Bradford     fn accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)> {
21120f01161SSebastien Boeuf         if let Some(msi) = &self.msi {
21220f01161SSebastien Boeuf             if offset >= u64::from(msi.cap_offset)
213f3c38701SSebastien Boeuf                 && offset < u64::from(msi.cap_offset) + msi.cfg.size()
21420f01161SSebastien Boeuf             {
21520f01161SSebastien Boeuf                 return Some((
216827229d8SRob Bradford                     PciCapabilityId::MessageSignalledInterrupts,
21720f01161SSebastien Boeuf                     u64::from(msi.cap_offset),
21820f01161SSebastien Boeuf                 ));
21920f01161SSebastien Boeuf             }
22020f01161SSebastien Boeuf         }
22120f01161SSebastien Boeuf 
22220f01161SSebastien Boeuf         if let Some(msix) = &self.msix {
22320f01161SSebastien Boeuf             if offset == u64::from(msix.cap_offset) {
224827229d8SRob Bradford                 return Some((PciCapabilityId::MsiX, u64::from(msix.cap_offset)));
22520f01161SSebastien Boeuf             }
22620f01161SSebastien Boeuf         }
22720f01161SSebastien Boeuf 
22820f01161SSebastien Boeuf         None
22920f01161SSebastien Boeuf     }
23020f01161SSebastien Boeuf 
msix_table_accessed(&self, bar_index: u32, offset: u64) -> bool23120f01161SSebastien Boeuf     fn msix_table_accessed(&self, bar_index: u32, offset: u64) -> bool {
23220f01161SSebastien Boeuf         if let Some(msix) = &self.msix {
23320f01161SSebastien Boeuf             return msix.table_accessed(bar_index, offset);
23420f01161SSebastien Boeuf         }
23520f01161SSebastien Boeuf 
23620f01161SSebastien Boeuf         false
23720f01161SSebastien Boeuf     }
23820f01161SSebastien Boeuf 
msix_write_table(&mut self, offset: u64, data: &[u8])23920f01161SSebastien Boeuf     fn msix_write_table(&mut self, offset: u64, data: &[u8]) {
24020f01161SSebastien Boeuf         if let Some(ref mut msix) = &mut self.msix {
24149ef201cSSebastien Boeuf             let offset = offset - u64::from(msix.cap.table_offset());
24220f01161SSebastien Boeuf             msix.bar.write_table(offset, data)
24320f01161SSebastien Boeuf         }
24420f01161SSebastien Boeuf     }
24520f01161SSebastien Boeuf 
msix_read_table(&self, offset: u64, data: &mut [u8])24620f01161SSebastien Boeuf     fn msix_read_table(&self, offset: u64, data: &mut [u8]) {
24720f01161SSebastien Boeuf         if let Some(msix) = &self.msix {
24849ef201cSSebastien Boeuf             let offset = offset - u64::from(msix.cap.table_offset());
24920f01161SSebastien Boeuf             msix.bar.read_table(offset, data)
25020f01161SSebastien Boeuf         }
25120f01161SSebastien Boeuf     }
25219167e76SSebastien Boeuf 
intx_in_use(&self) -> bool253ec1f7189SRob Bradford     pub(crate) fn intx_in_use(&self) -> bool {
25419167e76SSebastien Boeuf         if let Some(intx) = &self.intx {
25519167e76SSebastien Boeuf             return intx.enabled;
25619167e76SSebastien Boeuf         }
25719167e76SSebastien Boeuf 
25819167e76SSebastien Boeuf         false
25919167e76SSebastien Boeuf     }
26020f01161SSebastien Boeuf }
26120f01161SSebastien Boeuf 
/// A contiguous range of guest addresses and the host address backing it.
#[derive(Copy, Clone)]
pub struct UserMemoryRegion {
    /// Slot number of the region.
    // NOTE(review): presumably the hypervisor memory slot — confirm.
    pub slot: u32,
    /// First guest address covered by the region.
    pub start: u64,
    /// Size of the region in bytes.
    pub size: u64,
    /// Host address corresponding to `start`.
    pub host_addr: u64,
}
269ed5f2544SSebastien Boeuf 
270ed5f2544SSebastien Boeuf #[derive(Clone)]
271593a958fSRob Bradford pub struct MmioRegion {
272593a958fSRob Bradford     pub start: GuestAddress,
273593a958fSRob Bradford     pub length: GuestUsize,
274ec1f7189SRob Bradford     pub(crate) type_: PciBarRegionType,
275ec1f7189SRob Bradford     pub(crate) index: u32,
276ed5f2544SSebastien Boeuf     pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
277db5b4763SSamuel Ortiz }
278045964deSAndrew Carp 
279045964deSAndrew Carp trait MmioRegionRange {
check_range(&self, guest_addr: u64, size: u64) -> bool280045964deSAndrew Carp     fn check_range(&self, guest_addr: u64, size: u64) -> bool;
find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>281045964deSAndrew Carp     fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>;
282045964deSAndrew Carp }
283045964deSAndrew Carp 
284045964deSAndrew Carp impl MmioRegionRange for Vec<MmioRegion> {
285045964deSAndrew Carp     // Check if a guest address is within the range of mmio regions
check_range(&self, guest_addr: u64, size: u64) -> bool286045964deSAndrew Carp     fn check_range(&self, guest_addr: u64, size: u64) -> bool {
287045964deSAndrew Carp         for region in self.iter() {
288045964deSAndrew Carp             let Some(guest_addr_end) = guest_addr.checked_add(size) else {
289045964deSAndrew Carp                 return false;
290045964deSAndrew Carp             };
291045964deSAndrew Carp             let Some(region_end) = region.start.raw_value().checked_add(region.length) else {
292045964deSAndrew Carp                 return false;
293045964deSAndrew Carp             };
294045964deSAndrew Carp             if guest_addr >= region.start.raw_value() && guest_addr_end <= region_end {
295045964deSAndrew Carp                 return true;
296045964deSAndrew Carp             }
297045964deSAndrew Carp         }
298045964deSAndrew Carp         false
299045964deSAndrew Carp     }
300045964deSAndrew Carp 
301045964deSAndrew Carp     // Locate the user region address for a guest address within all mmio regions
find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>302045964deSAndrew Carp     fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error> {
303045964deSAndrew Carp         for region in self.iter() {
304045964deSAndrew Carp             for user_region in region.user_memory_regions.iter() {
305045964deSAndrew Carp                 if guest_addr >= user_region.start
306045964deSAndrew Carp                     && guest_addr < user_region.start + user_region.size
307045964deSAndrew Carp                 {
308045964deSAndrew Carp                     return Ok(user_region.host_addr + (guest_addr - user_region.start));
309045964deSAndrew Carp                 }
310045964deSAndrew Carp             }
311045964deSAndrew Carp         }
312045964deSAndrew Carp 
313ea4693a0SJinank Jain         Err(io::Error::other(format!(
314ea4693a0SJinank Jain             "unable to find user address: 0x{guest_addr:x}"
315ea4693a0SJinank Jain         )))
316045964deSAndrew Carp     }
317045964deSAndrew Carp }
318045964deSAndrew Carp 
319cdfc1773SRob Bradford #[derive(Debug, Error)]
320cdfc1773SRob Bradford pub enum VfioError {
321*a3692144SPhilipp Schuster     #[error("Kernel VFIO error")]
322cdfc1773SRob Bradford     KernelVfio(#[source] vfio_ioctls::VfioError),
323*a3692144SPhilipp Schuster     #[error("VFIO user error")]
3249254b74cSRob Bradford     VfioUser(#[source] vfio_user::Error),
325cdfc1773SRob Bradford }
326db5b4763SSamuel Ortiz 
3274a99d3dbSSebastien Boeuf pub(crate) trait Vfio: Send + Sync {
read_config_byte(&self, offset: u32) -> u8328a0e48a87SRob Bradford     fn read_config_byte(&self, offset: u32) -> u8 {
329a0e48a87SRob Bradford         let mut data: [u8; 1] = [0];
330a0e48a87SRob Bradford         self.read_config(offset, &mut data);
331a0e48a87SRob Bradford         data[0]
332a0e48a87SRob Bradford     }
333a0e48a87SRob Bradford 
read_config_word(&self, offset: u32) -> u16334a0e48a87SRob Bradford     fn read_config_word(&self, offset: u32) -> u16 {
335a0e48a87SRob Bradford         let mut data: [u8; 2] = [0, 0];
336a0e48a87SRob Bradford         self.read_config(offset, &mut data);
337a0e48a87SRob Bradford         u16::from_le_bytes(data)
338a0e48a87SRob Bradford     }
339a0e48a87SRob Bradford 
read_config_dword(&self, offset: u32) -> u32340a0e48a87SRob Bradford     fn read_config_dword(&self, offset: u32) -> u32 {
341a0e48a87SRob Bradford         let mut data: [u8; 4] = [0, 0, 0, 0];
342a0e48a87SRob Bradford         self.read_config(offset, &mut data);
343a0e48a87SRob Bradford         u32::from_le_bytes(data)
344a0e48a87SRob Bradford     }
345a0e48a87SRob Bradford 
write_config_dword(&self, offset: u32, buf: u32)346a0e48a87SRob Bradford     fn write_config_dword(&self, offset: u32, buf: u32) {
347a0e48a87SRob Bradford         let data: [u8; 4] = buf.to_le_bytes();
348a0e48a87SRob Bradford         self.write_config(offset, &data)
349a0e48a87SRob Bradford     }
350a0e48a87SRob Bradford 
read_config(&self, offset: u32, data: &mut [u8])35160d05451SRob Bradford     fn read_config(&self, offset: u32, data: &mut [u8]) {
35260d05451SRob Bradford         self.region_read(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data.as_mut());
35360d05451SRob Bradford     }
35460d05451SRob Bradford 
write_config(&self, offset: u32, data: &[u8])35560d05451SRob Bradford     fn write_config(&self, offset: u32, data: &[u8]) {
35660d05451SRob Bradford         self.region_write(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data)
35760d05451SRob Bradford     }
35860d05451SRob Bradford 
enable_msi(&self, fds: Vec<&EventFd>) -> Result<(), VfioError>359cdfc1773SRob Bradford     fn enable_msi(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> {
360ecc8382fSRob Bradford         self.enable_irq(VFIO_PCI_MSI_IRQ_INDEX, fds)
361ecc8382fSRob Bradford     }
362ecc8382fSRob Bradford 
disable_msi(&self) -> Result<(), VfioError>363cdfc1773SRob Bradford     fn disable_msi(&self) -> Result<(), VfioError> {
364ecc8382fSRob Bradford         self.disable_irq(VFIO_PCI_MSI_IRQ_INDEX)
365ecc8382fSRob Bradford     }
366ecc8382fSRob Bradford 
enable_msix(&self, fds: Vec<&EventFd>) -> Result<(), VfioError>367cdfc1773SRob Bradford     fn enable_msix(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> {
368ecc8382fSRob Bradford         self.enable_irq(VFIO_PCI_MSIX_IRQ_INDEX, fds)
369ecc8382fSRob Bradford     }
370ecc8382fSRob Bradford 
disable_msix(&self) -> Result<(), VfioError>371cdfc1773SRob Bradford     fn disable_msix(&self) -> Result<(), VfioError> {
372ecc8382fSRob Bradford         self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX)
373ecc8382fSRob Bradford     }
374ecc8382fSRob Bradford 
region_read(&self, _index: u32, _offset: u64, _data: &mut [u8])37560d05451SRob Bradford     fn region_read(&self, _index: u32, _offset: u64, _data: &mut [u8]) {
376349dbb9aSRob Bradford         unimplemented!()
377349dbb9aSRob Bradford     }
378349dbb9aSRob Bradford 
region_write(&self, _index: u32, _offset: u64, _data: &[u8])37960d05451SRob Bradford     fn region_write(&self, _index: u32, _offset: u64, _data: &[u8]) {
380349dbb9aSRob Bradford         unimplemented!()
381349dbb9aSRob Bradford     }
382521a11a1SRob Bradford 
get_irq_info(&self, _irq_index: u32) -> Option<VfioIrq>38351ceae91SRob Bradford     fn get_irq_info(&self, _irq_index: u32) -> Option<VfioIrq> {
384521a11a1SRob Bradford         unimplemented!()
385521a11a1SRob Bradford     }
386ecc8382fSRob Bradford 
enable_irq(&self, _irq_index: u32, _event_fds: Vec<&EventFd>) -> Result<(), VfioError>387cdfc1773SRob Bradford     fn enable_irq(&self, _irq_index: u32, _event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
388ecc8382fSRob Bradford         unimplemented!()
389ecc8382fSRob Bradford     }
390ecc8382fSRob Bradford 
disable_irq(&self, _irq_index: u32) -> Result<(), VfioError>391cdfc1773SRob Bradford     fn disable_irq(&self, _irq_index: u32) -> Result<(), VfioError> {
392ecc8382fSRob Bradford         unimplemented!()
393ecc8382fSRob Bradford     }
394a5f4d795SRob Bradford 
unmask_irq(&self, _irq_index: u32) -> Result<(), VfioError>395cdfc1773SRob Bradford     fn unmask_irq(&self, _irq_index: u32) -> Result<(), VfioError> {
396a5f4d795SRob Bradford         unimplemented!()
397a5f4d795SRob Bradford     }
398349dbb9aSRob Bradford }
399349dbb9aSRob Bradford 
400dc35dac3SRob Bradford struct VfioDeviceWrapper {
401db5b4763SSamuel Ortiz     device: Arc<VfioDevice>,
402db5b4763SSamuel Ortiz }
403db5b4763SSamuel Ortiz 
404dc35dac3SRob Bradford impl VfioDeviceWrapper {
new(device: Arc<VfioDevice>) -> Self405db5b4763SSamuel Ortiz     fn new(device: Arc<VfioDevice>) -> Self {
406349dbb9aSRob Bradford         Self { device }
407349dbb9aSRob Bradford     }
408db5b4763SSamuel Ortiz }
409db5b4763SSamuel Ortiz 
410dc35dac3SRob Bradford impl Vfio for VfioDeviceWrapper {
region_read(&self, index: u32, offset: u64, data: &mut [u8])41160d05451SRob Bradford     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
41260d05451SRob Bradford         self.device.region_read(index, data, offset)
413db5b4763SSamuel Ortiz     }
414db5b4763SSamuel Ortiz 
region_write(&self, index: u32, offset: u64, data: &[u8])41560d05451SRob Bradford     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
41660d05451SRob Bradford         self.device.region_write(index, data, offset)
417db5b4763SSamuel Ortiz     }
418521a11a1SRob Bradford 
get_irq_info(&self, irq_index: u32) -> Option<VfioIrq>41951ceae91SRob Bradford     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
42051ceae91SRob Bradford         self.device.get_irq_info(irq_index).copied()
421521a11a1SRob Bradford     }
422ecc8382fSRob Bradford 
enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError>423cdfc1773SRob Bradford     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
424cdfc1773SRob Bradford         self.device
425cdfc1773SRob Bradford             .enable_irq(irq_index, event_fds)
426cdfc1773SRob Bradford             .map_err(VfioError::KernelVfio)
427ecc8382fSRob Bradford     }
428ecc8382fSRob Bradford 
disable_irq(&self, irq_index: u32) -> Result<(), VfioError>429cdfc1773SRob Bradford     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
430cdfc1773SRob Bradford         self.device
431cdfc1773SRob Bradford             .disable_irq(irq_index)
432cdfc1773SRob Bradford             .map_err(VfioError::KernelVfio)
433ecc8382fSRob Bradford     }
434a5f4d795SRob Bradford 
unmask_irq(&self, irq_index: u32) -> Result<(), VfioError>435cdfc1773SRob Bradford     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
436cdfc1773SRob Bradford         self.device
437cdfc1773SRob Bradford             .unmask_irq(irq_index)
438cdfc1773SRob Bradford             .map_err(VfioError::KernelVfio)
439a5f4d795SRob Bradford     }
440db5b4763SSamuel Ortiz }
441db5b4763SSamuel Ortiz 
44210ab87d6SRob Bradford #[derive(Serialize, Deserialize)]
44349069d84SSebastien Boeuf struct VfioCommonState {
44449069d84SSebastien Boeuf     intx_state: Option<IntxState>,
44549069d84SSebastien Boeuf     msi_state: Option<MsiState>,
44649069d84SSebastien Boeuf     msix_state: Option<MsixState>,
44749069d84SSebastien Boeuf }
44849069d84SSebastien Boeuf 
/// A mask/patch pair stored per key in `VfioCommon::patches`.
// NOTE(review): presumably applied to a 32-bit config register as
// `(value & mask) | patch` — confirm against the code that consumes it.
pub(crate) struct ConfigPatch {
    mask: u32,
    patch: u32,
}
453e45e3df6SSebastien Boeuf 
454ec1f7189SRob Bradford pub(crate) struct VfioCommon {
455ec1f7189SRob Bradford     pub(crate) configuration: PciConfiguration,
456ec1f7189SRob Bradford     pub(crate) mmio_regions: Vec<MmioRegion>,
457ec1f7189SRob Bradford     pub(crate) interrupt: Interrupt,
458eb6daa2fSSebastien Boeuf     pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
459e6aa792cSSebastien Boeuf     pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
4604a99d3dbSSebastien Boeuf     pub(crate) vfio_wrapper: Arc<dyn Vfio>,
461e45e3df6SSebastien Boeuf     pub(crate) patches: HashMap<usize, ConfigPatch>,
462b750c332SThomas Barrett     x_nv_gpudirect_clique: Option<u8>,
463d27ea34aSRob Bradford }
464d27ea34aSRob Bradford 
46522275c34SRob Bradford impl VfioCommon {
new( msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, vfio_wrapper: Arc<dyn Vfio>, subclass: &dyn PciSubclass, bdf: PciBdf, snapshot: Option<Snapshot>, x_nv_gpudirect_clique: Option<u8>, ) -> Result<Self, VfioPciError>466d6bf1f5eSSebastien Boeuf     pub(crate) fn new(
467d6bf1f5eSSebastien Boeuf         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
468d6bf1f5eSSebastien Boeuf         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
469d6bf1f5eSSebastien Boeuf         vfio_wrapper: Arc<dyn Vfio>,
470d6bf1f5eSSebastien Boeuf         subclass: &dyn PciSubclass,
471d6bf1f5eSSebastien Boeuf         bdf: PciBdf,
472cc3706afSSebastien Boeuf         snapshot: Option<Snapshot>,
473b750c332SThomas Barrett         x_nv_gpudirect_clique: Option<u8>,
474d6bf1f5eSSebastien Boeuf     ) -> Result<Self, VfioPciError> {
475cc3706afSSebastien Boeuf         let pci_configuration_state =
47610ab87d6SRob Bradford             vm_migration::state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID).map_err(|e| {
477cc3706afSSebastien Boeuf                 VfioPciError::RetrievePciConfigurationState(anyhow!(
478cc3706afSSebastien Boeuf                     "Failed to get PciConfigurationState from Snapshot: {}",
479cc3706afSSebastien Boeuf                     e
480cc3706afSSebastien Boeuf                 ))
481cc3706afSSebastien Boeuf             })?;
482cc3706afSSebastien Boeuf 
483d6bf1f5eSSebastien Boeuf         let configuration = PciConfiguration::new(
484d6bf1f5eSSebastien Boeuf             0,
485d6bf1f5eSSebastien Boeuf             0,
486d6bf1f5eSSebastien Boeuf             0,
487d6bf1f5eSSebastien Boeuf             PciClassCode::Other,
488d6bf1f5eSSebastien Boeuf             subclass,
489d6bf1f5eSSebastien Boeuf             None,
490d6bf1f5eSSebastien Boeuf             PciHeaderType::Device,
491d6bf1f5eSSebastien Boeuf             0,
492d6bf1f5eSSebastien Boeuf             0,
493d6bf1f5eSSebastien Boeuf             None,
494cc3706afSSebastien Boeuf             pci_configuration_state,
495d6bf1f5eSSebastien Boeuf         );
496d6bf1f5eSSebastien Boeuf 
497d6bf1f5eSSebastien Boeuf         let mut vfio_common = VfioCommon {
498d6bf1f5eSSebastien Boeuf             mmio_regions: Vec::new(),
499d6bf1f5eSSebastien Boeuf             configuration,
500d6bf1f5eSSebastien Boeuf             interrupt: Interrupt {
501d6bf1f5eSSebastien Boeuf                 intx: None,
502d6bf1f5eSSebastien Boeuf                 msi: None,
503d6bf1f5eSSebastien Boeuf                 msix: None,
504d6bf1f5eSSebastien Boeuf             },
505d6bf1f5eSSebastien Boeuf             msi_interrupt_manager,
506d6bf1f5eSSebastien Boeuf             legacy_interrupt_group,
507d6bf1f5eSSebastien Boeuf             vfio_wrapper,
508d6bf1f5eSSebastien Boeuf             patches: HashMap::new(),
509b750c332SThomas Barrett             x_nv_gpudirect_clique,
510d6bf1f5eSSebastien Boeuf         };
511d6bf1f5eSSebastien Boeuf 
512cc3706afSSebastien Boeuf         let state: Option<VfioCommonState> = snapshot
513cc3706afSSebastien Boeuf             .as_ref()
51410ab87d6SRob Bradford             .map(|s| s.to_state())
515cc3706afSSebastien Boeuf             .transpose()
516cc3706afSSebastien Boeuf             .map_err(|e| {
517cc3706afSSebastien Boeuf                 VfioPciError::RetrieveVfioCommonState(anyhow!(
518cc3706afSSebastien Boeuf                     "Failed to get VfioCommonState from Snapshot: {}",
519cc3706afSSebastien Boeuf                     e
520cc3706afSSebastien Boeuf                 ))
521cc3706afSSebastien Boeuf             })?;
52210ab87d6SRob Bradford         let msi_state =
52310ab87d6SRob Bradford             vm_migration::state_from_id(snapshot.as_ref(), MSI_CONFIG_ID).map_err(|e| {
524cc3706afSSebastien Boeuf                 VfioPciError::RetrieveMsiConfigState(anyhow!(
525cc3706afSSebastien Boeuf                     "Failed to get MsiConfigState from Snapshot: {}",
526cc3706afSSebastien Boeuf                     e
527cc3706afSSebastien Boeuf                 ))
528cc3706afSSebastien Boeuf             })?;
52910ab87d6SRob Bradford         let msix_state =
53010ab87d6SRob Bradford             vm_migration::state_from_id(snapshot.as_ref(), MSIX_CONFIG_ID).map_err(|e| {
531cc3706afSSebastien Boeuf                 VfioPciError::RetrieveMsixConfigState(anyhow!(
532cc3706afSSebastien Boeuf                     "Failed to get MsixConfigState from Snapshot: {}",
533cc3706afSSebastien Boeuf                     e
534cc3706afSSebastien Boeuf                 ))
535cc3706afSSebastien Boeuf             })?;
536cc3706afSSebastien Boeuf 
537cc3706afSSebastien Boeuf         if let Some(state) = state.as_ref() {
538cc3706afSSebastien Boeuf             vfio_common.set_state(state, msi_state, msix_state)?;
539cc3706afSSebastien Boeuf         } else {
540d6bf1f5eSSebastien Boeuf             vfio_common.parse_capabilities(bdf);
541d6bf1f5eSSebastien Boeuf             vfio_common.initialize_legacy_interrupt()?;
542d6bf1f5eSSebastien Boeuf         }
543d6bf1f5eSSebastien Boeuf 
544d6bf1f5eSSebastien Boeuf         Ok(vfio_common)
545d6bf1f5eSSebastien Boeuf     }
546d6bf1f5eSSebastien Boeuf 
5477bf0cc1eSPhilipp Schuster     /// In case msix table offset is not page size aligned, we need do some fixup to achieve it.
5487bf0cc1eSPhilipp Schuster     /// Because we don't want the MMIO RW region and trap region overlap each other.
fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64549a7187168SJianyong Wu     fn fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64 {
550363b4780SRob Bradford         if let Some(msix) = self.interrupt.msix.as_mut() {
551a7187168SJianyong Wu             let msix_cap = &mut msix.cap;
552a7187168SJianyong Wu 
553a7187168SJianyong Wu             // Suppose table_bir equals to pba_bir here. Am I right?
554a7187168SJianyong Wu             let (table_offset, table_size) = msix_cap.table_range();
555a7187168SJianyong Wu             if is_page_size_aligned(table_offset) || msix_cap.table_bir() != bar_id {
556a7187168SJianyong Wu                 return region_size;
557a7187168SJianyong Wu             }
558a7187168SJianyong Wu 
559a7187168SJianyong Wu             let (pba_offset, pba_size) = msix_cap.pba_range();
560a7187168SJianyong Wu             let msix_sz = align_page_size_up(table_size + pba_size);
561a7187168SJianyong Wu             // Expand region to hold RW and trap region which both page size aligned
562a7187168SJianyong Wu             let size = std::cmp::max(region_size * 2, msix_sz * 2);
563a7187168SJianyong Wu             // let table starts from the middle of the region
564a7187168SJianyong Wu             msix_cap.table_set_offset((size / 2) as u32);
565a7187168SJianyong Wu             msix_cap.pba_set_offset((size / 2 + pba_offset - table_offset) as u32);
566a7187168SJianyong Wu 
567a7187168SJianyong Wu             size
568363b4780SRob Bradford         } else {
569363b4780SRob Bradford             // MSI-X not supported for this device
570363b4780SRob Bradford             region_size
571363b4780SRob Bradford         }
572a7187168SJianyong Wu     }
573a7187168SJianyong Wu 
57445b01d59SThomas Barrett     // The `allocator` argument is unused on `aarch64`
57545b01d59SThomas Barrett     #[allow(unused_variables)]
allocate_bars( &mut self, allocator: &Arc<Mutex<SystemAllocator>>, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, resources: Option<Vec<Resource>>, ) -> Result<Vec<PciBarConfiguration>, PciDeviceError>576ec1f7189SRob Bradford     pub(crate) fn allocate_bars(
57722275c34SRob Bradford         &mut self,
5789ef1187fSRob Bradford         allocator: &Arc<Mutex<SystemAllocator>>,
57945b01d59SThomas Barrett         mmio32_allocator: &mut AddressAllocator,
58045b01d59SThomas Barrett         mmio64_allocator: &mut AddressAllocator,
5816175cc09SSebastien Boeuf         resources: Option<Vec<Resource>>,
58289218b6dSSebastien Boeuf     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
58389218b6dSSebastien Boeuf         let mut bars = Vec::new();
584a9ec0f33SBo Chen         let mut bar_id = VFIO_PCI_BAR0_REGION_INDEX;
58522275c34SRob Bradford 
58622275c34SRob Bradford         // Going through all regular regions to compute the BAR size.
58722275c34SRob Bradford         // We're not saving the BAR address to restore it, because we
58822275c34SRob Bradford         // are going to allocate a guest address for each BAR and write
58922275c34SRob Bradford         // that new address back.
59022275c34SRob Bradford         while bar_id < VFIO_PCI_CONFIG_REGION_INDEX {
59137521ddfSSebastien Boeuf             let mut region_size: u64 = 0;
59237521ddfSSebastien Boeuf             let mut region_type = PciBarRegionType::Memory32BitRegion;
593868d1f69SSteven Dake             let mut prefetchable = PciBarPrefetchable::NotPrefetchable;
59437521ddfSSebastien Boeuf             let mut flags: u32 = 0;
59522275c34SRob Bradford 
59611e9f433SSebastien Boeuf             let mut restored_bar_addr = None;
59711e9f433SSebastien Boeuf             if let Some(resources) = &resources {
59811e9f433SSebastien Boeuf                 for resource in resources {
59937521ddfSSebastien Boeuf                     if let Resource::PciBar {
60037521ddfSSebastien Boeuf                         index,
60137521ddfSSebastien Boeuf                         base,
60237521ddfSSebastien Boeuf                         size,
60337521ddfSSebastien Boeuf                         type_,
60437521ddfSSebastien Boeuf                         ..
60537521ddfSSebastien Boeuf                     } = resource
60637521ddfSSebastien Boeuf                     {
60711e9f433SSebastien Boeuf                         if *index == bar_id as usize {
60811e9f433SSebastien Boeuf                             restored_bar_addr = Some(GuestAddress(*base));
60937521ddfSSebastien Boeuf                             region_size = *size;
61037521ddfSSebastien Boeuf                             region_type = PciBarRegionType::from(*type_);
61111e9f433SSebastien Boeuf                             break;
6126175cc09SSebastien Boeuf                         }
61311e9f433SSebastien Boeuf                     }
61411e9f433SSebastien Boeuf                 }
61537521ddfSSebastien Boeuf                 if restored_bar_addr.is_none() {
61637521ddfSSebastien Boeuf                     bar_id += 1;
61737521ddfSSebastien Boeuf                     continue;
61811e9f433SSebastien Boeuf                 }
61937521ddfSSebastien Boeuf             } else {
62022275c34SRob Bradford                 let bar_offset = if bar_id == VFIO_PCI_ROM_REGION_INDEX {
62122275c34SRob Bradford                     (PCI_ROM_EXP_BAR_INDEX * 4) as u32
62222275c34SRob Bradford                 } else {
62322275c34SRob Bradford                     PCI_CONFIG_BAR_OFFSET + bar_id * 4
62422275c34SRob Bradford                 };
62522275c34SRob Bradford 
62622275c34SRob Bradford                 // First read flags
6274a99d3dbSSebastien Boeuf                 flags = self.vfio_wrapper.read_config_dword(bar_offset);
62822275c34SRob Bradford 
62922275c34SRob Bradford                 // Is this an IO BAR?
63022275c34SRob Bradford                 let io_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX {
63122275c34SRob Bradford                     matches!(flags & PCI_CONFIG_IO_BAR, PCI_CONFIG_IO_BAR)
63222275c34SRob Bradford                 } else {
63322275c34SRob Bradford                     false
63422275c34SRob Bradford                 };
63522275c34SRob Bradford 
63622275c34SRob Bradford                 // Is this a 64-bit BAR?
63722275c34SRob Bradford                 let is_64bit_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX {
63822275c34SRob Bradford                     matches!(
63922275c34SRob Bradford                         flags & PCI_CONFIG_MEMORY_BAR_64BIT,
64022275c34SRob Bradford                         PCI_CONFIG_MEMORY_BAR_64BIT
64122275c34SRob Bradford                     )
64222275c34SRob Bradford                 } else {
64322275c34SRob Bradford                     false
64422275c34SRob Bradford                 };
64522275c34SRob Bradford 
646868d1f69SSteven Dake                 if matches!(
647868d1f69SSteven Dake                     flags & PCI_CONFIG_BAR_PREFETCHABLE,
648868d1f69SSteven Dake                     PCI_CONFIG_BAR_PREFETCHABLE
649868d1f69SSteven Dake                 ) {
650868d1f69SSteven Dake                     prefetchable = PciBarPrefetchable::Prefetchable
651868d1f69SSteven Dake                 };
652868d1f69SSteven Dake 
65322275c34SRob Bradford                 // To get size write all 1s
6544a99d3dbSSebastien Boeuf                 self.vfio_wrapper
6554a99d3dbSSebastien Boeuf                     .write_config_dword(bar_offset, 0xffff_ffff);
65622275c34SRob Bradford 
65722275c34SRob Bradford                 // And read back BAR value. The device will write zeros for bits it doesn't care about
6584a99d3dbSSebastien Boeuf                 let mut lower = self.vfio_wrapper.read_config_dword(bar_offset);
65922275c34SRob Bradford 
66022275c34SRob Bradford                 if io_bar {
66122275c34SRob Bradford                     // Mask flag bits (lowest 2 for I/O bars)
66222275c34SRob Bradford                     lower &= !0b11;
66322275c34SRob Bradford 
66422275c34SRob Bradford                     // BAR is not enabled
66522275c34SRob Bradford                     if lower == 0 {
66622275c34SRob Bradford                         bar_id += 1;
66722275c34SRob Bradford                         continue;
66822275c34SRob Bradford                     }
66922275c34SRob Bradford 
670cf68f03aSHenry Wang                     // IO BAR
671cf68f03aSHenry Wang                     region_type = PciBarRegionType::IoRegion;
672cf68f03aSHenry Wang 
67322275c34SRob Bradford                     // Invert bits and add 1 to calculate size
67422275c34SRob Bradford                     region_size = (!lower + 1) as u64;
67522275c34SRob Bradford                 } else if is_64bit_bar {
67622275c34SRob Bradford                     // 64 bits Memory BAR
67722275c34SRob Bradford                     region_type = PciBarRegionType::Memory64BitRegion;
67822275c34SRob Bradford 
67922275c34SRob Bradford                     // Query size of upper BAR of 64-bit BAR
68022275c34SRob Bradford                     let upper_offset: u32 = PCI_CONFIG_BAR_OFFSET + (bar_id + 1) * 4;
6814a99d3dbSSebastien Boeuf                     self.vfio_wrapper
6824a99d3dbSSebastien Boeuf                         .write_config_dword(upper_offset, 0xffff_ffff);
6834a99d3dbSSebastien Boeuf                     let upper = self.vfio_wrapper.read_config_dword(upper_offset);
68422275c34SRob Bradford 
685b57cc3d7SRob Bradford                     let mut combined_size = (u64::from(upper) << 32) | u64::from(lower);
68622275c34SRob Bradford 
68722275c34SRob Bradford                     // Mask out flag bits (lowest 4 for memory bars)
68822275c34SRob Bradford                     combined_size &= !0b1111;
68922275c34SRob Bradford 
69022275c34SRob Bradford                     // BAR is not enabled
69122275c34SRob Bradford                     if combined_size == 0 {
69222275c34SRob Bradford                         bar_id += 1;
69322275c34SRob Bradford                         continue;
69422275c34SRob Bradford                     }
69522275c34SRob Bradford 
69622275c34SRob Bradford                     // Invert and add 1 to to find size
697a9ec0f33SBo Chen                     region_size = !combined_size + 1;
69822275c34SRob Bradford                 } else {
69937521ddfSSebastien Boeuf                     region_type = PciBarRegionType::Memory32BitRegion;
70037521ddfSSebastien Boeuf 
70122275c34SRob Bradford                     // Mask out flag bits (lowest 4 for memory bars)
70222275c34SRob Bradford                     lower &= !0b1111;
70322275c34SRob Bradford 
70422275c34SRob Bradford                     if lower == 0 {
70522275c34SRob Bradford                         bar_id += 1;
70622275c34SRob Bradford                         continue;
70722275c34SRob Bradford                     }
70822275c34SRob Bradford 
70922275c34SRob Bradford                     // Invert and add 1 to to find size
71022275c34SRob Bradford                     region_size = (!lower + 1) as u64;
71137521ddfSSebastien Boeuf                 }
71237521ddfSSebastien Boeuf             }
71322275c34SRob Bradford 
71437521ddfSSebastien Boeuf             let bar_addr = match region_type {
71537521ddfSSebastien Boeuf                 PciBarRegionType::IoRegion => {
71637521ddfSSebastien Boeuf                     // The address needs to be 4 bytes aligned.
71737521ddfSSebastien Boeuf                     allocator
7189ef1187fSRob Bradford                         .lock()
7199ef1187fSRob Bradford                         .unwrap()
72037521ddfSSebastien Boeuf                         .allocate_io_addresses(restored_bar_addr, region_size, Some(0x4))
72137521ddfSSebastien Boeuf                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
72222275c34SRob Bradford                 }
72337521ddfSSebastien Boeuf                 PciBarRegionType::Memory32BitRegion => {
72437521ddfSSebastien Boeuf                     // BAR allocation must be naturally aligned
72545b01d59SThomas Barrett                     mmio32_allocator
72645b01d59SThomas Barrett                         .allocate(restored_bar_addr, region_size, Some(region_size))
72737521ddfSSebastien Boeuf                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
72837521ddfSSebastien Boeuf                 }
72937521ddfSSebastien Boeuf                 PciBarRegionType::Memory64BitRegion => {
730a7187168SJianyong Wu                     // We need do some fixup to keep MMIO RW region and msix cap region page size
731a7187168SJianyong Wu                     // aligned.
732a7187168SJianyong Wu                     region_size = self.fixup_msix_region(bar_id, region_size);
73345b01d59SThomas Barrett                     mmio64_allocator
734eca75dcfSJianyong Wu                         .allocate(
735eca75dcfSJianyong Wu                             restored_bar_addr,
736eca75dcfSJianyong Wu                             region_size,
737c9f94be7SThomas Barrett                             Some(std::cmp::max(
738eca75dcfSJianyong Wu                                 // SAFETY: FFI call. Trivially safe.
739c9f94be7SThomas Barrett                                 unsafe { sysconf(_SC_PAGESIZE) as GuestUsize },
740c9f94be7SThomas Barrett                                 region_size,
741c9f94be7SThomas Barrett                             )),
742eca75dcfSJianyong Wu                         )
74337521ddfSSebastien Boeuf                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
74437521ddfSSebastien Boeuf                 }
74537521ddfSSebastien Boeuf             };
74622275c34SRob Bradford 
74722275c34SRob Bradford             // We can now build our BAR configuration block.
74889218b6dSSebastien Boeuf             let bar = PciBarConfiguration::default()
749da95c0d7SSebastien Boeuf                 .set_index(bar_id as usize)
75022275c34SRob Bradford                 .set_address(bar_addr.raw_value())
75122275c34SRob Bradford                 .set_size(region_size)
752868d1f69SSteven Dake                 .set_region_type(region_type)
753868d1f69SSteven Dake                 .set_prefetchable(prefetchable);
75422275c34SRob Bradford 
75522275c34SRob Bradford             if bar_id == VFIO_PCI_ROM_REGION_INDEX {
75622275c34SRob Bradford                 self.configuration
75789218b6dSSebastien Boeuf                     .add_pci_rom_bar(&bar, flags & 0x1)
75822275c34SRob Bradford                     .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?;
75922275c34SRob Bradford             } else {
76022275c34SRob Bradford                 self.configuration
76189218b6dSSebastien Boeuf                     .add_pci_bar(&bar)
76222275c34SRob Bradford                     .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?;
76322275c34SRob Bradford             }
76422275c34SRob Bradford 
76589218b6dSSebastien Boeuf             bars.push(bar);
76622275c34SRob Bradford             self.mmio_regions.push(MmioRegion {
76722275c34SRob Bradford                 start: bar_addr,
76822275c34SRob Bradford                 length: region_size,
76922275c34SRob Bradford                 type_: region_type,
770a9ec0f33SBo Chen                 index: bar_id,
771ed5f2544SSebastien Boeuf                 user_memory_regions: Vec::new(),
77222275c34SRob Bradford             });
77322275c34SRob Bradford 
77422275c34SRob Bradford             bar_id += 1;
77537521ddfSSebastien Boeuf             if region_type == PciBarRegionType::Memory64BitRegion {
77622275c34SRob Bradford                 bar_id += 1;
77722275c34SRob Bradford             }
77822275c34SRob Bradford         }
77922275c34SRob Bradford 
78089218b6dSSebastien Boeuf         Ok(bars)
78122275c34SRob Bradford     }
78222275c34SRob Bradford 
78345b01d59SThomas Barrett     // The `allocator` argument is unused on `aarch64`
78445b01d59SThomas Barrett     #[allow(unused_variables)]
free_bars( &mut self, allocator: &mut SystemAllocator, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, ) -> Result<(), PciDeviceError>785ec1f7189SRob Bradford     pub(crate) fn free_bars(
78622275c34SRob Bradford         &mut self,
78722275c34SRob Bradford         allocator: &mut SystemAllocator,
78845b01d59SThomas Barrett         mmio32_allocator: &mut AddressAllocator,
78945b01d59SThomas Barrett         mmio64_allocator: &mut AddressAllocator,
790cdfc1773SRob Bradford     ) -> Result<(), PciDeviceError> {
79122275c34SRob Bradford         for region in self.mmio_regions.iter() {
79222275c34SRob Bradford             match region.type_ {
79322275c34SRob Bradford                 PciBarRegionType::IoRegion => {
79422275c34SRob Bradford                     allocator.free_io_addresses(region.start, region.length);
79522275c34SRob Bradford                 }
79622275c34SRob Bradford                 PciBarRegionType::Memory32BitRegion => {
79745b01d59SThomas Barrett                     mmio32_allocator.free(region.start, region.length);
79822275c34SRob Bradford                 }
79922275c34SRob Bradford                 PciBarRegionType::Memory64BitRegion => {
80045b01d59SThomas Barrett                     mmio64_allocator.free(region.start, region.length);
80122275c34SRob Bradford                 }
80222275c34SRob Bradford             }
80322275c34SRob Bradford         }
80422275c34SRob Bradford         Ok(())
80522275c34SRob Bradford     }
8062a76a589SRob Bradford 
parse_msix_capabilities(&mut self, cap: u8) -> MsixCap807de764456SJulian Stecklina     fn parse_msix_capabilities(&mut self, cap: u8) -> MsixCap {
8084a99d3dbSSebastien Boeuf         let msg_ctl = self.vfio_wrapper.read_config_word((cap + 2).into());
8092a76a589SRob Bradford 
8104a99d3dbSSebastien Boeuf         let table = self.vfio_wrapper.read_config_dword((cap + 4).into());
8112a76a589SRob Bradford 
8124a99d3dbSSebastien Boeuf         let pba = self.vfio_wrapper.read_config_dword((cap + 8).into());
8132a76a589SRob Bradford 
814f767e97fSSebastien Boeuf         MsixCap {
8152a76a589SRob Bradford             msg_ctl,
8162a76a589SRob Bradford             table,
8172a76a589SRob Bradford             pba,
818f767e97fSSebastien Boeuf         }
819f767e97fSSebastien Boeuf     }
8202a76a589SRob Bradford 
initialize_msix( &mut self, msix_cap: MsixCap, cap_offset: u32, bdf: PciBdf, state: Option<MsixConfigState>, )821de764456SJulian Stecklina     fn initialize_msix(
822cc3706afSSebastien Boeuf         &mut self,
823cc3706afSSebastien Boeuf         msix_cap: MsixCap,
824cc3706afSSebastien Boeuf         cap_offset: u32,
825cc3706afSSebastien Boeuf         bdf: PciBdf,
826cc3706afSSebastien Boeuf         state: Option<MsixConfigState>,
827cc3706afSSebastien Boeuf     ) {
828eb6daa2fSSebastien Boeuf         let interrupt_source_group = self
829eb6daa2fSSebastien Boeuf             .msi_interrupt_manager
8302a76a589SRob Bradford             .create_group(MsiIrqGroupConfig {
8312a76a589SRob Bradford                 base: 0,
8322a76a589SRob Bradford                 count: msix_cap.table_size() as InterruptIndex,
8332a76a589SRob Bradford             })
8342a76a589SRob Bradford             .unwrap();
8352a76a589SRob Bradford 
8361db77185SMichael Zhao         let msix_config = MsixConfig::new(
8371db77185SMichael Zhao             msix_cap.table_size(),
8381db77185SMichael Zhao             interrupt_source_group.clone(),
8391db77185SMichael Zhao             bdf.into(),
840cc3706afSSebastien Boeuf             state,
841eae80438SSebastien Boeuf         )
842eae80438SSebastien Boeuf         .unwrap();
8432a76a589SRob Bradford 
8442a76a589SRob Bradford         self.interrupt.msix = Some(VfioMsix {
8452a76a589SRob Bradford             bar: msix_config,
8462a76a589SRob Bradford             cap: msix_cap,
847f767e97fSSebastien Boeuf             cap_offset,
8482a76a589SRob Bradford             interrupt_source_group,
8492a76a589SRob Bradford         });
8502a76a589SRob Bradford     }
8512a76a589SRob Bradford 
parse_msi_capabilities(&mut self, cap: u8) -> u16852de764456SJulian Stecklina     fn parse_msi_capabilities(&mut self, cap: u8) -> u16 {
8534a99d3dbSSebastien Boeuf         self.vfio_wrapper.read_config_word((cap + 2).into())
854f767e97fSSebastien Boeuf     }
8552a76a589SRob Bradford 
initialize_msi(&mut self, msg_ctl: u16, cap_offset: u32, state: Option<MsiConfigState>)856de764456SJulian Stecklina     fn initialize_msi(&mut self, msg_ctl: u16, cap_offset: u32, state: Option<MsiConfigState>) {
857eb6daa2fSSebastien Boeuf         let interrupt_source_group = self
858eb6daa2fSSebastien Boeuf             .msi_interrupt_manager
8592a76a589SRob Bradford             .create_group(MsiIrqGroupConfig {
8602a76a589SRob Bradford                 base: 0,
8612a76a589SRob Bradford                 count: msi_num_enabled_vectors(msg_ctl) as InterruptIndex,
8622a76a589SRob Bradford             })
8632a76a589SRob Bradford             .unwrap();
8642a76a589SRob Bradford 
865cc3706afSSebastien Boeuf         let msi_config = MsiConfig::new(msg_ctl, interrupt_source_group.clone(), state).unwrap();
8662a76a589SRob Bradford 
8672a76a589SRob Bradford         self.interrupt.msi = Some(VfioMsi {
8682a76a589SRob Bradford             cfg: msi_config,
869f767e97fSSebastien Boeuf             cap_offset,
8702a76a589SRob Bradford             interrupt_source_group,
8712a76a589SRob Bradford         });
8722a76a589SRob Bradford     }
873521a11a1SRob Bradford 
87456ca26e7SJulian Stecklina     /// Returns true, if the device claims to have a PCI capability list.
has_capabilities(&self) -> bool875de764456SJulian Stecklina     fn has_capabilities(&self) -> bool {
87656ca26e7SJulian Stecklina         let status = self.vfio_wrapper.read_config_word(PCI_CONFIG_STATUS_OFFSET);
87756ca26e7SJulian Stecklina         status & PCI_CONFIG_STATUS_CAPABILITIES_LIST != 0
87856ca26e7SJulian Stecklina     }
87956ca26e7SJulian Stecklina 
get_msix_cap_idx(&self) -> Option<usize>880de764456SJulian Stecklina     fn get_msix_cap_idx(&self) -> Option<usize> {
88156ca26e7SJulian Stecklina         if !self.has_capabilities() {
88256ca26e7SJulian Stecklina             return None;
88356ca26e7SJulian Stecklina         }
88456ca26e7SJulian Stecklina 
885a7187168SJianyong Wu         let mut cap_next = self
886a7187168SJianyong Wu             .vfio_wrapper
887a0065452SJulian Stecklina             .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET)
888a0065452SJulian Stecklina             & PCI_CONFIG_CAPABILITY_PTR_MASK;
889a7187168SJianyong Wu 
890a7187168SJianyong Wu         while cap_next != 0 {
891a7187168SJianyong Wu             let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into());
892a7187168SJianyong Wu             if PciCapabilityId::from(cap_id) == PciCapabilityId::MsiX {
893a7187168SJianyong Wu                 return Some(cap_next as usize);
894a7187168SJianyong Wu             } else {
89500955568SJulian Stecklina                 let cap_ptr = self.vfio_wrapper.read_config_byte((cap_next + 1).into())
896a0065452SJulian Stecklina                     & PCI_CONFIG_CAPABILITY_PTR_MASK;
89700955568SJulian Stecklina 
89800955568SJulian Stecklina                 // See parse_capabilities below for an explanation.
89900955568SJulian Stecklina                 if cap_ptr != cap_next {
90000955568SJulian Stecklina                     cap_next = cap_ptr;
90100955568SJulian Stecklina                 } else {
90200955568SJulian Stecklina                     break;
90300955568SJulian Stecklina                 }
904a7187168SJianyong Wu             }
905a7187168SJianyong Wu         }
906a7187168SJianyong Wu 
907a7187168SJianyong Wu         None
908a7187168SJianyong Wu     }
909a7187168SJianyong Wu 
parse_capabilities(&mut self, bdf: PciBdf)910de764456SJulian Stecklina     fn parse_capabilities(&mut self, bdf: PciBdf) {
91156ca26e7SJulian Stecklina         if !self.has_capabilities() {
91256ca26e7SJulian Stecklina             return;
91356ca26e7SJulian Stecklina         }
91456ca26e7SJulian Stecklina 
915b750c332SThomas Barrett         let mut cap_iter = self
9164a99d3dbSSebastien Boeuf             .vfio_wrapper
917a0065452SJulian Stecklina             .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET)
918a0065452SJulian Stecklina             & PCI_CONFIG_CAPABILITY_PTR_MASK;
919521a11a1SRob Bradford 
920e45e3df6SSebastien Boeuf         let mut pci_express_cap_found = false;
921e45e3df6SSebastien Boeuf         let mut power_management_cap_found = false;
922e45e3df6SSebastien Boeuf 
923b750c332SThomas Barrett         while cap_iter != 0 {
924b750c332SThomas Barrett             let cap_id = self.vfio_wrapper.read_config_byte(cap_iter.into());
925521a11a1SRob Bradford 
926521a11a1SRob Bradford             match PciCapabilityId::from(cap_id) {
927521a11a1SRob Bradford                 PciCapabilityId::MessageSignalledInterrupts => {
9284a99d3dbSSebastien Boeuf                     if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSI_IRQ_INDEX) {
929521a11a1SRob Bradford                         if irq_info.count > 0 {
930521a11a1SRob Bradford                             // Parse capability only if the VFIO device
931521a11a1SRob Bradford                             // supports MSI.
932b750c332SThomas Barrett                             let msg_ctl = self.parse_msi_capabilities(cap_iter);
933b750c332SThomas Barrett                             self.initialize_msi(msg_ctl, cap_iter as u32, None);
934521a11a1SRob Bradford                         }
935521a11a1SRob Bradford                     }
936521a11a1SRob Bradford                 }
937521a11a1SRob Bradford                 PciCapabilityId::MsiX => {
9384a99d3dbSSebastien Boeuf                     if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSIX_IRQ_INDEX)
9394a99d3dbSSebastien Boeuf                     {
940521a11a1SRob Bradford                         if irq_info.count > 0 {
941521a11a1SRob Bradford                             // Parse capability only if the VFIO device
942521a11a1SRob Bradford                             // supports MSI-X.
943b750c332SThomas Barrett                             let msix_cap = self.parse_msix_capabilities(cap_iter);
944b750c332SThomas Barrett                             self.initialize_msix(msix_cap, cap_iter as u32, bdf, None);
945521a11a1SRob Bradford                         }
946521a11a1SRob Bradford                     }
947521a11a1SRob Bradford                 }
948e45e3df6SSebastien Boeuf                 PciCapabilityId::PciExpress => pci_express_cap_found = true,
949e45e3df6SSebastien Boeuf                 PciCapabilityId::PowerManagement => power_management_cap_found = true,
950521a11a1SRob Bradford                 _ => {}
951521a11a1SRob Bradford             };
952521a11a1SRob Bradford 
953a0065452SJulian Stecklina             let cap_next = self.vfio_wrapper.read_config_byte((cap_iter + 1).into())
954a0065452SJulian Stecklina                 & PCI_CONFIG_CAPABILITY_PTR_MASK;
95500955568SJulian Stecklina 
95600955568SJulian Stecklina             // Break out of the loop, if we either find the end or we have a broken device. This
95700955568SJulian Stecklina             // doesn't handle all cases where a device might send us in a loop here, but it
95800955568SJulian Stecklina             // handles case of a device returning 0xFF instead of implementing a real
95900955568SJulian Stecklina             // capabilities list.
96000955568SJulian Stecklina             if cap_next == 0 || cap_next == cap_iter {
961b750c332SThomas Barrett                 break;
962b750c332SThomas Barrett             }
963b750c332SThomas Barrett 
964b750c332SThomas Barrett             cap_iter = cap_next;
965b750c332SThomas Barrett         }
966b750c332SThomas Barrett 
967b750c332SThomas Barrett         if let Some(clique_id) = self.x_nv_gpudirect_clique {
968b750c332SThomas Barrett             self.add_nv_gpudirect_clique_cap(cap_iter, clique_id);
969521a11a1SRob Bradford         }
970e45e3df6SSebastien Boeuf 
971e45e3df6SSebastien Boeuf         if pci_express_cap_found && power_management_cap_found {
972e45e3df6SSebastien Boeuf             self.parse_extended_capabilities();
973e45e3df6SSebastien Boeuf         }
974e45e3df6SSebastien Boeuf     }
975e45e3df6SSebastien Boeuf 
add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8)976b750c332SThomas Barrett     fn add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8) {
977b750c332SThomas Barrett         // Turing, Ampere, Hopper, and Lovelace GPUs have dedicated space
978b750c332SThomas Barrett         // at 0xD4 for this capability.
979b750c332SThomas Barrett         let cap_offset = 0xd4u32;
980b750c332SThomas Barrett 
981b750c332SThomas Barrett         let reg_idx = (cap_iter / 4) as usize;
982b750c332SThomas Barrett         self.patches.insert(
983b750c332SThomas Barrett             reg_idx,
984b750c332SThomas Barrett             ConfigPatch {
985b750c332SThomas Barrett                 mask: 0x0000_ff00,
986b750c332SThomas Barrett                 patch: cap_offset << 8,
987b750c332SThomas Barrett             },
988b750c332SThomas Barrett         );
989b750c332SThomas Barrett 
990b750c332SThomas Barrett         let reg_idx = (cap_offset / 4) as usize;
991b750c332SThomas Barrett         self.patches.insert(
992b750c332SThomas Barrett             reg_idx,
993b750c332SThomas Barrett             ConfigPatch {
994b750c332SThomas Barrett                 mask: 0xffff_ffff,
995b750c332SThomas Barrett                 patch: 0x50080009u32,
996b750c332SThomas Barrett             },
997b750c332SThomas Barrett         );
998b750c332SThomas Barrett         self.patches.insert(
999b750c332SThomas Barrett             reg_idx + 1,
1000b750c332SThomas Barrett             ConfigPatch {
1001b750c332SThomas Barrett                 mask: 0xffff_ffff,
1002b57cc3d7SRob Bradford                 patch: (u32::from(clique_id) << 19) | 0x5032,
1003b750c332SThomas Barrett             },
1004b750c332SThomas Barrett         );
1005b750c332SThomas Barrett     }
1006b750c332SThomas Barrett 
parse_extended_capabilities(&mut self)1007e45e3df6SSebastien Boeuf     fn parse_extended_capabilities(&mut self) {
1008e45e3df6SSebastien Boeuf         let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET;
1009e45e3df6SSebastien Boeuf 
1010e45e3df6SSebastien Boeuf         loop {
1011e45e3df6SSebastien Boeuf             let ext_cap_hdr = self.vfio_wrapper.read_config_dword(current_offset);
1012e45e3df6SSebastien Boeuf 
1013e45e3df6SSebastien Boeuf             let cap_id: u16 = (ext_cap_hdr & 0xffff) as u16;
1014e45e3df6SSebastien Boeuf             let cap_next: u16 = ((ext_cap_hdr >> 20) & 0xfff) as u16;
1015e45e3df6SSebastien Boeuf 
1016e45e3df6SSebastien Boeuf             match PciExpressCapabilityId::from(cap_id) {
10177bf0cc1eSPhilipp Schuster                 PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation
1018e45e3df6SSebastien Boeuf                 | PciExpressCapabilityId::ResizeableBar
1019e45e3df6SSebastien Boeuf                 | PciExpressCapabilityId::SingleRootIoVirtualization => {
1020e45e3df6SSebastien Boeuf                     let reg_idx = (current_offset / 4) as usize;
1021e45e3df6SSebastien Boeuf                     self.patches.insert(
1022e45e3df6SSebastien Boeuf                         reg_idx,
1023e45e3df6SSebastien Boeuf                         ConfigPatch {
1024e45e3df6SSebastien Boeuf                             mask: 0x0000_ffff,
1025e45e3df6SSebastien Boeuf                             patch: PciExpressCapabilityId::NullCapability as u32,
1026e45e3df6SSebastien Boeuf                         },
1027e45e3df6SSebastien Boeuf                     );
1028e45e3df6SSebastien Boeuf                 }
1029e45e3df6SSebastien Boeuf                 _ => {}
1030e45e3df6SSebastien Boeuf             }
1031e45e3df6SSebastien Boeuf 
1032e45e3df6SSebastien Boeuf             if cap_next == 0 {
1033e45e3df6SSebastien Boeuf                 break;
1034e45e3df6SSebastien Boeuf             }
1035e45e3df6SSebastien Boeuf 
1036e45e3df6SSebastien Boeuf             current_offset = cap_next.into();
1037e45e3df6SSebastien Boeuf         }
1038521a11a1SRob Bradford     }
1039ecc8382fSRob Bradford 
enable_intx(&mut self) -> Result<(), VfioPciError>10404a99d3dbSSebastien Boeuf     pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> {
1041ecc8382fSRob Bradford         if let Some(intx) = &mut self.interrupt.intx {
1042ecc8382fSRob Bradford             if !intx.enabled {
1043ecc8382fSRob Bradford                 if let Some(eventfd) = intx.interrupt_source_group.notifier(0) {
10444a99d3dbSSebastien Boeuf                     self.vfio_wrapper
1045ecc8382fSRob Bradford                         .enable_irq(VFIO_PCI_INTX_IRQ_INDEX, vec![&eventfd])
1046ecc8382fSRob Bradford                         .map_err(VfioPciError::EnableIntx)?;
1047ecc8382fSRob Bradford 
1048ecc8382fSRob Bradford                     intx.enabled = true;
1049ecc8382fSRob Bradford                 } else {
1050ecc8382fSRob Bradford                     return Err(VfioPciError::MissingNotifier);
1051ecc8382fSRob Bradford                 }
1052ecc8382fSRob Bradford             }
1053ecc8382fSRob Bradford         }
1054ecc8382fSRob Bradford 
1055ecc8382fSRob Bradford         Ok(())
1056ecc8382fSRob Bradford     }
1057ecc8382fSRob Bradford 
disable_intx(&mut self)10584a99d3dbSSebastien Boeuf     pub(crate) fn disable_intx(&mut self) {
1059ecc8382fSRob Bradford         if let Some(intx) = &mut self.interrupt.intx {
1060ecc8382fSRob Bradford             if intx.enabled {
10614a99d3dbSSebastien Boeuf                 if let Err(e) = self.vfio_wrapper.disable_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1062ecc8382fSRob Bradford                     error!("Could not disable INTx: {}", e);
1063ecc8382fSRob Bradford                 } else {
1064ecc8382fSRob Bradford                     intx.enabled = false;
1065ecc8382fSRob Bradford                 }
1066ecc8382fSRob Bradford             }
1067ecc8382fSRob Bradford         }
1068ecc8382fSRob Bradford     }
1069ecc8382fSRob Bradford 
enable_msi(&self) -> Result<(), VfioPciError>10704a99d3dbSSebastien Boeuf     pub(crate) fn enable_msi(&self) -> Result<(), VfioPciError> {
1071ecc8382fSRob Bradford         if let Some(msi) = &self.interrupt.msi {
1072ecc8382fSRob Bradford             let mut irq_fds: Vec<EventFd> = Vec::new();
1073ecc8382fSRob Bradford             for i in 0..msi.cfg.num_enabled_vectors() {
1074ecc8382fSRob Bradford                 if let Some(eventfd) = msi.interrupt_source_group.notifier(i as InterruptIndex) {
1075ecc8382fSRob Bradford                     irq_fds.push(eventfd);
1076ecc8382fSRob Bradford                 } else {
1077ecc8382fSRob Bradford                     return Err(VfioPciError::MissingNotifier);
1078ecc8382fSRob Bradford                 }
1079ecc8382fSRob Bradford             }
1080ecc8382fSRob Bradford 
10814a99d3dbSSebastien Boeuf             self.vfio_wrapper
1082ecc8382fSRob Bradford                 .enable_msi(irq_fds.iter().collect())
1083ecc8382fSRob Bradford                 .map_err(VfioPciError::EnableMsi)?;
1084ecc8382fSRob Bradford         }
1085ecc8382fSRob Bradford 
1086ecc8382fSRob Bradford         Ok(())
1087ecc8382fSRob Bradford     }
1088ecc8382fSRob Bradford 
disable_msi(&self)10894a99d3dbSSebastien Boeuf     pub(crate) fn disable_msi(&self) {
10904a99d3dbSSebastien Boeuf         if let Err(e) = self.vfio_wrapper.disable_msi() {
1091ecc8382fSRob Bradford             error!("Could not disable MSI: {}", e);
1092ecc8382fSRob Bradford         }
1093ecc8382fSRob Bradford     }
1094ecc8382fSRob Bradford 
enable_msix(&self) -> Result<(), VfioPciError>10954a99d3dbSSebastien Boeuf     pub(crate) fn enable_msix(&self) -> Result<(), VfioPciError> {
1096ecc8382fSRob Bradford         if let Some(msix) = &self.interrupt.msix {
1097ecc8382fSRob Bradford             let mut irq_fds: Vec<EventFd> = Vec::new();
1098ecc8382fSRob Bradford             for i in 0..msix.bar.table_entries.len() {
1099ecc8382fSRob Bradford                 if let Some(eventfd) = msix.interrupt_source_group.notifier(i as InterruptIndex) {
1100ecc8382fSRob Bradford                     irq_fds.push(eventfd);
1101ecc8382fSRob Bradford                 } else {
1102ecc8382fSRob Bradford                     return Err(VfioPciError::MissingNotifier);
1103ecc8382fSRob Bradford                 }
1104ecc8382fSRob Bradford             }
1105ecc8382fSRob Bradford 
11064a99d3dbSSebastien Boeuf             self.vfio_wrapper
1107ecc8382fSRob Bradford                 .enable_msix(irq_fds.iter().collect())
1108ecc8382fSRob Bradford                 .map_err(VfioPciError::EnableMsix)?;
1109ecc8382fSRob Bradford         }
1110ecc8382fSRob Bradford 
1111ecc8382fSRob Bradford         Ok(())
1112ecc8382fSRob Bradford     }
1113ecc8382fSRob Bradford 
disable_msix(&self)11144a99d3dbSSebastien Boeuf     pub(crate) fn disable_msix(&self) {
11154a99d3dbSSebastien Boeuf         if let Err(e) = self.vfio_wrapper.disable_msix() {
1116ecc8382fSRob Bradford             error!("Could not disable MSI-X: {}", e);
1117ecc8382fSRob Bradford         }
1118ecc8382fSRob Bradford     }
1119ecc8382fSRob Bradford 
initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError>1120de764456SJulian Stecklina     fn initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError> {
11214a99d3dbSSebastien Boeuf         if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_INTX_IRQ_INDEX) {
1122ecc8382fSRob Bradford             if irq_info.count == 0 {
1123ecc8382fSRob Bradford                 // A count of 0 means the INTx IRQ is not supported, therefore
1124ecc8382fSRob Bradford                 // it shouldn't be initialized.
1125ecc8382fSRob Bradford                 return Ok(());
1126ecc8382fSRob Bradford             }
1127ecc8382fSRob Bradford         }
1128ecc8382fSRob Bradford 
1129e6aa792cSSebastien Boeuf         if let Some(interrupt_source_group) = self.legacy_interrupt_group.clone() {
1130ecc8382fSRob Bradford             self.interrupt.intx = Some(VfioIntx {
1131ecc8382fSRob Bradford                 interrupt_source_group,
1132ecc8382fSRob Bradford                 enabled: false,
1133ecc8382fSRob Bradford             });
1134ecc8382fSRob Bradford 
11354a99d3dbSSebastien Boeuf             self.enable_intx()?;
1136ecc8382fSRob Bradford         }
1137ecc8382fSRob Bradford 
1138ecc8382fSRob Bradford         Ok(())
1139ecc8382fSRob Bradford     }
1140ecc8382fSRob Bradford 
update_msi_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError>1141de764456SJulian Stecklina     fn update_msi_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError> {
1142ecc8382fSRob Bradford         match self.interrupt.update_msi(offset, data) {
1143ecc8382fSRob Bradford             Some(InterruptUpdateAction::EnableMsi) => {
1144ecc8382fSRob Bradford                 // Disable INTx before we can enable MSI
11454a99d3dbSSebastien Boeuf                 self.disable_intx();
11464a99d3dbSSebastien Boeuf                 self.enable_msi()?;
1147ecc8382fSRob Bradford             }
1148ecc8382fSRob Bradford             Some(InterruptUpdateAction::DisableMsi) => {
1149ecc8382fSRob Bradford                 // Fallback onto INTx when disabling MSI
11504a99d3dbSSebastien Boeuf                 self.disable_msi();
11514a99d3dbSSebastien Boeuf                 self.enable_intx()?;
1152ecc8382fSRob Bradford             }
1153ecc8382fSRob Bradford             _ => {}
1154ecc8382fSRob Bradford         }
1155ecc8382fSRob Bradford 
1156ecc8382fSRob Bradford         Ok(())
1157ecc8382fSRob Bradford     }
1158ecc8382fSRob Bradford 
update_msix_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError>1159de764456SJulian Stecklina     fn update_msix_capabilities(&mut self, offset: u64, data: &[u8]) -> Result<(), VfioPciError> {
1160ecc8382fSRob Bradford         match self.interrupt.update_msix(offset, data) {
1161ecc8382fSRob Bradford             Some(InterruptUpdateAction::EnableMsix) => {
1162ecc8382fSRob Bradford                 // Disable INTx before we can enable MSI-X
11634a99d3dbSSebastien Boeuf                 self.disable_intx();
11644a99d3dbSSebastien Boeuf                 self.enable_msix()?;
1165ecc8382fSRob Bradford             }
1166ecc8382fSRob Bradford             Some(InterruptUpdateAction::DisableMsix) => {
1167ecc8382fSRob Bradford                 // Fallback onto INTx when disabling MSI-X
11684a99d3dbSSebastien Boeuf                 self.disable_msix();
11694a99d3dbSSebastien Boeuf                 self.enable_intx()?;
1170ecc8382fSRob Bradford             }
1171ecc8382fSRob Bradford             _ => {}
1172ecc8382fSRob Bradford         }
1173ecc8382fSRob Bradford 
1174ecc8382fSRob Bradford         Ok(())
1175ecc8382fSRob Bradford     }
11762ff19345SRob Bradford 
find_region(&self, addr: u64) -> Option<MmioRegion>1177de764456SJulian Stecklina     fn find_region(&self, addr: u64) -> Option<MmioRegion> {
11782ff19345SRob Bradford         for region in self.mmio_regions.iter() {
11792ff19345SRob Bradford             if addr >= region.start.raw_value()
11802ff19345SRob Bradford                 && addr < region.start.unchecked_add(region.length).raw_value()
11812ff19345SRob Bradford             {
1182ed5f2544SSebastien Boeuf                 return Some(region.clone());
11832ff19345SRob Bradford             }
11842ff19345SRob Bradford         }
11852ff19345SRob Bradford         None
11862ff19345SRob Bradford     }
1187a5f4d795SRob Bradford 
read_bar(&mut self, base: u64, offset: u64, data: &mut [u8])11884a99d3dbSSebastien Boeuf     pub(crate) fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
1189a5f4d795SRob Bradford         let addr = base + offset;
1190a5f4d795SRob Bradford         if let Some(region) = self.find_region(addr) {
1191a5f4d795SRob Bradford             let offset = addr - region.start.raw_value();
1192a5f4d795SRob Bradford 
1193a5f4d795SRob Bradford             if self.interrupt.msix_table_accessed(region.index, offset) {
1194a5f4d795SRob Bradford                 self.interrupt.msix_read_table(offset, data);
1195a5f4d795SRob Bradford             } else {
11964a99d3dbSSebastien Boeuf                 self.vfio_wrapper.region_read(region.index, offset, data);
1197a5f4d795SRob Bradford             }
1198a5f4d795SRob Bradford         }
1199a5f4d795SRob Bradford 
1200a5f4d795SRob Bradford         // INTx EOI
1201a5f4d795SRob Bradford         // The guest reading from the BAR potentially means the interrupt has
1202a5f4d795SRob Bradford         // been received and can be acknowledged.
1203a5f4d795SRob Bradford         if self.interrupt.intx_in_use() {
12044a99d3dbSSebastien Boeuf             if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1205a5f4d795SRob Bradford                 error!("Failed unmasking INTx IRQ: {}", e);
1206a5f4d795SRob Bradford             }
1207a5f4d795SRob Bradford         }
1208a5f4d795SRob Bradford     }
1209a5f4d795SRob Bradford 
write_bar( &mut self, base: u64, offset: u64, data: &[u8], ) -> Option<Arc<Barrier>>1210a5f4d795SRob Bradford     pub(crate) fn write_bar(
1211a5f4d795SRob Bradford         &mut self,
1212a5f4d795SRob Bradford         base: u64,
1213a5f4d795SRob Bradford         offset: u64,
1214a5f4d795SRob Bradford         data: &[u8],
1215a5f4d795SRob Bradford     ) -> Option<Arc<Barrier>> {
1216a5f4d795SRob Bradford         let addr = base + offset;
1217a5f4d795SRob Bradford         if let Some(region) = self.find_region(addr) {
1218a5f4d795SRob Bradford             let offset = addr - region.start.raw_value();
1219a5f4d795SRob Bradford 
1220a5f4d795SRob Bradford             // If the MSI-X table is written to, we need to update our cache.
1221a5f4d795SRob Bradford             if self.interrupt.msix_table_accessed(region.index, offset) {
1222a5f4d795SRob Bradford                 self.interrupt.msix_write_table(offset, data);
1223a5f4d795SRob Bradford             } else {
12244a99d3dbSSebastien Boeuf                 self.vfio_wrapper.region_write(region.index, offset, data);
1225a5f4d795SRob Bradford             }
1226a5f4d795SRob Bradford         }
1227a5f4d795SRob Bradford 
1228a5f4d795SRob Bradford         // INTx EOI
1229a5f4d795SRob Bradford         // The guest writing to the BAR potentially means the interrupt has
1230a5f4d795SRob Bradford         // been received and can be acknowledged.
1231a5f4d795SRob Bradford         if self.interrupt.intx_in_use() {
12324a99d3dbSSebastien Boeuf             if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1233a5f4d795SRob Bradford                 error!("Failed unmasking INTx IRQ: {}", e);
1234a5f4d795SRob Bradford             }
1235a5f4d795SRob Bradford         }
1236a5f4d795SRob Bradford 
1237a5f4d795SRob Bradford         None
1238a5f4d795SRob Bradford     }
12391997152eSRob Bradford 
write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>)12401997152eSRob Bradford     pub(crate) fn write_config_register(
12411997152eSRob Bradford         &mut self,
12421997152eSRob Bradford         reg_idx: usize,
12431997152eSRob Bradford         offset: u64,
12441997152eSRob Bradford         data: &[u8],
1245aaf86ef2SBo Chen     ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>) {
12461997152eSRob Bradford         // When the guest wants to write to a BAR, we trap it into
12471997152eSRob Bradford         // our local configuration space. We're not reprogramming
12481997152eSRob Bradford         // VFIO device.
12491997152eSRob Bradford         if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(&reg_idx)
12501997152eSRob Bradford             || reg_idx == PCI_ROM_EXP_BAR_INDEX
12511997152eSRob Bradford         {
12521997152eSRob Bradford             // We keep our local cache updated with the BARs.
12531997152eSRob Bradford             // We'll read it back from there when the guest is asking
12541997152eSRob Bradford             // for BARs (see read_config_register()).
1255cb52cf91SBo Chen             return (
12561997152eSRob Bradford                 self.configuration
1257cb52cf91SBo Chen                     .write_config_register(reg_idx, offset, data),
1258cb52cf91SBo Chen                 None,
1259cb52cf91SBo Chen             );
12601997152eSRob Bradford         }
12611997152eSRob Bradford 
12621997152eSRob Bradford         let reg = (reg_idx * PCI_CONFIG_REGISTER_SIZE) as u64;
12631997152eSRob Bradford 
12641997152eSRob Bradford         // If the MSI or MSI-X capabilities are accessed, we need to
12651997152eSRob Bradford         // update our local cache accordingly.
12661997152eSRob Bradford         // Depending on how the capabilities are modified, this could
12671997152eSRob Bradford         // trigger a VFIO MSI or MSI-X toggle.
12681997152eSRob Bradford         if let Some((cap_id, cap_base)) = self.interrupt.accessed(reg) {
12691997152eSRob Bradford             let cap_offset: u64 = reg - cap_base + offset;
12701997152eSRob Bradford             match cap_id {
12711997152eSRob Bradford                 PciCapabilityId::MessageSignalledInterrupts => {
12724a99d3dbSSebastien Boeuf                     if let Err(e) = self.update_msi_capabilities(cap_offset, data) {
12731997152eSRob Bradford                         error!("Could not update MSI capabilities: {}", e);
12741997152eSRob Bradford                     }
12751997152eSRob Bradford                 }
12761997152eSRob Bradford                 PciCapabilityId::MsiX => {
12774a99d3dbSSebastien Boeuf                     if let Err(e) = self.update_msix_capabilities(cap_offset, data) {
12781997152eSRob Bradford                         error!("Could not update MSI-X capabilities: {}", e);
12791997152eSRob Bradford                     }
12801997152eSRob Bradford                 }
12811997152eSRob Bradford                 _ => {}
12821997152eSRob Bradford             }
12831997152eSRob Bradford         }
12841997152eSRob Bradford 
12851997152eSRob Bradford         // Make sure to write to the device's PCI config space after MSI/MSI-X
12861997152eSRob Bradford         // interrupts have been enabled/disabled. In case of MSI, when the
12871997152eSRob Bradford         // interrupts are enabled through VFIO (using VFIO_DEVICE_SET_IRQS),
12881997152eSRob Bradford         // the MSI Enable bit in the MSI capability structure found in the PCI
12891997152eSRob Bradford         // config space is disabled by default. That's why when the guest is
12901997152eSRob Bradford         // enabling this bit, we first need to enable the MSI interrupts with
12911997152eSRob Bradford         // VFIO through VFIO_DEVICE_SET_IRQS ioctl, and only after we can write
12921997152eSRob Bradford         // to the device region to update the MSI Enable bit.
12934a99d3dbSSebastien Boeuf         self.vfio_wrapper.write_config((reg + offset) as u32, data);
12941997152eSRob Bradford 
12958da7c13eSBo Chen         // Return pending BAR repgrogramming if MSE bit is set
12968da7c13eSBo Chen         let mut ret_param = self.configuration.pending_bar_reprogram();
12978da7c13eSBo Chen         if !ret_param.is_empty() {
12988da7c13eSBo Chen             if self.read_config_register(crate::configuration::COMMAND_REG)
12998da7c13eSBo Chen                 & crate::configuration::COMMAND_REG_MEMORY_SPACE_MASK
13008da7c13eSBo Chen                 == crate::configuration::COMMAND_REG_MEMORY_SPACE_MASK
13018da7c13eSBo Chen             {
13028da7c13eSBo Chen                 info!("BAR reprogramming parameter is returned: {:x?}", ret_param);
13038da7c13eSBo Chen                 self.configuration.clear_pending_bar_reprogram();
13048da7c13eSBo Chen             } else {
13058da7c13eSBo Chen                 info!(
13068da7c13eSBo Chen                     "MSE bit is disabled. No BAR reprogramming parameter is returned: {:x?}",
13078da7c13eSBo Chen                     ret_param
13088da7c13eSBo Chen                 );
13098da7c13eSBo Chen 
13108da7c13eSBo Chen                 ret_param = Vec::new();
13118da7c13eSBo Chen             }
13128da7c13eSBo Chen         }
13138da7c13eSBo Chen 
13148da7c13eSBo Chen         (ret_param, None)
13151997152eSRob Bradford     }
13161997152eSRob Bradford 
read_config_register(&mut self, reg_idx: usize) -> u3213174a99d3dbSSebastien Boeuf     pub(crate) fn read_config_register(&mut self, reg_idx: usize) -> u32 {
13181997152eSRob Bradford         // When reading the BARs, we trap it and return what comes
13191997152eSRob Bradford         // from our local configuration space. We want the guest to
13201997152eSRob Bradford         // use that and not the VFIO device BARs as it does not map
13211997152eSRob Bradford         // with the guest address space.
13221997152eSRob Bradford         if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(&reg_idx)
13231997152eSRob Bradford             || reg_idx == PCI_ROM_EXP_BAR_INDEX
13241997152eSRob Bradford         {
13251997152eSRob Bradford             return self.configuration.read_reg(reg_idx);
13261997152eSRob Bradford         }
13271997152eSRob Bradford 
1328a7187168SJianyong Wu         if let Some(id) = self.get_msix_cap_idx() {
1329a7187168SJianyong Wu             let msix = self.interrupt.msix.as_mut().unwrap();
1330a7187168SJianyong Wu             if reg_idx * 4 == id + 4 {
1331a7187168SJianyong Wu                 return msix.cap.table;
1332a7187168SJianyong Wu             } else if reg_idx * 4 == id + 8 {
1333a7187168SJianyong Wu                 return msix.cap.pba;
1334a7187168SJianyong Wu             }
1335a7187168SJianyong Wu         }
1336a7187168SJianyong Wu 
13371997152eSRob Bradford         // Since we don't support passing multi-functions devices, we should
13381997152eSRob Bradford         // mask the multi-function bit, bit 7 of the Header Type byte on the
13391997152eSRob Bradford         // register 3.
13401997152eSRob Bradford         let mask = if reg_idx == PCI_HEADER_TYPE_REG_INDEX {
13411997152eSRob Bradford             0xff7f_ffff
13421997152eSRob Bradford         } else {
13431997152eSRob Bradford             0xffff_ffff
13441997152eSRob Bradford         };
13451997152eSRob Bradford 
13461997152eSRob Bradford         // The config register read comes from the VFIO device itself.
1347e45e3df6SSebastien Boeuf         let mut value = self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask;
1348e45e3df6SSebastien Boeuf 
1349e45e3df6SSebastien Boeuf         if let Some(config_patch) = self.patches.get(&reg_idx) {
1350e45e3df6SSebastien Boeuf             value = (value & !config_patch.mask) | config_patch.patch;
1351e45e3df6SSebastien Boeuf         }
1352e45e3df6SSebastien Boeuf 
1353e45e3df6SSebastien Boeuf         value
13541997152eSRob Bradford     }
135549069d84SSebastien Boeuf 
state(&self) -> VfioCommonState135649069d84SSebastien Boeuf     fn state(&self) -> VfioCommonState {
135749069d84SSebastien Boeuf         let intx_state = self.interrupt.intx.as_ref().map(|intx| IntxState {
135849069d84SSebastien Boeuf             enabled: intx.enabled,
135949069d84SSebastien Boeuf         });
136049069d84SSebastien Boeuf 
136149069d84SSebastien Boeuf         let msi_state = self.interrupt.msi.as_ref().map(|msi| MsiState {
136249069d84SSebastien Boeuf             cap: msi.cfg.cap,
136349069d84SSebastien Boeuf             cap_offset: msi.cap_offset,
136449069d84SSebastien Boeuf         });
136549069d84SSebastien Boeuf 
136649069d84SSebastien Boeuf         let msix_state = self.interrupt.msix.as_ref().map(|msix| MsixState {
136749069d84SSebastien Boeuf             cap: msix.cap,
136849069d84SSebastien Boeuf             cap_offset: msix.cap_offset,
136949069d84SSebastien Boeuf             bdf: msix.bar.devid,
137049069d84SSebastien Boeuf         });
137149069d84SSebastien Boeuf 
137249069d84SSebastien Boeuf         VfioCommonState {
137349069d84SSebastien Boeuf             intx_state,
137449069d84SSebastien Boeuf             msi_state,
137549069d84SSebastien Boeuf             msix_state,
137649069d84SSebastien Boeuf         }
137749069d84SSebastien Boeuf     }
137849069d84SSebastien Boeuf 
set_state( &mut self, state: &VfioCommonState, msi_state: Option<MsiConfigState>, msix_state: Option<MsixConfigState>, ) -> Result<(), VfioPciError>1379cc3706afSSebastien Boeuf     fn set_state(
1380cc3706afSSebastien Boeuf         &mut self,
1381cc3706afSSebastien Boeuf         state: &VfioCommonState,
1382cc3706afSSebastien Boeuf         msi_state: Option<MsiConfigState>,
1383cc3706afSSebastien Boeuf         msix_state: Option<MsixConfigState>,
1384cc3706afSSebastien Boeuf     ) -> Result<(), VfioPciError> {
138549069d84SSebastien Boeuf         if let (Some(intx), Some(interrupt_source_group)) =
138649069d84SSebastien Boeuf             (&state.intx_state, self.legacy_interrupt_group.clone())
138749069d84SSebastien Boeuf         {
138849069d84SSebastien Boeuf             self.interrupt.intx = Some(VfioIntx {
138949069d84SSebastien Boeuf                 interrupt_source_group,
139049069d84SSebastien Boeuf                 enabled: false,
139149069d84SSebastien Boeuf             });
139249069d84SSebastien Boeuf 
139349069d84SSebastien Boeuf             if intx.enabled {
139449069d84SSebastien Boeuf                 self.enable_intx()?;
139549069d84SSebastien Boeuf             }
139649069d84SSebastien Boeuf         }
139749069d84SSebastien Boeuf 
139849069d84SSebastien Boeuf         if let Some(msi) = &state.msi_state {
1399cc3706afSSebastien Boeuf             self.initialize_msi(msi.cap.msg_ctl, msi.cap_offset, msi_state);
140049069d84SSebastien Boeuf         }
140149069d84SSebastien Boeuf 
140249069d84SSebastien Boeuf         if let Some(msix) = &state.msix_state {
1403cc3706afSSebastien Boeuf             self.initialize_msix(msix.cap, msix.cap_offset, msix.bdf.into(), msix_state);
140449069d84SSebastien Boeuf         }
140549069d84SSebastien Boeuf 
140649069d84SSebastien Boeuf         Ok(())
140749069d84SSebastien Boeuf     }
140849069d84SSebastien Boeuf }
140949069d84SSebastien Boeuf 
// The empty body means `VfioCommon` relies entirely on the default method
// implementations provided by the `Pausable` trait.
impl Pausable for VfioCommon {}
141149069d84SSebastien Boeuf 
141249069d84SSebastien Boeuf impl Snapshottable for VfioCommon {
id(&self) -> String141349069d84SSebastien Boeuf     fn id(&self) -> String {
1414cc3706afSSebastien Boeuf         String::from(VFIO_COMMON_ID)
141549069d84SSebastien Boeuf     }
141649069d84SSebastien Boeuf 
snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>141749069d84SSebastien Boeuf     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
141810ab87d6SRob Bradford         let mut vfio_common_snapshot = Snapshot::new_from_state(&self.state())?;
141949069d84SSebastien Boeuf 
142049069d84SSebastien Boeuf         // Snapshot PciConfiguration
1421748018acSSebastien Boeuf         vfio_common_snapshot.add_snapshot(self.configuration.id(), self.configuration.snapshot()?);
142249069d84SSebastien Boeuf 
142349069d84SSebastien Boeuf         // Snapshot MSI
142449069d84SSebastien Boeuf         if let Some(msi) = &mut self.interrupt.msi {
1425748018acSSebastien Boeuf             vfio_common_snapshot.add_snapshot(msi.cfg.id(), msi.cfg.snapshot()?);
142649069d84SSebastien Boeuf         }
142749069d84SSebastien Boeuf 
142849069d84SSebastien Boeuf         // Snapshot MSI-X
142949069d84SSebastien Boeuf         if let Some(msix) = &mut self.interrupt.msix {
1430748018acSSebastien Boeuf             vfio_common_snapshot.add_snapshot(msix.bar.id(), msix.bar.snapshot()?);
143149069d84SSebastien Boeuf         }
143249069d84SSebastien Boeuf 
143349069d84SSebastien Boeuf         Ok(vfio_common_snapshot)
143449069d84SSebastien Boeuf     }
143522275c34SRob Bradford }
143622275c34SRob Bradford 
/// VfioPciDevice represents a VFIO PCI device.
/// This structure implements the BusDevice and PciDevice traits.
///
/// A VfioPciDevice is bound to a VfioDevice and is also a PCI device.
/// The VMM creates a VfioDevice, then assigns it to a VfioPciDevice,
/// which then gets added to the PCI bus.
pub struct VfioPciDevice {
    // Identifier passed to `new()`.
    id: String,
    // Hypervisor VM handle, cloned from the one passed to `new()`.
    vm: Arc<dyn hypervisor::Vm>,
    // The underlying VFIO device; `new()` also shares it with the
    // `VfioDeviceWrapper` used by `common`.
    device: Arc<VfioDevice>,
    // VFIO container passed to `new()`.
    container: Arc<VfioContainer>,
    // Shared VFIO PCI logic (configuration space, interrupts, BAR accesses).
    common: VfioCommon,
    // Value reported by `iommu_attached()`.
    iommu_attached: bool,
    // Memory slot allocator passed to `new()`.
    memory_slot_allocator: MemorySlotAllocator,
    // PCI bus/device/function passed to `new()`.
    bdf: PciBdf,
    // Device path passed to `new()` — presumably the host path of the VFIO
    // device; verify against the caller.
    device_path: PathBuf,
}
1454db5b4763SSamuel Ortiz 
1455db5b4763SSamuel Ortiz impl VfioPciDevice {
1456db5b4763SSamuel Ortiz     /// Constructs a new Vfio Pci device for the given Vfio device
14575264d545SSebastien Boeuf     #[allow(clippy::too_many_arguments)]
new( id: String, vm: &Arc<dyn hypervisor::Vm>, device: VfioDevice, container: Arc<VfioContainer>, msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, iommu_attached: bool, bdf: PciBdf, memory_slot_allocator: MemorySlotAllocator, snapshot: Option<Snapshot>, x_nv_gpudirect_clique: Option<u8>, device_path: PathBuf, ) -> Result<Self, VfioPciError>1458db5b4763SSamuel Ortiz     pub fn new(
14595264d545SSebastien Boeuf         id: String,
1460e5552a53SWei Liu         vm: &Arc<dyn hypervisor::Vm>,
1461db5b4763SSamuel Ortiz         device: VfioDevice,
1462a0a89b13SSebastien Boeuf         container: Arc<VfioContainer>,
1463eb6daa2fSSebastien Boeuf         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
1464dcc646f5SSebastien Boeuf         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
1465a0a89b13SSebastien Boeuf         iommu_attached: bool,
14661db77185SMichael Zhao         bdf: PciBdf,
146781f8a27eSRob Bradford         memory_slot_allocator: MemorySlotAllocator,
1468cc3706afSSebastien Boeuf         snapshot: Option<Snapshot>,
1469b750c332SThomas Barrett         x_nv_gpudirect_clique: Option<u8>,
1470297b41d6SBo Chen         device_path: PathBuf,
1471cdfc1773SRob Bradford     ) -> Result<Self, VfioPciError> {
1472db5b4763SSamuel Ortiz         let device = Arc::new(device);
1473db5b4763SSamuel Ortiz         device.reset();
1474db5b4763SSamuel Ortiz 
1475dc35dac3SRob Bradford         let vfio_wrapper = VfioDeviceWrapper::new(Arc::clone(&device));
1476db5b4763SSamuel Ortiz 
1477d6bf1f5eSSebastien Boeuf         let common = VfioCommon::new(
1478eb6daa2fSSebastien Boeuf             msi_interrupt_manager,
1479e6aa792cSSebastien Boeuf             legacy_interrupt_group,
1480d6bf1f5eSSebastien Boeuf             Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
1481d6bf1f5eSSebastien Boeuf             &PciVfioSubclass::VfioSubclass,
1482d6bf1f5eSSebastien Boeuf             bdf,
1483cc3706afSSebastien Boeuf             vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
1484b750c332SThomas Barrett             x_nv_gpudirect_clique,
1485d6bf1f5eSSebastien Boeuf         )?;
1486521a11a1SRob Bradford 
1487ecc8382fSRob Bradford         let vfio_pci_device = VfioPciDevice {
14885264d545SSebastien Boeuf             id,
1489521a11a1SRob Bradford             vm: vm.clone(),
1490521a11a1SRob Bradford             device,
1491521a11a1SRob Bradford             container,
1492521a11a1SRob Bradford             common,
1493521a11a1SRob Bradford             iommu_attached,
149481f8a27eSRob Bradford             memory_slot_allocator,
149581eca69bSArvind Vasudev             bdf,
1496297b41d6SBo Chen             device_path: device_path.clone(),
1497521a11a1SRob Bradford         };
149819167e76SSebastien Boeuf 
1499db5b4763SSamuel Ortiz         Ok(vfio_pci_device)
1500db5b4763SSamuel Ortiz     }
1501db5b4763SSamuel Ortiz 
    /// Returns the `iommu_attached` flag this device was constructed with.
    pub fn iommu_attached(&self) -> bool {
        self.iommu_attached
    }
1505933d41cfSSebastien Boeuf 
generate_sparse_areas( caps: &[VfioRegionInfoCap], region_index: u32, region_start: u64, region_size: u64, vfio_msix: Option<&VfioMsix>, ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError>15061108bd19SSebastien Boeuf     fn generate_sparse_areas(
15071108bd19SSebastien Boeuf         caps: &[VfioRegionInfoCap],
1508ed5f2544SSebastien Boeuf         region_index: u32,
1509ed5f2544SSebastien Boeuf         region_start: u64,
1510ed5f2544SSebastien Boeuf         region_size: u64,
1511ed5f2544SSebastien Boeuf         vfio_msix: Option<&VfioMsix>,
15121108bd19SSebastien Boeuf     ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError> {
15131108bd19SSebastien Boeuf         for cap in caps {
15141108bd19SSebastien Boeuf             match cap {
15151108bd19SSebastien Boeuf                 VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()),
15161108bd19SSebastien Boeuf                 VfioRegionInfoCap::MsixMappable => {
1517a7187168SJianyong Wu                     if !is_4k_aligned(region_start) {
1518ed5f2544SSebastien Boeuf                         error!(
1519ed5f2544SSebastien Boeuf                             "Region start address 0x{:x} must be at least aligned on 4KiB",
1520ed5f2544SSebastien Boeuf                             region_start
1521ed5f2544SSebastien Boeuf                         );
15221108bd19SSebastien Boeuf                         return Err(VfioPciError::RegionAlignment);
1523ed5f2544SSebastien Boeuf                     }
1524a7187168SJianyong Wu                     if !is_4k_multiple(region_size) {
1525ed5f2544SSebastien Boeuf                         error!(
1526ed5f2544SSebastien Boeuf                             "Region size 0x{:x} must be at least a multiple of 4KiB",
1527ed5f2544SSebastien Boeuf                             region_size
1528ed5f2544SSebastien Boeuf                         );
15291108bd19SSebastien Boeuf                         return Err(VfioPciError::RegionSize);
1530ed5f2544SSebastien Boeuf                     }
1531ed5f2544SSebastien Boeuf 
15321108bd19SSebastien Boeuf                     // In case the region contains the MSI-X vectors table or
15331108bd19SSebastien Boeuf                     // the MSI-X PBA table, we must calculate the subregions
15341108bd19SSebastien Boeuf                     // around them, leading to a list of sparse areas.
15351108bd19SSebastien Boeuf                     // We want to make sure we will still trap MMIO accesses
1536a7187168SJianyong Wu                     // to these MSI-X specific ranges. If these region don't align
15377bf0cc1eSPhilipp Schuster                     // with pagesize, we can achieve it by enlarging its range.
15381108bd19SSebastien Boeuf                     //
1539ed5f2544SSebastien Boeuf                     // Using a BtreeMap as the list provided through the iterator is sorted
1540ed5f2544SSebastien Boeuf                     // by key. This ensures proper split of the whole region.
1541ed5f2544SSebastien Boeuf                     let mut inter_ranges = BTreeMap::new();
1542ed5f2544SSebastien Boeuf                     if let Some(msix) = vfio_msix {
1543ed5f2544SSebastien Boeuf                         if region_index == msix.cap.table_bir() {
1544ed5f2544SSebastien Boeuf                             let (offset, size) = msix.cap.table_range();
1545a7187168SJianyong Wu                             let offset = align_page_size_down(offset);
1546a7187168SJianyong Wu                             let size = align_page_size_up(size);
15471108bd19SSebastien Boeuf                             inter_ranges.insert(offset, size);
1548ed5f2544SSebastien Boeuf                         }
1549ed5f2544SSebastien Boeuf                         if region_index == msix.cap.pba_bir() {
1550ed5f2544SSebastien Boeuf                             let (offset, size) = msix.cap.pba_range();
1551a7187168SJianyong Wu                             let offset = align_page_size_down(offset);
1552a7187168SJianyong Wu                             let size = align_page_size_up(size);
15531108bd19SSebastien Boeuf                             inter_ranges.insert(offset, size);
1554ed5f2544SSebastien Boeuf                         }
1555ed5f2544SSebastien Boeuf                     }
1556ed5f2544SSebastien Boeuf 
15571108bd19SSebastien Boeuf                     let mut sparse_areas = Vec::new();
15581108bd19SSebastien Boeuf                     let mut current_offset = 0;
15591108bd19SSebastien Boeuf                     for (range_offset, range_size) in inter_ranges {
15601108bd19SSebastien Boeuf                         if range_offset > current_offset {
15611108bd19SSebastien Boeuf                             sparse_areas.push(VfioRegionSparseMmapArea {
15621108bd19SSebastien Boeuf                                 offset: current_offset,
15631108bd19SSebastien Boeuf                                 size: range_offset - current_offset,
1564ed5f2544SSebastien Boeuf                             });
1565ed5f2544SSebastien Boeuf                         }
1566a7187168SJianyong Wu                         current_offset = align_page_size_down(range_offset + range_size);
1567ed5f2544SSebastien Boeuf                     }
1568ed5f2544SSebastien Boeuf 
15691108bd19SSebastien Boeuf                     if region_size > current_offset {
15701108bd19SSebastien Boeuf                         sparse_areas.push(VfioRegionSparseMmapArea {
1571a7187168SJianyong Wu                             offset: current_offset,
1572a7187168SJianyong Wu                             size: region_size - current_offset,
1573ed5f2544SSebastien Boeuf                         });
1574ed5f2544SSebastien Boeuf                     }
1575ed5f2544SSebastien Boeuf 
15761108bd19SSebastien Boeuf                     return Ok(sparse_areas);
15771108bd19SSebastien Boeuf                 }
15781108bd19SSebastien Boeuf                 _ => {}
15791108bd19SSebastien Boeuf             }
15801108bd19SSebastien Boeuf         }
15811108bd19SSebastien Boeuf 
15821108bd19SSebastien Boeuf         // In case no relevant capabilities have been found, create a single
15831108bd19SSebastien Boeuf         // sparse area corresponding to the entire MMIO region.
15841108bd19SSebastien Boeuf         Ok(vec![VfioRegionSparseMmapArea {
15851108bd19SSebastien Boeuf             offset: 0,
15861108bd19SSebastien Boeuf             size: region_size,
15871108bd19SSebastien Boeuf         }])
1588ed5f2544SSebastien Boeuf     }
1589ed5f2544SSebastien Boeuf 
1590b746dd71SChao Peng     /// Map MMIO regions into the guest, and avoid VM exits when the guest tries
1591b746dd71SChao Peng     /// to reach those regions.
1592d92d7978SSebastien Boeuf     ///
1593d92d7978SSebastien Boeuf     /// # Arguments
1594d92d7978SSebastien Boeuf     ///
1595571c3685SWei Liu     /// * `vm` - The VM object. It is used to set the VFIO MMIO regions
1596571c3685SWei Liu     ///   as user memory regions.
1597571c3685SWei Liu     /// * `mem_slot` - The closure to return a memory slot.
map_mmio_regions(&mut self) -> Result<(), VfioPciError>159881ba70a4SSebastien Boeuf     pub fn map_mmio_regions(&mut self) -> Result<(), VfioPciError> {
1599b746dd71SChao Peng         let fd = self.device.as_raw_fd();
1600b746dd71SChao Peng 
1601d27ea34aSRob Bradford         for region in self.common.mmio_regions.iter_mut() {
1602b746dd71SChao Peng             let region_flags = self.device.get_region_flags(region.index);
1603b746dd71SChao Peng             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
1604b746dd71SChao Peng                 let mut prot = 0;
1605b746dd71SChao Peng                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
1606b746dd71SChao Peng                     prot |= libc::PROT_READ;
1607b746dd71SChao Peng                 }
1608b746dd71SChao Peng                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
1609b746dd71SChao Peng                     prot |= libc::PROT_WRITE;
1610b746dd71SChao Peng                 }
1611ed5f2544SSebastien Boeuf 
1612b11a8a5aSSebastien Boeuf                 // Retrieve the list of capabilities found on the region
1613b11a8a5aSSebastien Boeuf                 let caps = if region_flags & VFIO_REGION_INFO_FLAG_CAPS != 0 {
1614b11a8a5aSSebastien Boeuf                     self.device.get_region_caps(region.index)
1615b11a8a5aSSebastien Boeuf                 } else {
1616b11a8a5aSSebastien Boeuf                     Vec::new()
1617b11a8a5aSSebastien Boeuf                 };
1618b11a8a5aSSebastien Boeuf 
1619b11a8a5aSSebastien Boeuf                 // Don't try to mmap the region if it contains MSI-X table or
1620b11a8a5aSSebastien Boeuf                 // MSI-X PBA subregion, and if we couldn't find MSIX_MAPPABLE
1621b11a8a5aSSebastien Boeuf                 // in the list of supported capabilities.
1622b11a8a5aSSebastien Boeuf                 if let Some(msix) = self.common.interrupt.msix.as_ref() {
1623b11a8a5aSSebastien Boeuf                     if (region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir())
1624b11a8a5aSSebastien Boeuf                         && !caps.contains(&VfioRegionInfoCap::MsixMappable)
1625b11a8a5aSSebastien Boeuf                     {
1626b11a8a5aSSebastien Boeuf                         continue;
1627b11a8a5aSSebastien Boeuf                     }
1628b11a8a5aSSebastien Boeuf                 }
1629b11a8a5aSSebastien Boeuf 
1630b11a8a5aSSebastien Boeuf                 let mmap_size = self.device.get_region_size(region.index);
16311108bd19SSebastien Boeuf                 let mmap_offset = self.device.get_region_offset(region.index);
1632b746dd71SChao Peng 
16331108bd19SSebastien Boeuf                 let sparse_areas = Self::generate_sparse_areas(
16341108bd19SSebastien Boeuf                     &caps,
16351108bd19SSebastien Boeuf                     region.index,
16361108bd19SSebastien Boeuf                     region.start.0,
16371108bd19SSebastien Boeuf                     mmap_size,
16381108bd19SSebastien Boeuf                     self.common.interrupt.msix.as_ref(),
16391108bd19SSebastien Boeuf                 )?;
16401108bd19SSebastien Boeuf 
16411108bd19SSebastien Boeuf                 for area in sparse_areas.iter() {
1642c5bd8cabSWei Liu                     // SAFETY: FFI call with correct arguments
1643b746dd71SChao Peng                     let host_addr = unsafe {
1644b746dd71SChao Peng                         libc::mmap(
1645b746dd71SChao Peng                             null_mut(),
16461108bd19SSebastien Boeuf                             area.size as usize,
1647b746dd71SChao Peng                             prot,
1648b746dd71SChao Peng                             libc::MAP_SHARED,
1649b746dd71SChao Peng                             fd,
16501108bd19SSebastien Boeuf                             mmap_offset as libc::off_t + area.offset as libc::off_t,
1651b746dd71SChao Peng                         )
1652b746dd71SChao Peng                     };
1653b746dd71SChao Peng 
1654a64ba04eSJinank Jain                     if std::ptr::eq(host_addr, libc::MAP_FAILED) {
1655b11a8a5aSSebastien Boeuf                         error!(
16561108bd19SSebastien Boeuf                             "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
16571108bd19SSebastien Boeuf                             area.offset,
16581108bd19SSebastien Boeuf                             area.size,
16591108bd19SSebastien Boeuf                             std::io::Error::last_os_error()
1660b746dd71SChao Peng                         );
16611108bd19SSebastien Boeuf                         return Err(VfioPciError::MmapArea);
1662b746dd71SChao Peng                     }
1663b746dd71SChao Peng 
1664a7187168SJianyong Wu                     if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) {
1665a7187168SJianyong Wu                         warn!(
1666a7187168SJianyong Wu                             "Could not mmap sparse area that is not page size aligned (offset = 0x{:x}, size = 0x{:x})",
1667a7187168SJianyong Wu                             area.offset,
1668a7187168SJianyong Wu                             area.size,
1669a7187168SJianyong Wu                             );
1670a7187168SJianyong Wu                         return Ok(());
1671a7187168SJianyong Wu                     }
1672a7187168SJianyong Wu 
16731108bd19SSebastien Boeuf                     let user_memory_region = UserMemoryRegion {
167481f8a27eSRob Bradford                         slot: self.memory_slot_allocator.next_memory_slot(),
16751108bd19SSebastien Boeuf                         start: region.start.0 + area.offset,
16761108bd19SSebastien Boeuf                         size: area.size,
16771108bd19SSebastien Boeuf                         host_addr: host_addr as u64,
16781108bd19SSebastien Boeuf                     };
16791108bd19SSebastien Boeuf 
168006f57abdSBo Chen                     region.user_memory_regions.push(user_memory_region);
168106f57abdSBo Chen 
168281ba70a4SSebastien Boeuf                     let mem_region = self.vm.make_user_memory_region(
1683ed5f2544SSebastien Boeuf                         user_memory_region.slot,
1684ed5f2544SSebastien Boeuf                         user_memory_region.start,
1685ed5f2544SSebastien Boeuf                         user_memory_region.size,
1686ed5f2544SSebastien Boeuf                         user_memory_region.host_addr,
1687053ea5dcSMuminul Islam                         false,
16888baa244eSRob Bradford                         false,
1689053ea5dcSMuminul Islam                     );
1690b746dd71SChao Peng 
169181ba70a4SSebastien Boeuf                     self.vm
169281ba70a4SSebastien Boeuf                         .create_user_memory_region(mem_region)
16931108bd19SSebastien Boeuf                         .map_err(VfioPciError::CreateUserMemoryRegion)?;
1694f0c1f8d0SThomas Barrett 
1695f0c1f8d0SThomas Barrett                     if !self.iommu_attached {
1696f0c1f8d0SThomas Barrett                         self.container
1697f0c1f8d0SThomas Barrett                             .vfio_dma_map(
1698f0c1f8d0SThomas Barrett                                 user_memory_region.start,
1699f0c1f8d0SThomas Barrett                                 user_memory_region.size,
1700f0c1f8d0SThomas Barrett                                 user_memory_region.host_addr,
1701f0c1f8d0SThomas Barrett                             )
1702297b41d6SBo Chen                             .map_err(|e| {
1703297b41d6SBo Chen                                 VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf)
1704297b41d6SBo Chen                             })?;
1705f0c1f8d0SThomas Barrett                     }
1706ed5f2544SSebastien Boeuf                 }
1707b746dd71SChao Peng             }
1708b746dd71SChao Peng         }
1709b746dd71SChao Peng 
1710320fea0eSSebastien Boeuf         Ok(())
1711b746dd71SChao Peng     }
17124de04e84SWu Zongyong 
unmap_mmio_regions(&mut self)17134de04e84SWu Zongyong     pub fn unmap_mmio_regions(&mut self) {
1714d27ea34aSRob Bradford         for region in self.common.mmio_regions.iter() {
1715ed5f2544SSebastien Boeuf             for user_memory_region in region.user_memory_regions.iter() {
1716f0c1f8d0SThomas Barrett                 // Unmap from vfio container
1717f0c1f8d0SThomas Barrett                 if !self.iommu_attached {
1718f0c1f8d0SThomas Barrett                     if let Err(e) = self
1719f0c1f8d0SThomas Barrett                         .container
1720f0c1f8d0SThomas Barrett                         .vfio_dma_unmap(user_memory_region.start, user_memory_region.size)
17211307d31eSBo Chen                         .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf))
1722f0c1f8d0SThomas Barrett                     {
17231307d31eSBo Chen                         error!(
17241307d31eSBo Chen                             "Could not unmap mmio region from vfio container: \
17251307d31eSBo Chen                             iova 0x{:x}, size 0x{:x}: {}, ",
17261307d31eSBo Chen                             user_memory_region.start, user_memory_region.size, e
17271307d31eSBo Chen                         );
1728f0c1f8d0SThomas Barrett                     }
1729f0c1f8d0SThomas Barrett                 }
1730f0c1f8d0SThomas Barrett 
1731e6849699SWei Liu                 // Remove region
1732e6849699SWei Liu                 let r = self.vm.make_user_memory_region(
1733ed5f2544SSebastien Boeuf                     user_memory_region.slot,
1734ed5f2544SSebastien Boeuf                     user_memory_region.start,
1735ed5f2544SSebastien Boeuf                     user_memory_region.size,
1736ed5f2544SSebastien Boeuf                     user_memory_region.host_addr,
1737053ea5dcSMuminul Islam                     false,
17388baa244eSRob Bradford                     false,
1739053ea5dcSMuminul Islam                 );
1740e4dee57eSMuminul Islam 
17411f2915bfSWei Liu                 if let Err(e) = self.vm.remove_user_memory_region(r) {
1742e6849699SWei Liu                     error!("Could not remove the userspace memory region: {}", e);
1743f0dff8b5SSebastien Boeuf                 }
1744f0dff8b5SSebastien Boeuf 
17450d6cef45SRob Bradford                 self.memory_slot_allocator
17460d6cef45SRob Bradford                     .free_memory_slot(user_memory_region.slot);
17470d6cef45SRob Bradford 
1748c5bd8cabSWei Liu                 // SAFETY: FFI call with correct arguments
17491108bd19SSebastien Boeuf                 let ret = unsafe {
17501108bd19SSebastien Boeuf                     libc::munmap(
17511108bd19SSebastien Boeuf                         user_memory_region.host_addr as *mut libc::c_void,
17521108bd19SSebastien Boeuf                         user_memory_region.size as usize,
17531108bd19SSebastien Boeuf                     )
17541108bd19SSebastien Boeuf                 };
17554de04e84SWu Zongyong                 if ret != 0 {
17564de04e84SWu Zongyong                     error!(
1757f0dff8b5SSebastien Boeuf                         "Could not unmap region {}, error:{}",
1758f0dff8b5SSebastien Boeuf                         region.index,
17594de04e84SWu Zongyong                         io::Error::last_os_error()
17604de04e84SWu Zongyong                     );
17614de04e84SWu Zongyong                 }
17624de04e84SWu Zongyong             }
17634de04e84SWu Zongyong         }
17644de04e84SWu Zongyong     }
1765e4a034aeSSebastien Boeuf 
dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioPciError>1766cdfc1773SRob Bradford     pub fn dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioPciError> {
1767a0a89b13SSebastien Boeuf         if !self.iommu_attached {
1768a0a89b13SSebastien Boeuf             self.container
1769080ea318SSebastien Boeuf                 .vfio_dma_map(iova, size, user_addr)
1770297b41d6SBo Chen                 .map_err(|e| VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf))?;
1771080ea318SSebastien Boeuf         }
1772080ea318SSebastien Boeuf 
1773080ea318SSebastien Boeuf         Ok(())
1774080ea318SSebastien Boeuf     }
1775080ea318SSebastien Boeuf 
dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioPciError>1776cdfc1773SRob Bradford     pub fn dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioPciError> {
1777080ea318SSebastien Boeuf         if !self.iommu_attached {
1778080ea318SSebastien Boeuf             self.container
1779080ea318SSebastien Boeuf                 .vfio_dma_unmap(iova, size)
1780297b41d6SBo Chen                 .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf))?;
1781a0a89b13SSebastien Boeuf         }
1782a0a89b13SSebastien Boeuf 
1783a0a89b13SSebastien Boeuf         Ok(())
1784e4a034aeSSebastien Boeuf     }
1785593a958fSRob Bradford 
mmio_regions(&self) -> Vec<MmioRegion>1786593a958fSRob Bradford     pub fn mmio_regions(&self) -> Vec<MmioRegion> {
1787d27ea34aSRob Bradford         self.common.mmio_regions.clone()
1788593a958fSRob Bradford     }
1789db5b4763SSamuel Ortiz }
1790db5b4763SSamuel Ortiz 
1791db5b4763SSamuel Ortiz impl Drop for VfioPciDevice {
drop(&mut self)1792db5b4763SSamuel Ortiz     fn drop(&mut self) {
17934de04e84SWu Zongyong         self.unmap_mmio_regions();
17944de04e84SWu Zongyong 
1795d27ea34aSRob Bradford         if let Some(msix) = &self.common.interrupt.msix {
179619167e76SSebastien Boeuf             if msix.bar.enabled() {
17974a99d3dbSSebastien Boeuf                 self.common.disable_msix();
1798d7dc1a92SWu Zongyong             }
179920f01161SSebastien Boeuf         }
180020f01161SSebastien Boeuf 
1801d27ea34aSRob Bradford         if let Some(msi) = &self.common.interrupt.msi {
180219167e76SSebastien Boeuf             if msi.cfg.enabled() {
18034a99d3dbSSebastien Boeuf                 self.common.disable_msi()
1804d7dc1a92SWu Zongyong             }
180520f01161SSebastien Boeuf         }
180620f01161SSebastien Boeuf 
1807d27ea34aSRob Bradford         if self.common.interrupt.intx_in_use() {
18084a99d3dbSSebastien Boeuf             self.common.disable_intx();
180919167e76SSebastien Boeuf         }
1810db5b4763SSamuel Ortiz     }
1811db5b4763SSamuel Ortiz }
1812db5b4763SSamuel Ortiz 
1813db5b4763SSamuel Ortiz impl BusDevice for VfioPciDevice {
read(&mut self, base: u64, offset: u64, data: &mut [u8])1814db5b4763SSamuel Ortiz     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
1815db5b4763SSamuel Ortiz         self.read_bar(base, offset, data)
1816db5b4763SSamuel Ortiz     }
1817db5b4763SSamuel Ortiz 
write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>>18181fc6d50fSRob Bradford     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
18197cc729c7SRob Bradford         self.write_bar(base, offset, data)
1820db5b4763SSamuel Ortiz     }
1821db5b4763SSamuel Ortiz }
1822db5b4763SSamuel Ortiz 
/// Offset of the 16-bit status register in the PCI configuration space.
const PCI_CONFIG_STATUS_OFFSET: u32 = 0x06;
/// Status bit (bit 4) indicating the presence of a capabilities list.
const PCI_CONFIG_STATUS_CAPABILITIES_LIST: u16 = 1 << 4;
/// First BAR offset in the PCI config space.
const PCI_CONFIG_BAR_OFFSET: u32 = 0x10;
/// Capability register offset in the PCI config space.
const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34;
/// The valid bits for the capabilities pointer (the two low bits are masked
/// out).
const PCI_CONFIG_CAPABILITY_PTR_MASK: u8 = !0b11;
/// Extended capabilities register offset in the PCI config space.
const PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET: u32 = 0x100;
/// IO BAR when first BAR bit is 1.
const PCI_CONFIG_IO_BAR: u32 = 0x1;
/// 64-bit memory BAR flag.
const PCI_CONFIG_MEMORY_BAR_64BIT: u32 = 0x4;
/// Prefetchable BAR bit.
const PCI_CONFIG_BAR_PREFETCHABLE: u32 = 0x8;
/// PCI config register size (4 bytes).
const PCI_CONFIG_REGISTER_SIZE: usize = 4;
/// Number of BARs for a PCI device.
const BAR_NUMS: usize = 6;
/// PCI Header Type register index.
const PCI_HEADER_TYPE_REG_INDEX: usize = 3;
/// First BAR register index.
const PCI_CONFIG_BAR0_INDEX: usize = 4;
/// PCI ROM expansion BAR register index.
const PCI_ROM_EXP_BAR_INDEX: usize = 12;
1851db5b4763SSamuel Ortiz 
1852db5b4763SSamuel Ortiz impl PciDevice for VfioPciDevice {
allocate_bars( &mut self, allocator: &Arc<Mutex<SystemAllocator>>, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, resources: Option<Vec<Resource>>, ) -> Result<Vec<PciBarConfiguration>, PciDeviceError>1853db5b4763SSamuel Ortiz     fn allocate_bars(
1854db5b4763SSamuel Ortiz         &mut self,
18559ef1187fSRob Bradford         allocator: &Arc<Mutex<SystemAllocator>>,
185645b01d59SThomas Barrett         mmio32_allocator: &mut AddressAllocator,
185745b01d59SThomas Barrett         mmio64_allocator: &mut AddressAllocator,
18586e084572SSebastien Boeuf         resources: Option<Vec<Resource>>,
185989218b6dSSebastien Boeuf     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
1860cd9d1cf8SRob Bradford         self.common
186145b01d59SThomas Barrett             .allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources)
1862db5b4763SSamuel Ortiz     }
1863db5b4763SSamuel Ortiz 
free_bars( &mut self, allocator: &mut SystemAllocator, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, ) -> Result<(), PciDeviceError>1864cd9d1cf8SRob Bradford     fn free_bars(
1865cd9d1cf8SRob Bradford         &mut self,
1866cd9d1cf8SRob Bradford         allocator: &mut SystemAllocator,
186745b01d59SThomas Barrett         mmio32_allocator: &mut AddressAllocator,
186845b01d59SThomas Barrett         mmio64_allocator: &mut AddressAllocator,
1869cd9d1cf8SRob Bradford     ) -> Result<(), PciDeviceError> {
187045b01d59SThomas Barrett         self.common
187145b01d59SThomas Barrett             .free_bars(allocator, mmio32_allocator, mmio64_allocator)
187234d1f435SSebastien Boeuf     }
187334d1f435SSebastien Boeuf 
write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>)18747cc729c7SRob Bradford     fn write_config_register(
18757cc729c7SRob Bradford         &mut self,
18767cc729c7SRob Bradford         reg_idx: usize,
18777cc729c7SRob Bradford         offset: u64,
18787cc729c7SRob Bradford         data: &[u8],
1879aaf86ef2SBo Chen     ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>) {
18804a99d3dbSSebastien Boeuf         self.common.write_config_register(reg_idx, offset, data)
1881db5b4763SSamuel Ortiz     }
1882db5b4763SSamuel Ortiz 
read_config_register(&mut self, reg_idx: usize) -> u321883db9f9b78SSebastien Boeuf     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
18844a99d3dbSSebastien Boeuf         self.common.read_config_register(reg_idx)
1885db5b4763SSamuel Ortiz     }
1886db5b4763SSamuel Ortiz 
read_bar(&mut self, base: u64, offset: u64, data: &mut [u8])1887db5b4763SSamuel Ortiz     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
18884a99d3dbSSebastien Boeuf         self.common.read_bar(base, offset, data)
1889c93d5361SSebastien Boeuf     }
1890db5b4763SSamuel Ortiz 
write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>>18917cc729c7SRob Bradford     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
18924a99d3dbSSebastien Boeuf         self.common.write_bar(base, offset, data)
1893db5b4763SSamuel Ortiz     }
1894e536f880SSebastien Boeuf 
move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), io::Error>1895cdfc1773SRob Bradford     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), io::Error> {
1896d27ea34aSRob Bradford         for region in self.common.mmio_regions.iter_mut() {
1897e536f880SSebastien Boeuf             if region.start.raw_value() == old_base {
1898e536f880SSebastien Boeuf                 region.start = GuestAddress(new_base);
1899e536f880SSebastien Boeuf 
1900ed5f2544SSebastien Boeuf                 for user_memory_region in region.user_memory_regions.iter_mut() {
19012f218274SBo Chen                     // Unmap the old MMIO region from vfio container
19022f218274SBo Chen                     if !self.iommu_attached {
19032f218274SBo Chen                         if let Err(e) = self
19042f218274SBo Chen                             .container
19052f218274SBo Chen                             .vfio_dma_unmap(user_memory_region.start, user_memory_region.size)
19062f218274SBo Chen                             .map_err(|e| {
19072f218274SBo Chen                                 VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf)
19082f218274SBo Chen                             })
19092f218274SBo Chen                         {
19102f218274SBo Chen                             error!(
19112f218274SBo Chen                                 "Could not unmap mmio region from vfio container: \
19122f218274SBo Chen                                 iova 0x{:x}, size 0x{:x}: {}, ",
19132f218274SBo Chen                                 user_memory_region.start, user_memory_region.size, e
19142f218274SBo Chen                             );
19152f218274SBo Chen                         }
19162f218274SBo Chen                     }
19172f218274SBo Chen 
1918e6849699SWei Liu                     // Remove old region
1919053ea5dcSMuminul Islam                     let old_mem_region = self.vm.make_user_memory_region(
1920ed5f2544SSebastien Boeuf                         user_memory_region.slot,
1921ed5f2544SSebastien Boeuf                         user_memory_region.start,
1922ed5f2544SSebastien Boeuf                         user_memory_region.size,
1923ed5f2544SSebastien Boeuf                         user_memory_region.host_addr,
1924053ea5dcSMuminul Islam                         false,
19258baa244eSRob Bradford                         false,
1926053ea5dcSMuminul Islam                     );
1927e4dee57eSMuminul Islam 
1928e5552a53SWei Liu                     self.vm
19291f2915bfSWei Liu                         .remove_user_memory_region(old_mem_region)
1930ea4693a0SJinank Jain                         .map_err(io::Error::other)?;
1931e536f880SSebastien Boeuf 
1932ed5f2544SSebastien Boeuf                     // Update the user memory region with the correct start address.
1933ed5f2544SSebastien Boeuf                     if new_base > old_base {
1934ed5f2544SSebastien Boeuf                         user_memory_region.start += new_base - old_base;
1935ed5f2544SSebastien Boeuf                     } else {
1936ed5f2544SSebastien Boeuf                         user_memory_region.start -= old_base - new_base;
1937ed5f2544SSebastien Boeuf                     }
1938ed5f2544SSebastien Boeuf 
1939e6849699SWei Liu                     // Insert new region
1940053ea5dcSMuminul Islam                     let new_mem_region = self.vm.make_user_memory_region(
1941ed5f2544SSebastien Boeuf                         user_memory_region.slot,
1942ed5f2544SSebastien Boeuf                         user_memory_region.start,
1943ed5f2544SSebastien Boeuf                         user_memory_region.size,
1944ed5f2544SSebastien Boeuf                         user_memory_region.host_addr,
1945053ea5dcSMuminul Islam                         false,
19468baa244eSRob Bradford                         false,
1947053ea5dcSMuminul Islam                     );
1948e4dee57eSMuminul Islam 
1949e5552a53SWei Liu                     self.vm
19501f2915bfSWei Liu                         .create_user_memory_region(new_mem_region)
1951ea4693a0SJinank Jain                         .map_err(io::Error::other)?;
19522f218274SBo Chen 
19532f218274SBo Chen                     // Map the moved mmio region to vfio container
19542f218274SBo Chen                     if !self.iommu_attached {
19552f218274SBo Chen                         self.container
19562f218274SBo Chen                             .vfio_dma_map(
19572f218274SBo Chen                                 user_memory_region.start,
19582f218274SBo Chen                                 user_memory_region.size,
19592f218274SBo Chen                                 user_memory_region.host_addr,
19602f218274SBo Chen                             )
19612f218274SBo Chen                             .map_err(|e| {
19622f218274SBo Chen                                 VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf)
19632f218274SBo Chen                             })
19642f218274SBo Chen                             .map_err(|e| {
19652f218274SBo Chen                                 io::Error::other(format!(
19662f218274SBo Chen                                     "Could not map mmio region to vfio container: \
19672f218274SBo Chen                                     iova 0x{:x}, size 0x{:x}: {}, ",
19682f218274SBo Chen                                     user_memory_region.start, user_memory_region.size, e
19692f218274SBo Chen                                 ))
19702f218274SBo Chen                             })?;
19712f218274SBo Chen                     }
1972e536f880SSebastien Boeuf                 }
1973e536f880SSebastien Boeuf             }
1974e536f880SSebastien Boeuf         }
1975d6c68e47SSebastien Boeuf 
1976d6c68e47SSebastien Boeuf         Ok(())
1977e536f880SSebastien Boeuf     }
1978de21c9baSSebastien Boeuf 
as_any_mut(&mut self) -> &mut dyn Any1979d99f2942SWei Liu     fn as_any_mut(&mut self) -> &mut dyn Any {
1980de21c9baSSebastien Boeuf         self
1981de21c9baSSebastien Boeuf     }
19825264d545SSebastien Boeuf 
id(&self) -> Option<String>19835264d545SSebastien Boeuf     fn id(&self) -> Option<String> {
19845264d545SSebastien Boeuf         Some(self.id.clone())
19855264d545SSebastien Boeuf     }
1986c93d5361SSebastien Boeuf }
19873b59e570SSebastien Boeuf 
19883b59e570SSebastien Boeuf impl Pausable for VfioPciDevice {}
19893b59e570SSebastien Boeuf 
19903b59e570SSebastien Boeuf impl Snapshottable for VfioPciDevice {
id(&self) -> String19913b59e570SSebastien Boeuf     fn id(&self) -> String {
19923b59e570SSebastien Boeuf         self.id.clone()
19933b59e570SSebastien Boeuf     }
19943b59e570SSebastien Boeuf 
snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>19953b59e570SSebastien Boeuf     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
1996748018acSSebastien Boeuf         let mut vfio_pci_dev_snapshot = Snapshot::default();
19973b59e570SSebastien Boeuf 
19983b59e570SSebastien Boeuf         // Snapshot VfioCommon
1999748018acSSebastien Boeuf         vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
20003b59e570SSebastien Boeuf 
20013b59e570SSebastien Boeuf         Ok(vfio_pci_dev_snapshot)
20023b59e570SSebastien Boeuf     }
20033b59e570SSebastien Boeuf }
20043b59e570SSebastien Boeuf impl Transportable for VfioPciDevice {}
20053b59e570SSebastien Boeuf impl Migratable for VfioPciDevice {}
2006a5e2460dSAndrew Carp 
2007a5e2460dSAndrew Carp /// This structure implements the ExternalDmaMapping trait. It is meant to
2008a5e2460dSAndrew Carp /// be used when the caller tries to provide a way to update the mappings
2009a5e2460dSAndrew Carp /// associated with a specific VFIO container.
2010a5e2460dSAndrew Carp pub struct VfioDmaMapping<M: GuestAddressSpace> {
2011a5e2460dSAndrew Carp     container: Arc<VfioContainer>,
2012a5e2460dSAndrew Carp     memory: Arc<M>,
2013045964deSAndrew Carp     mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
2014a5e2460dSAndrew Carp }
2015a5e2460dSAndrew Carp 
2016a5e2460dSAndrew Carp impl<M: GuestAddressSpace> VfioDmaMapping<M> {
2017a5e2460dSAndrew Carp     /// Create a DmaMapping object.
2018a5e2460dSAndrew Carp     /// # Parameters
2019a5e2460dSAndrew Carp     /// * `container`: VFIO container object.
2020045964deSAndrew Carp     /// * `memory`: guest memory to mmap.
2021045964deSAndrew Carp     /// * `mmio_regions`: mmio_regions to mmap.
new( container: Arc<VfioContainer>, memory: Arc<M>, mmio_regions: Arc<Mutex<Vec<MmioRegion>>>, ) -> Self2022045964deSAndrew Carp     pub fn new(
2023045964deSAndrew Carp         container: Arc<VfioContainer>,
2024045964deSAndrew Carp         memory: Arc<M>,
2025045964deSAndrew Carp         mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
2026045964deSAndrew Carp     ) -> Self {
2027045964deSAndrew Carp         VfioDmaMapping {
2028045964deSAndrew Carp             container,
2029045964deSAndrew Carp             memory,
2030045964deSAndrew Carp             mmio_regions,
2031045964deSAndrew Carp         }
2032a5e2460dSAndrew Carp     }
2033a5e2460dSAndrew Carp }
2034a5e2460dSAndrew Carp 
2035a5e2460dSAndrew Carp impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioDmaMapping<M> {
map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error>2036a5e2460dSAndrew Carp     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> {
2037a5e2460dSAndrew Carp         let mem = self.memory.memory();
2038a5e2460dSAndrew Carp         let guest_addr = GuestAddress(gpa);
2039a5e2460dSAndrew Carp         let user_addr = if mem.check_range(guest_addr, size as usize) {
2040045964deSAndrew Carp             match mem.get_host_address(guest_addr) {
2041045964deSAndrew Carp                 Ok(t) => t as u64,
2042045964deSAndrew Carp                 Err(e) => {
2043ea4693a0SJinank Jain                     return Err(io::Error::other(
2044045964deSAndrew Carp                         format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}")
2045045964deSAndrew Carp                     ));
2046045964deSAndrew Carp                 }
2047045964deSAndrew Carp             }
2048045964deSAndrew Carp         } else if self.mmio_regions.lock().unwrap().check_range(gpa, size) {
2049045964deSAndrew Carp             self.mmio_regions.lock().unwrap().find_user_address(gpa)?
2050a5e2460dSAndrew Carp         } else {
2051ea4693a0SJinank Jain             return Err(io::Error::other(format!(
2052ea4693a0SJinank Jain                 "failed to locate guest address 0x{gpa:x} in guest memory"
2053ea4693a0SJinank Jain             )));
2054a5e2460dSAndrew Carp         };
2055a5e2460dSAndrew Carp 
2056a5e2460dSAndrew Carp         self.container
2057a5e2460dSAndrew Carp             .vfio_dma_map(iova, size, user_addr)
2058a5e2460dSAndrew Carp             .map_err(|e| {
2059ea4693a0SJinank Jain                 io::Error::other(format!(
2060a5e2460dSAndrew Carp                     "failed to map memory for VFIO container, \
2061a5e2460dSAndrew Carp                          iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
2062ea4693a0SJinank Jain                 ))
2063a5e2460dSAndrew Carp             })
2064a5e2460dSAndrew Carp     }
2065a5e2460dSAndrew Carp 
unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error>2066a5e2460dSAndrew Carp     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error> {
2067a5e2460dSAndrew Carp         self.container.vfio_dma_unmap(iova, size).map_err(|e| {
2068ea4693a0SJinank Jain             io::Error::other(format!(
2069a5e2460dSAndrew Carp                 "failed to unmap memory for VFIO container, \
2070a5e2460dSAndrew Carp                      iova 0x{iova:x}, size 0x{size:x}: {e:?}"
2071ea4693a0SJinank Jain             ))
2072a5e2460dSAndrew Carp         })
2073a5e2460dSAndrew Carp     }
2074a5e2460dSAndrew Carp }
2075