xref: /cloud-hypervisor/pci/src/vfio.rs (revision d10f20eb718023742143fa847a37f3d6114ead52)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use crate::msi::{MsiConfigState, MSI_CONFIG_ID};
7 use crate::msix::MsixConfigState;
8 use crate::{
9     msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig,
10     PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId,
11     PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId,
12     PciHeaderType, PciSubclass, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, PCI_CONFIGURATION_ID,
13 };
14 use anyhow::anyhow;
15 use byteorder::{ByteOrder, LittleEndian};
16 use hypervisor::HypervisorVmError;
17 use libc::{sysconf, _SC_PAGESIZE};
18 use serde::{Deserialize, Serialize};
19 use std::any::Any;
20 use std::collections::{BTreeMap, HashMap};
21 use std::io;
22 use std::os::unix::io::AsRawFd;
23 use std::ptr::null_mut;
24 use std::sync::{Arc, Barrier, Mutex};
25 use thiserror::Error;
26 use vfio_bindings::bindings::vfio::*;
27 use vfio_ioctls::{
28     VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea,
29 };
30 use vm_allocator::page_size::{
31     align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned,
32 };
33 use vm_allocator::{AddressAllocator, SystemAllocator};
34 use vm_device::dma_mapping::ExternalDmaMapping;
35 use vm_device::interrupt::{
36     InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
37 };
38 use vm_device::{BusDevice, Resource};
39 use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
40 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
41 use vmm_sys_util::eventfd::EventFd;
42 
43 pub(crate) const VFIO_COMMON_ID: &str = "vfio_common";
44 
45 #[derive(Debug, Error)]
46 pub enum VfioPciError {
47     #[error("Failed to create user memory region: {0}")]
48     CreateUserMemoryRegion(#[source] HypervisorVmError),
49     #[error("Failed to DMA map: {0}")]
50     DmaMap(#[source] vfio_ioctls::VfioError),
51     #[error("Failed to DMA unmap: {0}")]
52     DmaUnmap(#[source] vfio_ioctls::VfioError),
53     #[error("Failed to enable INTx: {0}")]
54     EnableIntx(#[source] VfioError),
55     #[error("Failed to enable MSI: {0}")]
56     EnableMsi(#[source] VfioError),
57     #[error("Failed to enable MSI-X: {0}")]
58     EnableMsix(#[source] VfioError),
59     #[error("Failed to mmap the area")]
60     MmapArea,
61     #[error("Missing notifier's eventfd")]
62     MissingNotifier,
63     #[error("Invalid region alignment")]
64     RegionAlignment,
65     #[error("Invalid region size")]
66     RegionSize,
67     #[error("Failed to retrieve MsiConfigState: {0}")]
68     RetrieveMsiConfigState(#[source] anyhow::Error),
69     #[error("Failed to retrieve MsixConfigState: {0}")]
70     RetrieveMsixConfigState(#[source] anyhow::Error),
71     #[error("Failed to retrieve PciConfigurationState: {0}")]
72     RetrievePciConfigurationState(#[source] anyhow::Error),
73     #[error("Failed to retrieve VfioCommonState: {0}")]
74     RetrieveVfioCommonState(#[source] anyhow::Error),
75 }
76 
77 #[derive(Copy, Clone)]
78 enum PciVfioSubclass {
79     VfioSubclass = 0xff,
80 }
81 
82 impl PciSubclass for PciVfioSubclass {
83     fn get_register_value(&self) -> u8 {
84         *self as u8
85     }
86 }
87 
88 enum InterruptUpdateAction {
89     EnableMsi,
90     DisableMsi,
91     EnableMsix,
92     DisableMsix,
93 }
94 
95 #[derive(Serialize, Deserialize)]
96 struct IntxState {
97     enabled: bool,
98 }
99 
100 pub(crate) struct VfioIntx {
101     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
102     enabled: bool,
103 }
104 
105 #[derive(Serialize, Deserialize)]
106 struct MsiState {
107     cap: MsiCap,
108     cap_offset: u32,
109 }
110 
111 pub(crate) struct VfioMsi {
112     pub(crate) cfg: MsiConfig,
113     cap_offset: u32,
114     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
115 }
116 
117 impl VfioMsi {
118     fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
119         let old_enabled = self.cfg.enabled();
120 
121         self.cfg.update(offset, data);
122 
123         let new_enabled = self.cfg.enabled();
124 
125         if !old_enabled && new_enabled {
126             return Some(InterruptUpdateAction::EnableMsi);
127         }
128 
129         if old_enabled && !new_enabled {
130             return Some(InterruptUpdateAction::DisableMsi);
131         }
132 
133         None
134     }
135 }
136 
137 #[derive(Serialize, Deserialize)]
138 struct MsixState {
139     cap: MsixCap,
140     cap_offset: u32,
141     bdf: u32,
142 }
143 
144 pub(crate) struct VfioMsix {
145     pub(crate) bar: MsixConfig,
146     cap: MsixCap,
147     cap_offset: u32,
148     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
149 }
150 
151 impl VfioMsix {
152     fn update(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
153         let old_enabled = self.bar.enabled();
154 
155         // Update "Message Control" word
156         if offset == 2 && data.len() == 2 {
157             self.bar.set_msg_ctl(LittleEndian::read_u16(data));
158         }
159 
160         let new_enabled = self.bar.enabled();
161 
162         if !old_enabled && new_enabled {
163             return Some(InterruptUpdateAction::EnableMsix);
164         }
165 
166         if old_enabled && !new_enabled {
167             return Some(InterruptUpdateAction::DisableMsix);
168         }
169 
170         None
171     }
172 
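    // Returns true when a BAR access targets the MSI-X table, i.e. the access
    // hits the table BIR at an offset within
    // [table_offset, table_offset + table_size).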
173     fn table_accessed(&self, bar_index: u32, offset: u64) -> bool {
174         let table_offset: u64 = u64::from(self.cap.table_offset());
175         let table_size: u64 = u64::from(self.cap.table_size()) * (MSIX_TABLE_ENTRY_SIZE as u64);
176         let table_bir: u32 = self.cap.table_bir();
177 
178         bar_index == table_bir && offset >= table_offset && offset < table_offset + table_size
179     }
180 }
181 
182 pub(crate) struct Interrupt {
183     pub(crate) intx: Option<VfioIntx>,
184     pub(crate) msi: Option<VfioMsi>,
185     pub(crate) msix: Option<VfioMsix>,
186 }
187 
188 impl Interrupt {
189     fn update_msi(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
190         if let Some(ref mut msi) = &mut self.msi {
191             let action = msi.update(offset, data);
192             return action;
193         }
194 
195         None
196     }
197 
198     fn update_msix(&mut self, offset: u64, data: &[u8]) -> Option<InterruptUpdateAction> {
199         if let Some(ref mut msix) = &mut self.msix {
200             let action = msix.update(offset, data);
201             return action;
202         }
203 
204         None
205     }
206 
207     fn accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)> {
208         if let Some(msi) = &self.msi {
209             if offset >= u64::from(msi.cap_offset)
210                 && offset < u64::from(msi.cap_offset) + msi.cfg.size()
211             {
212                 return Some((
213                     PciCapabilityId::MessageSignalledInterrupts,
214                     u64::from(msi.cap_offset),
215                 ));
216             }
217         }
218 
219         if let Some(msix) = &self.msix {
220             if offset == u64::from(msix.cap_offset) {
221                 return Some((PciCapabilityId::MsiX, u64::from(msix.cap_offset)));
222             }
223         }
224 
225         None
226     }
227 
228     fn msix_table_accessed(&self, bar_index: u32, offset: u64) -> bool {
229         if let Some(msix) = &self.msix {
230             return msix.table_accessed(bar_index, offset);
231         }
232 
233         false
234     }
235 
236     fn msix_write_table(&mut self, offset: u64, data: &[u8]) {
237         if let Some(ref mut msix) = &mut self.msix {
238             let offset = offset - u64::from(msix.cap.table_offset());
239             msix.bar.write_table(offset, data)
240         }
241     }
242 
243     fn msix_read_table(&self, offset: u64, data: &mut [u8]) {
244         if let Some(msix) = &self.msix {
245             let offset = offset - u64::from(msix.cap.table_offset());
246             msix.bar.read_table(offset, data)
247         }
248     }
249 
250     pub(crate) fn intx_in_use(&self) -> bool {
251         if let Some(intx) = &self.intx {
252             return intx.enabled;
253         }
254 
255         false
256     }
257 }
258 
259 #[derive(Copy, Clone)]
260 pub struct UserMemoryRegion {
261     pub slot: u32,
262     pub start: u64,
263     pub size: u64,
264     pub host_addr: u64,
265 }
266 
267 #[derive(Clone)]
268 pub struct MmioRegion {
269     pub start: GuestAddress,
270     pub length: GuestUsize,
271     pub(crate) type_: PciBarRegionType,
272     pub(crate) index: u32,
273     pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
274 }
275 
276 trait MmioRegionRange {
277     fn check_range(&self, guest_addr: u64, size: u64) -> bool;
278     fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>;
279 }
280 
281 impl MmioRegionRange for Vec<MmioRegion> {
282     // Check if a guest address is within the range of mmio regions
283     fn check_range(&self, guest_addr: u64, size: u64) -> bool {
284         for region in self.iter() {
285             let Some(guest_addr_end) = guest_addr.checked_add(size) else {
286                 return false;
287             };
288             let Some(region_end) = region.start.raw_value().checked_add(region.length) else {
289                 return false;
290             };
291             if guest_addr >= region.start.raw_value() && guest_addr_end <= region_end {
292                 return true;
293             }
294         }
295         false
296     }
297 
298     // Locate the user region address for a guest address within all mmio regions
299     fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error> {
300         for region in self.iter() {
301             for user_region in region.user_memory_regions.iter() {
302                 if guest_addr >= user_region.start
303                     && guest_addr < user_region.start + user_region.size
304                 {
305                     return Ok(user_region.host_addr + (guest_addr - user_region.start));
306                 }
307             }
308         }
309 
310         Err(io::Error::new(
311             io::ErrorKind::Other,
312             format!("unable to find user address: 0x{guest_addr:x}"),
313         ))
314     }
315 }
316 
317 #[derive(Debug, Error)]
318 pub enum VfioError {
319     #[error("Kernel VFIO error: {0}")]
320     KernelVfio(#[source] vfio_ioctls::VfioError),
321     #[error("VFIO user error: {0}")]
322     VfioUser(#[source] vfio_user::Error),
323 }
324 
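// Abstraction over the kernel VFIO and vfio-user backends: the config space
// helpers below are built on top of region_read()/region_write(), which each
// backend overrides along with the IRQ plumbing.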
325 pub(crate) trait Vfio: Send + Sync {
326     fn read_config_byte(&self, offset: u32) -> u8 {
327         let mut data: [u8; 1] = [0];
328         self.read_config(offset, &mut data);
329         data[0]
330     }
331 
332     fn read_config_word(&self, offset: u32) -> u16 {
333         let mut data: [u8; 2] = [0, 0];
334         self.read_config(offset, &mut data);
335         u16::from_le_bytes(data)
336     }
337 
338     fn read_config_dword(&self, offset: u32) -> u32 {
339         let mut data: [u8; 4] = [0, 0, 0, 0];
340         self.read_config(offset, &mut data);
341         u32::from_le_bytes(data)
342     }
343 
344     fn write_config_dword(&self, offset: u32, buf: u32) {
345         let data: [u8; 4] = buf.to_le_bytes();
346         self.write_config(offset, &data)
347     }
348 
349     fn read_config(&self, offset: u32, data: &mut [u8]) {
350         self.region_read(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data.as_mut());
351     }
352 
353     fn write_config(&self, offset: u32, data: &[u8]) {
354         self.region_write(VFIO_PCI_CONFIG_REGION_INDEX, offset.into(), data)
355     }
356 
357     fn enable_msi(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> {
358         self.enable_irq(VFIO_PCI_MSI_IRQ_INDEX, fds)
359     }
360 
361     fn disable_msi(&self) -> Result<(), VfioError> {
362         self.disable_irq(VFIO_PCI_MSI_IRQ_INDEX)
363     }
364 
365     fn enable_msix(&self, fds: Vec<&EventFd>) -> Result<(), VfioError> {
366         self.enable_irq(VFIO_PCI_MSIX_IRQ_INDEX, fds)
367     }
368 
369     fn disable_msix(&self) -> Result<(), VfioError> {
370         self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX)
371     }
372 
373     fn region_read(&self, _index: u32, _offset: u64, _data: &mut [u8]) {
374         unimplemented!()
375     }
376 
377     fn region_write(&self, _index: u32, _offset: u64, _data: &[u8]) {
378         unimplemented!()
379     }
380 
381     fn get_irq_info(&self, _irq_index: u32) -> Option<VfioIrq> {
382         unimplemented!()
383     }
384 
385     fn enable_irq(&self, _irq_index: u32, _event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
386         unimplemented!()
387     }
388 
389     fn disable_irq(&self, _irq_index: u32) -> Result<(), VfioError> {
390         unimplemented!()
391     }
392 
393     fn unmask_irq(&self, _irq_index: u32) -> Result<(), VfioError> {
394         unimplemented!()
395     }
396 }
397 
398 struct VfioDeviceWrapper {
399     device: Arc<VfioDevice>,
400 }
401 
402 impl VfioDeviceWrapper {
403     fn new(device: Arc<VfioDevice>) -> Self {
404         Self { device }
405     }
406 }
407 
408 impl Vfio for VfioDeviceWrapper {
409     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
410         self.device.region_read(index, data, offset)
411     }
412 
413     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
414         self.device.region_write(index, data, offset)
415     }
416 
417     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
418         self.device.get_irq_info(irq_index).copied()
419     }
420 
421     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
422         self.device
423             .enable_irq(irq_index, event_fds)
424             .map_err(VfioError::KernelVfio)
425     }
426 
427     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
428         self.device
429             .disable_irq(irq_index)
430             .map_err(VfioError::KernelVfio)
431     }
432 
433     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
434         self.device
435             .unmask_irq(irq_index)
436             .map_err(VfioError::KernelVfio)
437     }
438 }
439 
440 #[derive(Serialize, Deserialize)]
441 struct VfioCommonState {
442     intx_state: Option<IntxState>,
443     msi_state: Option<MsiState>,
444     msix_state: Option<MsixState>,
445 }
446 
447 pub(crate) struct ConfigPatch {
448     mask: u32,
449     patch: u32,
450 }
451 
452 pub(crate) struct VfioCommon {
453     pub(crate) configuration: PciConfiguration,
454     pub(crate) mmio_regions: Vec<MmioRegion>,
455     pub(crate) interrupt: Interrupt,
456     pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
457     pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
458     pub(crate) vfio_wrapper: Arc<dyn Vfio>,
459     pub(crate) patches: HashMap<usize, ConfigPatch>,
460     x_nv_gpudirect_clique: Option<u8>,
461 }
462 
463 impl VfioCommon {
464     pub(crate) fn new(
465         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
466         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
467         vfio_wrapper: Arc<dyn Vfio>,
468         subclass: &dyn PciSubclass,
469         bdf: PciBdf,
470         snapshot: Option<Snapshot>,
471         x_nv_gpudirect_clique: Option<u8>,
472     ) -> Result<Self, VfioPciError> {
473         let pci_configuration_state =
474             vm_migration::state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID).map_err(|e| {
475                 VfioPciError::RetrievePciConfigurationState(anyhow!(
476                     "Failed to get PciConfigurationState from Snapshot: {}",
477                     e
478                 ))
479             })?;
480 
481         let configuration = PciConfiguration::new(
482             0,
483             0,
484             0,
485             PciClassCode::Other,
486             subclass,
487             None,
488             PciHeaderType::Device,
489             0,
490             0,
491             None,
492             pci_configuration_state,
493         );
494 
495         let mut vfio_common = VfioCommon {
496             mmio_regions: Vec::new(),
497             configuration,
498             interrupt: Interrupt {
499                 intx: None,
500                 msi: None,
501                 msix: None,
502             },
503             msi_interrupt_manager,
504             legacy_interrupt_group,
505             vfio_wrapper,
506             patches: HashMap::new(),
507             x_nv_gpudirect_clique,
508         };
509 
510         let state: Option<VfioCommonState> = snapshot
511             .as_ref()
512             .map(|s| s.to_state())
513             .transpose()
514             .map_err(|e| {
515                 VfioPciError::RetrieveVfioCommonState(anyhow!(
516                     "Failed to get VfioCommonState from Snapshot: {}",
517                     e
518                 ))
519             })?;
520         let msi_state =
521             vm_migration::state_from_id(snapshot.as_ref(), MSI_CONFIG_ID).map_err(|e| {
522                 VfioPciError::RetrieveMsiConfigState(anyhow!(
523                     "Failed to get MsiConfigState from Snapshot: {}",
524                     e
525                 ))
526             })?;
527         let msix_state =
528             vm_migration::state_from_id(snapshot.as_ref(), MSIX_CONFIG_ID).map_err(|e| {
529                 VfioPciError::RetrieveMsixConfigState(anyhow!(
530                     "Failed to get MsixConfigState from Snapshot: {}",
531                     e
532                 ))
533             })?;
534 
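        // When restoring from a snapshot, rebuild the interrupt state from
        // it; otherwise parse the device capabilities from scratch.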
535         if let Some(state) = state.as_ref() {
536             vfio_common.set_state(state, msi_state, msix_state)?;
537         } else {
538             vfio_common.parse_capabilities(bdf);
539             vfio_common.initialize_legacy_interrupt()?;
540         }
541 
542         Ok(vfio_common)
543     }
544 
545     /// In case the MSI-X table offset is not page size aligned, we need some fixup,
546     /// because we don't want the MMIO RW region and the trap region to overlap each other.
547     fn fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64 {
548         if let Some(msix) = self.interrupt.msix.as_mut() {
549             let msix_cap = &mut msix.cap;
550 
551             // Assume table_bir equals pba_bir here.
552             let (table_offset, table_size) = msix_cap.table_range();
553             if is_page_size_aligned(table_offset) || msix_cap.table_bir() != bar_id {
554                 return region_size;
555             }
556 
557             let (pba_offset, pba_size) = msix_cap.pba_range();
558             let msix_sz = align_page_size_up(table_size + pba_size);
559             // Expand the region to hold both the RW and trap regions, each page size aligned
560             let size = std::cmp::max(region_size * 2, msix_sz * 2);
561             // Let the table start from the middle of the region
562             msix_cap.table_set_offset((size / 2) as u32);
563             msix_cap.pba_set_offset((size / 2 + pba_offset - table_offset) as u32);
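            // e.g. a 4 KiB BAR grows to 8 KiB: the first half remains the
            // mmap'able RW region while the table and PBA move into the
            // trapped second half.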
564 
565             size
566         } else {
567             // MSI-X not supported for this device
568             region_size
569         }
570     }
571 
572     // The `allocator` argument is unused on `aarch64`
573     #[allow(unused_variables)]
574     pub(crate) fn allocate_bars(
575         &mut self,
576         allocator: &Arc<Mutex<SystemAllocator>>,
577         mmio32_allocator: &mut AddressAllocator,
578         mmio64_allocator: &mut AddressAllocator,
579         resources: Option<Vec<Resource>>,
580     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
581         let mut bars = Vec::new();
582         let mut bar_id = VFIO_PCI_BAR0_REGION_INDEX;
583 
584         // Going through all regular regions to compute the BAR size.
585         // We're not saving the BAR address to restore it, because we
586         // are going to allocate a guest address for each BAR and write
587         // that new address back.
588         while bar_id < VFIO_PCI_CONFIG_REGION_INDEX {
589             let mut region_size: u64 = 0;
590             let mut region_type = PciBarRegionType::Memory32BitRegion;
591             let mut prefetchable = PciBarPrefetchable::NotPrefetchable;
592             let mut flags: u32 = 0;
593 
594             let mut restored_bar_addr = None;
595             if let Some(resources) = &resources {
596                 for resource in resources {
597                     if let Resource::PciBar {
598                         index,
599                         base,
600                         size,
601                         type_,
602                         ..
603                     } = resource
604                     {
605                         if *index == bar_id as usize {
606                             restored_bar_addr = Some(GuestAddress(*base));
607                             region_size = *size;
608                             region_type = PciBarRegionType::from(*type_);
609                             break;
610                         }
611                     }
612                 }
613                 if restored_bar_addr.is_none() {
614                     bar_id += 1;
615                     continue;
616                 }
617             } else {
618                 let bar_offset = if bar_id == VFIO_PCI_ROM_REGION_INDEX {
619                     (PCI_ROM_EXP_BAR_INDEX * 4) as u32
620                 } else {
621                     PCI_CONFIG_BAR_OFFSET + bar_id * 4
622                 };
623 
624                 // First read flags
625                 flags = self.vfio_wrapper.read_config_dword(bar_offset);
626 
627                 // Is this an IO BAR?
628                 let io_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX {
629                     matches!(flags & PCI_CONFIG_IO_BAR, PCI_CONFIG_IO_BAR)
630                 } else {
631                     false
632                 };
633 
634                 // Is this a 64-bit BAR?
635                 let is_64bit_bar = if bar_id != VFIO_PCI_ROM_REGION_INDEX {
636                     matches!(
637                         flags & PCI_CONFIG_MEMORY_BAR_64BIT,
638                         PCI_CONFIG_MEMORY_BAR_64BIT
639                     )
640                 } else {
641                     false
642                 };
643 
644                 if matches!(
645                     flags & PCI_CONFIG_BAR_PREFETCHABLE,
646                     PCI_CONFIG_BAR_PREFETCHABLE
647                 ) {
648                     prefetchable = PciBarPrefetchable::Prefetchable
649                 };
650 
651                 // To get the size, write all 1s
652                 self.vfio_wrapper
653                     .write_config_dword(bar_offset, 0xffff_ffff);
654 
655                 // And read back the BAR value. The device will write zeros for bits it doesn't care about.
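                // e.g. a 4 KiB 32-bit memory BAR reads back as 0xffff_f000
                // once the flag bits are masked, and !0xffff_f000 + 1 = 0x1000.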
656                 let mut lower = self.vfio_wrapper.read_config_dword(bar_offset);
657 
658                 if io_bar {
659                     // Mask flag bits (lowest 2 for I/O bars)
660                     lower &= !0b11;
661 
662                     // BAR is not enabled
663                     if lower == 0 {
664                         bar_id += 1;
665                         continue;
666                     }
667 
668                     // IO BAR
669                     region_type = PciBarRegionType::IoRegion;
670 
671                     // Invert bits and add 1 to calculate size
672                     region_size = (!lower + 1) as u64;
673                 } else if is_64bit_bar {
674                     // 64 bits Memory BAR
675                     region_type = PciBarRegionType::Memory64BitRegion;
676 
677                     // Query the size of the upper half of the 64-bit BAR
678                     let upper_offset: u32 = PCI_CONFIG_BAR_OFFSET + (bar_id + 1) * 4;
679                     self.vfio_wrapper
680                         .write_config_dword(upper_offset, 0xffff_ffff);
681                     let upper = self.vfio_wrapper.read_config_dword(upper_offset);
682 
683                     let mut combined_size = u64::from(upper) << 32 | u64::from(lower);
684 
685                     // Mask out flag bits (lowest 4 for memory bars)
686                     combined_size &= !0b1111;
687 
688                     // BAR is not enabled
689                     if combined_size == 0 {
690                         bar_id += 1;
691                         continue;
692                     }
693 
694                     // Invert and add 1 to find the size
695                     region_size = !combined_size + 1;
696                 } else {
697                     region_type = PciBarRegionType::Memory32BitRegion;
698 
699                     // Mask out flag bits (lowest 4 for memory bars)
700                     lower &= !0b1111;
701 
702                     if lower == 0 {
703                         bar_id += 1;
704                         continue;
705                     }
706 
707                     // Invert and add 1 to find the size
708                     region_size = (!lower + 1) as u64;
709                 }
710             }
711 
712             let bar_addr = match region_type {
713                 PciBarRegionType::IoRegion => {
714                     #[cfg(target_arch = "aarch64")]
715                     unimplemented!();
716 
717                     // The address needs to be 4-byte aligned.
718                     #[cfg(not(target_arch = "aarch64"))]
719                     allocator
720                         .lock()
721                         .unwrap()
722                         .allocate_io_addresses(restored_bar_addr, region_size, Some(0x4))
723                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
724                 }
725                 PciBarRegionType::Memory32BitRegion => {
726                     // BAR allocation must be naturally aligned
727                     mmio32_allocator
728                         .allocate(restored_bar_addr, region_size, Some(region_size))
729                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
730                 }
731                 PciBarRegionType::Memory64BitRegion => {
732                     // We need to do some fixup to keep the MMIO RW region and the MSI-X cap
733                     // region page size aligned.
734                     region_size = self.fixup_msix_region(bar_id, region_size);
735                     mmio64_allocator
736                         .allocate(
737                             restored_bar_addr,
738                             region_size,
739                             Some(std::cmp::max(
740                                 // SAFETY: FFI call. Trivially safe.
741                                 unsafe { sysconf(_SC_PAGESIZE) as GuestUsize },
742                                 region_size,
743                             )),
744                         )
745                         .ok_or(PciDeviceError::IoAllocationFailed(region_size))?
746                 }
747             };
748 
749             // We can now build our BAR configuration block.
750             let bar = PciBarConfiguration::default()
751                 .set_index(bar_id as usize)
752                 .set_address(bar_addr.raw_value())
753                 .set_size(region_size)
754                 .set_region_type(region_type)
755                 .set_prefetchable(prefetchable);
756 
757             if bar_id == VFIO_PCI_ROM_REGION_INDEX {
758                 self.configuration
759                     .add_pci_rom_bar(&bar, flags & 0x1)
760                     .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?;
761             } else {
762                 self.configuration
763                     .add_pci_bar(&bar)
764                     .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?;
765             }
766 
767             bars.push(bar);
768             self.mmio_regions.push(MmioRegion {
769                 start: bar_addr,
770                 length: region_size,
771                 type_: region_type,
772                 index: bar_id,
773                 user_memory_regions: Vec::new(),
774             });
775 
776             bar_id += 1;
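            // A 64-bit BAR spans two consecutive 32-bit BAR slots, so skip
            // the slot holding its upper half.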
777             if region_type == PciBarRegionType::Memory64BitRegion {
778                 bar_id += 1;
779             }
780         }
781 
782         Ok(bars)
783     }
784 
785     // The `allocator` argument is unused on `aarch64`
786     #[allow(unused_variables)]
787     pub(crate) fn free_bars(
788         &mut self,
789         allocator: &mut SystemAllocator,
790         mmio32_allocator: &mut AddressAllocator,
791         mmio64_allocator: &mut AddressAllocator,
792     ) -> Result<(), PciDeviceError> {
793         for region in self.mmio_regions.iter() {
794             match region.type_ {
795                 PciBarRegionType::IoRegion => {
796                     #[cfg(target_arch = "x86_64")]
797                     allocator.free_io_addresses(region.start, region.length);
798                     #[cfg(target_arch = "aarch64")]
799                     error!("I/O region is not supported");
800                 }
801                 PciBarRegionType::Memory32BitRegion => {
802                     mmio32_allocator.free(region.start, region.length);
803                 }
804                 PciBarRegionType::Memory64BitRegion => {
805                     mmio64_allocator.free(region.start, region.length);
806                 }
807             }
808         }
809         Ok(())
810     }
811 
812     pub(crate) fn parse_msix_capabilities(&mut self, cap: u8) -> MsixCap {
813         let msg_ctl = self.vfio_wrapper.read_config_word((cap + 2).into());
814 
815         let table = self.vfio_wrapper.read_config_dword((cap + 4).into());
816 
817         let pba = self.vfio_wrapper.read_config_dword((cap + 8).into());
818 
819         MsixCap {
820             msg_ctl,
821             table,
822             pba,
823         }
824     }
825 
826     pub(crate) fn initialize_msix(
827         &mut self,
828         msix_cap: MsixCap,
829         cap_offset: u32,
830         bdf: PciBdf,
831         state: Option<MsixConfigState>,
832     ) {
833         let interrupt_source_group = self
834             .msi_interrupt_manager
835             .create_group(MsiIrqGroupConfig {
836                 base: 0,
837                 count: msix_cap.table_size() as InterruptIndex,
838             })
839             .unwrap();
840 
841         let msix_config = MsixConfig::new(
842             msix_cap.table_size(),
843             interrupt_source_group.clone(),
844             bdf.into(),
845             state,
846         )
847         .unwrap();
848 
849         self.interrupt.msix = Some(VfioMsix {
850             bar: msix_config,
851             cap: msix_cap,
852             cap_offset,
853             interrupt_source_group,
854         });
855     }
856 
857     pub(crate) fn parse_msi_capabilities(&mut self, cap: u8) -> u16 {
858         self.vfio_wrapper.read_config_word((cap + 2).into())
859     }
860 
861     pub(crate) fn initialize_msi(
862         &mut self,
863         msg_ctl: u16,
864         cap_offset: u32,
865         state: Option<MsiConfigState>,
866     ) {
867         let interrupt_source_group = self
868             .msi_interrupt_manager
869             .create_group(MsiIrqGroupConfig {
870                 base: 0,
871                 count: msi_num_enabled_vectors(msg_ctl) as InterruptIndex,
872             })
873             .unwrap();
874 
875         let msi_config = MsiConfig::new(msg_ctl, interrupt_source_group.clone(), state).unwrap();
876 
877         self.interrupt.msi = Some(VfioMsi {
878             cfg: msi_config,
879             cap_offset,
880             interrupt_source_group,
881         });
882     }
883 
884     pub(crate) fn get_msix_cap_idx(&self) -> Option<usize> {
885         let mut cap_next = self
886             .vfio_wrapper
887             .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);
888 
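        // Walk the standard capability list: byte 0 of each entry is the
        // capability ID and byte 1 the offset of the next entry (0 ends it).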
889         while cap_next != 0 {
890             let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into());
891             if PciCapabilityId::from(cap_id) == PciCapabilityId::MsiX {
892                 return Some(cap_next as usize);
893             } else {
894                 cap_next = self.vfio_wrapper.read_config_byte((cap_next + 1).into());
895             }
896         }
897 
898         None
899     }
900 
901     pub(crate) fn parse_capabilities(&mut self, bdf: PciBdf) {
902         let mut cap_iter = self
903             .vfio_wrapper
904             .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);
905 
906         let mut pci_express_cap_found = false;
907         let mut power_management_cap_found = false;
908 
909         while cap_iter != 0 {
910             let cap_id = self.vfio_wrapper.read_config_byte(cap_iter.into());
911 
912             match PciCapabilityId::from(cap_id) {
913                 PciCapabilityId::MessageSignalledInterrupts => {
914                     if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSI_IRQ_INDEX) {
915                         if irq_info.count > 0 {
916                             // Parse capability only if the VFIO device
917                             // supports MSI.
918                             let msg_ctl = self.parse_msi_capabilities(cap_iter);
919                             self.initialize_msi(msg_ctl, cap_iter as u32, None);
920                         }
921                     }
922                 }
923                 PciCapabilityId::MsiX => {
924                     if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSIX_IRQ_INDEX)
925                     {
926                         if irq_info.count > 0 {
927                             // Parse capability only if the VFIO device
928                             // supports MSI-X.
929                             let msix_cap = self.parse_msix_capabilities(cap_iter);
930                             self.initialize_msix(msix_cap, cap_iter as u32, bdf, None);
931                         }
932                     }
933                 }
934                 PciCapabilityId::PciExpress => pci_express_cap_found = true,
935                 PciCapabilityId::PowerManagement => power_management_cap_found = true,
936                 _ => {}
937             };
938 
939             let cap_next = self.vfio_wrapper.read_config_byte((cap_iter + 1).into());
940             if cap_next == 0 {
941                 break;
942             }
943 
944             cap_iter = cap_next;
945         }
946 
947         if let Some(clique_id) = self.x_nv_gpudirect_clique {
948             self.add_nv_gpudirect_clique_cap(cap_iter, clique_id);
949         }
950 
951         if pci_express_cap_found && power_management_cap_found {
952             self.parse_extended_capabilities();
953         }
954     }
955 
956     fn add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8) {
957         // Turing, Ampere, Hopper, and Lovelace GPUs have dedicated space
958         // at 0xD4 for this capability.
959         let cap_offset = 0xd4u32;
960 
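        // Patch byte 1 (the "next capability" pointer) of the last standard
        // capability so the list now ends at the synthetic capability at 0xD4.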
961         let reg_idx = (cap_iter / 4) as usize;
962         self.patches.insert(
963             reg_idx,
964             ConfigPatch {
965                 mask: 0x0000_ff00,
966                 patch: cap_offset << 8,
967             },
968         );
969 
970         let reg_idx = (cap_offset / 4) as usize;
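        // First dword of the synthetic capability: 0x09 is the vendor-specific
        // capability ID, the next pointer is 0x00 (end of list) and 0x08 is
        // the capability length; the clique ID is shifted into bit 19 of the
        // second dword.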
971         self.patches.insert(
972             reg_idx,
973             ConfigPatch {
974                 mask: 0xffff_ffff,
975                 patch: 0x50080009u32,
976             },
977         );
978         self.patches.insert(
979             reg_idx + 1,
980             ConfigPatch {
981                 mask: 0xffff_ffff,
982                 patch: u32::from(clique_id) << 19 | 0x5032,
983             },
984         );
985     }
986 
987     fn parse_extended_capabilities(&mut self) {
988         let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET;
989 
990         loop {
991             let ext_cap_hdr = self.vfio_wrapper.read_config_dword(current_offset);
992 
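            // Extended capability header layout: bits [15:0] capability ID,
            // bits [19:16] version, bits [31:20] next capability offset.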
993             let cap_id: u16 = (ext_cap_hdr & 0xffff) as u16;
994             let cap_next: u16 = ((ext_cap_hdr >> 20) & 0xfff) as u16;
995 
996             match PciExpressCapabilityId::from(cap_id) {
997                 PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation
998                 | PciExpressCapabilityId::ResizeableBar
999                 | PciExpressCapabilityId::SingleRootIoVirtualization => {
1000                     let reg_idx = (current_offset / 4) as usize;
1001                     self.patches.insert(
1002                         reg_idx,
1003                         ConfigPatch {
1004                             mask: 0x0000_ffff,
1005                             patch: PciExpressCapabilityId::NullCapability as u32,
1006                         },
1007                     );
1008                 }
1009                 _ => {}
1010             }
1011 
1012             if cap_next == 0 {
1013                 break;
1014             }
1015 
1016             current_offset = cap_next.into();
1017         }
1018     }
1019 
1020     pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> {
1021         if let Some(intx) = &mut self.interrupt.intx {
1022             if !intx.enabled {
1023                 if let Some(eventfd) = intx.interrupt_source_group.notifier(0) {
1024                     self.vfio_wrapper
1025                         .enable_irq(VFIO_PCI_INTX_IRQ_INDEX, vec![&eventfd])
1026                         .map_err(VfioPciError::EnableIntx)?;
1027 
1028                     intx.enabled = true;
1029                 } else {
1030                     return Err(VfioPciError::MissingNotifier);
1031                 }
1032             }
1033         }
1034 
1035         Ok(())
1036     }
1037 
1038     pub(crate) fn disable_intx(&mut self) {
1039         if let Some(intx) = &mut self.interrupt.intx {
1040             if intx.enabled {
1041                 if let Err(e) = self.vfio_wrapper.disable_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1042                     error!("Could not disable INTx: {}", e);
1043                 } else {
1044                     intx.enabled = false;
1045                 }
1046             }
1047         }
1048     }
1049 
1050     pub(crate) fn enable_msi(&self) -> Result<(), VfioPciError> {
1051         if let Some(msi) = &self.interrupt.msi {
1052             let mut irq_fds: Vec<EventFd> = Vec::new();
1053             for i in 0..msi.cfg.num_enabled_vectors() {
1054                 if let Some(eventfd) = msi.interrupt_source_group.notifier(i as InterruptIndex) {
1055                     irq_fds.push(eventfd);
1056                 } else {
1057                     return Err(VfioPciError::MissingNotifier);
1058                 }
1059             }
1060 
1061             self.vfio_wrapper
1062                 .enable_msi(irq_fds.iter().collect())
1063                 .map_err(VfioPciError::EnableMsi)?;
1064         }
1065 
1066         Ok(())
1067     }
1068 
1069     pub(crate) fn disable_msi(&self) {
1070         if let Err(e) = self.vfio_wrapper.disable_msi() {
1071             error!("Could not disable MSI: {}", e);
1072         }
1073     }
1074 
1075     pub(crate) fn enable_msix(&self) -> Result<(), VfioPciError> {
1076         if let Some(msix) = &self.interrupt.msix {
1077             let mut irq_fds: Vec<EventFd> = Vec::new();
1078             for i in 0..msix.bar.table_entries.len() {
1079                 if let Some(eventfd) = msix.interrupt_source_group.notifier(i as InterruptIndex) {
1080                     irq_fds.push(eventfd);
1081                 } else {
1082                     return Err(VfioPciError::MissingNotifier);
1083                 }
1084             }
1085 
1086             self.vfio_wrapper
1087                 .enable_msix(irq_fds.iter().collect())
1088                 .map_err(VfioPciError::EnableMsix)?;
1089         }
1090 
1091         Ok(())
1092     }
1093 
1094     pub(crate) fn disable_msix(&self) {
1095         if let Err(e) = self.vfio_wrapper.disable_msix() {
1096             error!("Could not disable MSI-X: {}", e);
1097         }
1098     }
1099 
1100     pub(crate) fn initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError> {
1101         if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_INTX_IRQ_INDEX) {
1102             if irq_info.count == 0 {
1103                 // A count of 0 means the INTx IRQ is not supported, therefore
1104                 // it shouldn't be initialized.
1105                 return Ok(());
1106             }
1107         }
1108 
1109         if let Some(interrupt_source_group) = self.legacy_interrupt_group.clone() {
1110             self.interrupt.intx = Some(VfioIntx {
1111                 interrupt_source_group,
1112                 enabled: false,
1113             });
1114 
1115             self.enable_intx()?;
1116         }
1117 
1118         Ok(())
1119     }
1120 
1121     pub(crate) fn update_msi_capabilities(
1122         &mut self,
1123         offset: u64,
1124         data: &[u8],
1125     ) -> Result<(), VfioPciError> {
1126         match self.interrupt.update_msi(offset, data) {
1127             Some(InterruptUpdateAction::EnableMsi) => {
1128                 // Disable INTx before we can enable MSI
1129                 self.disable_intx();
1130                 self.enable_msi()?;
1131             }
1132             Some(InterruptUpdateAction::DisableMsi) => {
1133                 // Fall back onto INTx when disabling MSI
1134                 self.disable_msi();
1135                 self.enable_intx()?;
1136             }
1137             _ => {}
1138         }
1139 
1140         Ok(())
1141     }
1142 
1143     pub(crate) fn update_msix_capabilities(
1144         &mut self,
1145         offset: u64,
1146         data: &[u8],
1147     ) -> Result<(), VfioPciError> {
1148         match self.interrupt.update_msix(offset, data) {
1149             Some(InterruptUpdateAction::EnableMsix) => {
1150                 // Disable INTx before we can enable MSI-X
1151                 self.disable_intx();
1152                 self.enable_msix()?;
1153             }
1154             Some(InterruptUpdateAction::DisableMsix) => {
1155                 // Fall back onto INTx when disabling MSI-X
1156                 self.disable_msix();
1157                 self.enable_intx()?;
1158             }
1159             _ => {}
1160         }
1161 
1162         Ok(())
1163     }
1164 
1165     pub(crate) fn find_region(&self, addr: u64) -> Option<MmioRegion> {
1166         for region in self.mmio_regions.iter() {
1167             if addr >= region.start.raw_value()
1168                 && addr < region.start.unchecked_add(region.length).raw_value()
1169             {
1170                 return Some(region.clone());
1171             }
1172         }
1173         None
1174     }
1175 
1176     pub(crate) fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
1177         let addr = base + offset;
1178         if let Some(region) = self.find_region(addr) {
1179             let offset = addr - region.start.raw_value();
1180 
1181             if self.interrupt.msix_table_accessed(region.index, offset) {
1182                 self.interrupt.msix_read_table(offset, data);
1183             } else {
1184                 self.vfio_wrapper.region_read(region.index, offset, data);
1185             }
1186         }
1187 
1188         // INTx EOI
1189         // The guest reading from the BAR potentially means the interrupt has
1190         // been received and can be acknowledged.
1191         if self.interrupt.intx_in_use() {
1192             if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1193                 error!("Failed unmasking INTx IRQ: {}", e);
1194             }
1195         }
1196     }
1197 
1198     pub(crate) fn write_bar(
1199         &mut self,
1200         base: u64,
1201         offset: u64,
1202         data: &[u8],
1203     ) -> Option<Arc<Barrier>> {
1204         let addr = base + offset;
1205         if let Some(region) = self.find_region(addr) {
1206             let offset = addr - region.start.raw_value();
1207 
1208             // If the MSI-X table is written to, we need to update our cache.
1209             if self.interrupt.msix_table_accessed(region.index, offset) {
1210                 self.interrupt.msix_write_table(offset, data);
1211             } else {
1212                 self.vfio_wrapper.region_write(region.index, offset, data);
1213             }
1214         }
1215 
1216         // INTx EOI
1217         // The guest writing to the BAR potentially means the interrupt has
1218         // been received and can be acknowledged.
1219         if self.interrupt.intx_in_use() {
1220             if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) {
1221                 error!("Failed unmasking INTx IRQ: {}", e);
1222             }
1223         }
1224 
1225         None
1226     }
1227 
1228     pub(crate) fn write_config_register(
1229         &mut self,
1230         reg_idx: usize,
1231         offset: u64,
1232         data: &[u8],
1233     ) -> Option<Arc<Barrier>> {
1234         // When the guest wants to write to a BAR, we trap it into
1235         // the VFIO device.
1236         // VFIO device.
1237         if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(&reg_idx)
1238             || reg_idx == PCI_ROM_EXP_BAR_INDEX
1239         {
1240             // We keep our local cache updated with the BARs.
1241             // We'll read it back from there when the guest is asking
1242             // for BARs (see read_config_register()).
1243             self.configuration
1244                 .write_config_register(reg_idx, offset, data);
1245             return None;
1246         }
1247 
1248         let reg = (reg_idx * PCI_CONFIG_REGISTER_SIZE) as u64;
1249 
1250         // If the MSI or MSI-X capabilities are accessed, we need to
1251         // update our local cache accordingly.
1252         // Depending on how the capabilities are modified, this could
1253         // trigger a VFIO MSI or MSI-X toggle.
1254         if let Some((cap_id, cap_base)) = self.interrupt.accessed(reg) {
1255             let cap_offset: u64 = reg - cap_base + offset;
1256             match cap_id {
1257                 PciCapabilityId::MessageSignalledInterrupts => {
1258                     if let Err(e) = self.update_msi_capabilities(cap_offset, data) {
1259                         error!("Could not update MSI capabilities: {}", e);
1260                     }
1261                 }
1262                 PciCapabilityId::MsiX => {
1263                     if let Err(e) = self.update_msix_capabilities(cap_offset, data) {
1264                         error!("Could not update MSI-X capabilities: {}", e);
1265                     }
1266                 }
1267                 _ => {}
1268             }
1269         }
1270 
1271         // Make sure to write to the device's PCI config space after MSI/MSI-X
1272         // interrupts have been enabled/disabled. In case of MSI, when the
1273         // interrupts are enabled through VFIO (using VFIO_DEVICE_SET_IRQS),
1274         // the MSI Enable bit in the MSI capability structure found in the PCI
1275         // config space is disabled by default. That's why when the guest is
1276         // enabling this bit, we first need to enable the MSI interrupts with
1277         // VFIO through the VFIO_DEVICE_SET_IRQS ioctl, and only then can we
1278         // write to the device region to update the MSI Enable bit.
1279         self.vfio_wrapper.write_config((reg + offset) as u32, data);
1280 
1281         None
1282     }
1283 
1284     pub(crate) fn read_config_register(&mut self, reg_idx: usize) -> u32 {
1285         // When reading the BARs, we trap it and return what comes
1286         // from our local configuration space. We want the guest to
1287         // use that and not the VFIO device BARs, as those do not map
1288         // into the guest address space.
1289         if (PCI_CONFIG_BAR0_INDEX..PCI_CONFIG_BAR0_INDEX + BAR_NUMS).contains(&reg_idx)
1290             || reg_idx == PCI_ROM_EXP_BAR_INDEX
1291         {
1292             return self.configuration.read_reg(reg_idx);
1293         }
1294 
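        // Serve the MSI-X table and PBA registers from the cached capability,
        // since fixup_msix_region() may have relocated them within the BAR.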
1295         if let Some(id) = self.get_msix_cap_idx() {
1296             let msix = self.interrupt.msix.as_mut().unwrap();
1297             if reg_idx * 4 == id + 4 {
1298                 return msix.cap.table;
1299             } else if reg_idx * 4 == id + 8 {
1300                 return msix.cap.pba;
1301             }
1302         }
1303 
1304         // Since we don't support passing multi-function devices, we should
1305         // mask the multi-function bit, i.e. bit 7 of the Header Type byte,
1306         // in register 3.
1307         let mask = if reg_idx == PCI_HEADER_TYPE_REG_INDEX {
1308             0xff7f_ffff
1309         } else {
1310             0xffff_ffff
1311         };
1312 
1313         // The config register read comes from the VFIO device itself.
1314         let mut value = self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask;
1315 
1316         if let Some(config_patch) = self.patches.get(&reg_idx) {
1317             value = (value & !config_patch.mask) | config_patch.patch;
1318         }
1319 
1320         value
1321     }
1322 
1323     fn state(&self) -> VfioCommonState {
1324         let intx_state = self.interrupt.intx.as_ref().map(|intx| IntxState {
1325             enabled: intx.enabled,
1326         });
1327 
1328         let msi_state = self.interrupt.msi.as_ref().map(|msi| MsiState {
1329             cap: msi.cfg.cap,
1330             cap_offset: msi.cap_offset,
1331         });
1332 
1333         let msix_state = self.interrupt.msix.as_ref().map(|msix| MsixState {
1334             cap: msix.cap,
1335             cap_offset: msix.cap_offset,
1336             bdf: msix.bar.devid,
1337         });
1338 
1339         VfioCommonState {
1340             intx_state,
1341             msi_state,
1342             msix_state,
1343         }
1344     }
1345 
1346     fn set_state(
1347         &mut self,
1348         state: &VfioCommonState,
1349         msi_state: Option<MsiConfigState>,
1350         msix_state: Option<MsixConfigState>,
1351     ) -> Result<(), VfioPciError> {
1352         if let (Some(intx), Some(interrupt_source_group)) =
1353             (&state.intx_state, self.legacy_interrupt_group.clone())
1354         {
1355             self.interrupt.intx = Some(VfioIntx {
1356                 interrupt_source_group,
1357                 enabled: false,
1358             });
1359 
1360             if intx.enabled {
1361                 self.enable_intx()?;
1362             }
1363         }
1364 
1365         if let Some(msi) = &state.msi_state {
1366             self.initialize_msi(msi.cap.msg_ctl, msi.cap_offset, msi_state);
1367         }
1368 
1369         if let Some(msix) = &state.msix_state {
1370             self.initialize_msix(msix.cap, msix.cap_offset, msix.bdf.into(), msix_state);
1371         }
1372 
1373         Ok(())
1374     }
1375 }
1376 
1377 impl Pausable for VfioCommon {}
1378 
1379 impl Snapshottable for VfioCommon {
1380     fn id(&self) -> String {
1381         String::from(VFIO_COMMON_ID)
1382     }
1383 
1384     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
1385         let mut vfio_common_snapshot = Snapshot::new_from_state(&self.state())?;
1386 
1387         // Snapshot PciConfiguration
1388         vfio_common_snapshot.add_snapshot(self.configuration.id(), self.configuration.snapshot()?);
1389 
1390         // Snapshot MSI
1391         if let Some(msi) = &mut self.interrupt.msi {
1392             vfio_common_snapshot.add_snapshot(msi.cfg.id(), msi.cfg.snapshot()?);
1393         }
1394 
1395         // Snapshot MSI-X
1396         if let Some(msix) = &mut self.interrupt.msix {
1397             vfio_common_snapshot.add_snapshot(msix.bar.id(), msix.bar.snapshot()?);
1398         }
1399 
1400         Ok(vfio_common_snapshot)
1401     }
1402 }
1403 
1404 /// VfioPciDevice represents a VFIO PCI device.
1405 /// This structure implements the BusDevice and PciDevice traits.
1406 ///
1407 /// A VfioPciDevice is bound to a VfioDevice and is also a PCI device.
1408 /// The VMM creates a VfioDevice, then assigns it to a VfioPciDevice,
1409 /// which then gets added to the PCI bus.
1410 pub struct VfioPciDevice {
1411     id: String,
1412     vm: Arc<dyn hypervisor::Vm>,
1413     device: Arc<VfioDevice>,
1414     container: Arc<VfioContainer>,
1415     common: VfioCommon,
1416     iommu_attached: bool,
1417     memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
1418 }
1419 
1420 impl VfioPciDevice {
1421     /// Constructs a new Vfio Pci device for the given Vfio device
1422     #[allow(clippy::too_many_arguments)]
1423     pub fn new(
1424         id: String,
1425         vm: &Arc<dyn hypervisor::Vm>,
1426         device: VfioDevice,
1427         container: Arc<VfioContainer>,
1428         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
1429         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
1430         iommu_attached: bool,
1431         bdf: PciBdf,
1432         memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
1433         snapshot: Option<Snapshot>,
1434         x_nv_gpudirect_clique: Option<u8>,
1435     ) -> Result<Self, VfioPciError> {
1436         let device = Arc::new(device);
1437         device.reset();
1438 
1439         let vfio_wrapper = VfioDeviceWrapper::new(Arc::clone(&device));
1440 
1441         let common = VfioCommon::new(
1442             msi_interrupt_manager,
1443             legacy_interrupt_group,
1444             Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
1445             &PciVfioSubclass::VfioSubclass,
1446             bdf,
1447             vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
1448             x_nv_gpudirect_clique,
1449         )?;
1450 
1451         let vfio_pci_device = VfioPciDevice {
1452             id,
1453             vm: vm.clone(),
1454             device,
1455             container,
1456             common,
1457             iommu_attached,
1458             memory_slot,
1459         };
1460 
1461         Ok(vfio_pci_device)
1462     }
1463 
1464     pub fn iommu_attached(&self) -> bool {
1465         self.iommu_attached
1466     }
1467 
1468     fn generate_sparse_areas(
1469         caps: &[VfioRegionInfoCap],
1470         region_index: u32,
1471         region_start: u64,
1472         region_size: u64,
1473         vfio_msix: Option<&VfioMsix>,
1474     ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError> {
1475         for cap in caps {
1476             match cap {
1477                 VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()),
1478                 VfioRegionInfoCap::MsixMappable => {
1479                     if !is_4k_aligned(region_start) {
1480                         error!(
1481                             "Region start address 0x{:x} must be at least 4KiB aligned",
1482                             region_start
1483                         );
1484                         return Err(VfioPciError::RegionAlignment);
1485                     }
1486                     if !is_4k_multiple(region_size) {
1487                         error!(
1488                             "Region size 0x{:x} must be a multiple of 4KiB",
1489                             region_size
1490                         );
1491                         return Err(VfioPciError::RegionSize);
1492                     }
1493                     // In case the region contains the MSI-X vectors table or
1494                     // the MSI-X PBA table, we must calculate the subregions
1495                     // around them, leading to a list of sparse areas.
1496                     // We want to make sure we will still trap MMIO accesses
1497                     // to these MSI-X specific ranges. If these ranges don't
1498                     // align with the page size, we enlarge them until they do.
1499                     //
1500                     // A BTreeMap is used so that iterating over the ranges
1501                     // yields them sorted by key, which ensures a proper split
1502                     // of the whole region.
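                          // As an illustrative sketch (assuming a 4KiB page
                          // size): for a 0x4000-byte region whose MSI-X table
                          // sits at offset 0x1000 with size 0x800, the table
                          // range is widened to [0x1000, 0x2000), and the
                          // resulting sparse areas are [0x0, 0x1000) and
                          // [0x2000, 0x4000); the table itself stays trapped
                          // instead of being mmap'ed.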
1503                     let mut inter_ranges = BTreeMap::new();
1504                     if let Some(msix) = vfio_msix {
1505                         if region_index == msix.cap.table_bir() {
1506                             let (offset, size) = msix.cap.table_range();
1507                             let offset = align_page_size_down(offset);
1508                             let size = align_page_size_up(size);
1509                             inter_ranges.insert(offset, size);
1510                         }
1511                         if region_index == msix.cap.pba_bir() {
1512                             let (offset, size) = msix.cap.pba_range();
1513                             let offset = align_page_size_down(offset);
1514                             let size = align_page_size_up(size);
1515                             inter_ranges.insert(offset, size);
1516                         }
1517                     }
1518 
1519                     let mut sparse_areas = Vec::new();
1520                     let mut current_offset = 0;
1521                     for (range_offset, range_size) in inter_ranges {
1522                         if range_offset > current_offset {
1523                             sparse_areas.push(VfioRegionSparseMmapArea {
1524                                 offset: current_offset,
1525                                 size: range_offset - current_offset,
1526                             });
1527                         }
1528                         current_offset = align_page_size_down(range_offset + range_size);
1529                     }
1530 
1531                     if region_size > current_offset {
1532                         sparse_areas.push(VfioRegionSparseMmapArea {
1533                             offset: current_offset,
1534                             size: region_size - current_offset,
1535                         });
1536                     }
1537 
1538                     return Ok(sparse_areas);
1539                 }
1540                 _ => {}
1541             }
1542         }
1543 
1544         // In case no relevant capabilities have been found, create a single
1545         // sparse area corresponding to the entire MMIO region.
1546         Ok(vec![VfioRegionSparseMmapArea {
1547             offset: 0,
1548             size: region_size,
1549         }])
1550     }
1551 
1552     /// Map MMIO regions into the guest, and avoid VM exits when the guest
1553     /// tries to reach those regions.
1554     ///
1555     /// It relies on the `vm` field to register the VFIO MMIO regions as
1556     /// user memory regions, and on the `memory_slot` closure to provide
1557     /// the memory slots backing them.
1558     /// Regions containing the MSI-X table or PBA are only mapped when the
1559     /// device reports the MSIX_MAPPABLE capability for them.
1560     pub fn map_mmio_regions(&mut self) -> Result<(), VfioPciError> {
1561         let fd = self.device.as_raw_fd();
1562 
1563         for region in self.common.mmio_regions.iter_mut() {
1564             let region_flags = self.device.get_region_flags(region.index);
1565             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
1566                 let mut prot = 0;
1567                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
1568                     prot |= libc::PROT_READ;
1569                 }
1570                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
1571                     prot |= libc::PROT_WRITE;
1572                 }
1573 
1574                 // Retrieve the list of capabilities found on the region
1575                 let caps = if region_flags & VFIO_REGION_INFO_FLAG_CAPS != 0 {
1576                     self.device.get_region_caps(region.index)
1577                 } else {
1578                     Vec::new()
1579                 };
1580 
1581                 // Don't try to mmap the region if it contains the MSI-X
1582                 // table or the MSI-X PBA subregion and MSIX_MAPPABLE was
1583                 // not found in the list of supported capabilities.
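                 // Trapping matters here: guest writes to the MSI-X table must
                 // be intercepted so the VMM can reprogram the interrupt
                 // routes, and a direct mapping would bypass that.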
1584                 if let Some(msix) = self.common.interrupt.msix.as_ref() {
1585                     if (region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir())
1586                         && !caps.contains(&VfioRegionInfoCap::MsixMappable)
1587                     {
1588                         continue;
1589                     }
1590                 }
1591 
1592                 let mmap_size = self.device.get_region_size(region.index);
1593                 let mmap_offset = self.device.get_region_offset(region.index);
1594 
1595                 let sparse_areas = Self::generate_sparse_areas(
1596                     &caps,
1597                     region.index,
1598                     region.start.0,
1599                     mmap_size,
1600                     self.common.interrupt.msix.as_ref(),
1601                 )?;
1602 
1603                 for area in sparse_areas.iter() {
1604                     // SAFETY: FFI call with correct arguments
1605                     let host_addr = unsafe {
1606                         libc::mmap(
1607                             null_mut(),
1608                             area.size as usize,
1609                             prot,
1610                             libc::MAP_SHARED,
1611                             fd,
1612                             mmap_offset as libc::off_t + area.offset as libc::off_t,
1613                         )
1614                     };
1615 
1616                     if host_addr == libc::MAP_FAILED {
1617                         error!(
1618                             "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
1619                             area.offset,
1620                             area.size,
1621                             std::io::Error::last_os_error()
1622                         );
1623                         return Err(VfioPciError::MmapArea);
1624                     }
1625 
1626                     if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) {
1627                         warn!(
1628                             "Cannot register sparse area that is not page size aligned (offset = 0x{:x}, size = 0x{:x})",
1629                             area.offset,
1630                             area.size,
1631                         );
                             // The mapping cannot be registered as a user
                             // memory region, so release it before bailing out.
                             // SAFETY: host_addr and area.size come from the
                             // mmap() call above.
                             unsafe { libc::munmap(host_addr, area.size as usize) };
1632                         return Ok(());
1633                     }
1634 
1635                     let user_memory_region = UserMemoryRegion {
1636                         slot: (self.memory_slot)(),
1637                         start: region.start.0 + area.offset,
1638                         size: area.size,
1639                         host_addr: host_addr as u64,
1640                     };
1641 
1642                     region.user_memory_regions.push(user_memory_region);
1643 
1644                     let mem_region = self.vm.make_user_memory_region(
1645                         user_memory_region.slot,
1646                         user_memory_region.start,
1647                         user_memory_region.size,
1648                         user_memory_region.host_addr,
1649                         false,
1650                         false,
1651                     );
1652 
1653                     self.vm
1654                         .create_user_memory_region(mem_region)
1655                         .map_err(VfioPciError::CreateUserMemoryRegion)?;
1656 
1657                     if !self.iommu_attached {
1658                         self.container
1659                             .vfio_dma_map(
1660                                 user_memory_region.start,
1661                                 user_memory_region.size,
1662                                 user_memory_region.host_addr,
1663                             )
1664                             .map_err(VfioPciError::DmaMap)?;
1665                     }
1666                 }
1667             }
1668         }
1669 
1670         Ok(())
1671     }
1672 
1673     pub fn unmap_mmio_regions(&mut self) {
1674         for region in self.common.mmio_regions.iter() {
1675             for user_memory_region in region.user_memory_regions.iter() {
1676                 // Unmap from vfio container
1677                 if !self.iommu_attached {
1678                     if let Err(e) = self
1679                         .container
1680                         .vfio_dma_unmap(user_memory_region.start, user_memory_region.size)
1681                     {
1682                         error!("Could not unmap mmio region from vfio container: {}", e);
1683                     }
1684                 }
1685 
1686                 // Remove region
1687                 let r = self.vm.make_user_memory_region(
1688                     user_memory_region.slot,
1689                     user_memory_region.start,
1690                     user_memory_region.size,
1691                     user_memory_region.host_addr,
1692                     false,
1693                     false,
1694                 );
1695 
1696                 if let Err(e) = self.vm.remove_user_memory_region(r) {
1697                     error!("Could not remove the userspace memory region: {}", e);
1698                 }
1699 
1700                 // SAFETY: FFI call with correct arguments
1701                 let ret = unsafe {
1702                     libc::munmap(
1703                         user_memory_region.host_addr as *mut libc::c_void,
1704                         user_memory_region.size as usize,
1705                     )
1706                 };
1707                 if ret != 0 {
1708                     error!(
1709                         "Could not unmap region {}, error: {}",
1710                         region.index,
1711                         io::Error::last_os_error()
1712                     );
1713                 }
1714             }
1715         }
1716     }
1717 
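     /// Add a DMA mapping to the VFIO container. This is a no-op when the
     /// device is attached to a virtual IOMMU, in which case the mappings are
     /// driven through an ExternalDmaMapping implementation (see
     /// VfioDmaMapping below) as the guest programs the IOMMU.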
1718     pub fn dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<(), VfioPciError> {
1719         if !self.iommu_attached {
1720             self.container
1721                 .vfio_dma_map(iova, size, user_addr)
1722                 .map_err(VfioPciError::DmaMap)?;
1723         }
1724 
1725         Ok(())
1726     }
1727 
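     /// Remove a DMA mapping from the VFIO container. Like `dma_map`, this is
     /// a no-op when the device is attached to a virtual IOMMU.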
1728     pub fn dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioPciError> {
1729         if !self.iommu_attached {
1730             self.container
1731                 .vfio_dma_unmap(iova, size)
1732                 .map_err(VfioPciError::DmaUnmap)?;
1733         }
1734 
1735         Ok(())
1736     }
1737 
1738     pub fn mmio_regions(&self) -> Vec<MmioRegion> {
1739         self.common.mmio_regions.clone()
1740     }
1741 }
1742 
1743 impl Drop for VfioPciDevice {
1744     fn drop(&mut self) {
1745         self.unmap_mmio_regions();
1746 
1747         if let Some(msix) = &self.common.interrupt.msix {
1748             if msix.bar.enabled() {
1749                 self.common.disable_msix();
1750             }
1751         }
1752 
1753         if let Some(msi) = &self.common.interrupt.msi {
1754             if msi.cfg.enabled() {
1755                 self.common.disable_msi()
1756             }
1757         }
1758 
1759         if self.common.interrupt.intx_in_use() {
1760             self.common.disable_intx();
1761         }
1762     }
1763 }
1764 
1765 impl BusDevice for VfioPciDevice {
1766     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
1767         self.read_bar(base, offset, data)
1768     }
1769 
1770     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
1771         self.write_bar(base, offset, data)
1772     }
1773 }
1774 
1775 // First BAR offset in the PCI config space.
1776 const PCI_CONFIG_BAR_OFFSET: u32 = 0x10;
1777 // Capability register offset in the PCI config space.
1778 const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34;
1779 // Extended capabilities register offset in the PCI config space.
1780 const PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET: u32 = 0x100;
1781 // An I/O BAR is indicated when bit 0 of the BAR is set to 1.
1782 const PCI_CONFIG_IO_BAR: u32 = 0x1;
1783 // 64-bit memory BAR flag.
1784 const PCI_CONFIG_MEMORY_BAR_64BIT: u32 = 0x4;
1785 // Prefetchable BAR bit.
1786 const PCI_CONFIG_BAR_PREFETCHABLE: u32 = 0x8;
1787 // PCI config register size (4 bytes).
1788 const PCI_CONFIG_REGISTER_SIZE: usize = 4;
1789 // Number of BARs for a PCI device
1790 const BAR_NUMS: usize = 6;
1791 // PCI Header Type register index
1792 const PCI_HEADER_TYPE_REG_INDEX: usize = 3;
1793 // First BAR register index
1794 const PCI_CONFIG_BAR0_INDEX: usize = 4;
1795 // PCI ROM expansion BAR register index
1796 const PCI_ROM_EXP_BAR_INDEX: usize = 12;
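 // As a worked example: a memory BAR register reading 0xfe00_000c has bit 0
 // clear (memory BAR), bits 2:1 = 0b10 (64-bit wide) and bit 3 set
 // (prefetchable); masking off the low four flag bits yields the base address
 // 0xfe00_0000.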
1797 
1798 impl PciDevice for VfioPciDevice {
1799     fn allocate_bars(
1800         &mut self,
1801         allocator: &Arc<Mutex<SystemAllocator>>,
1802         mmio32_allocator: &mut AddressAllocator,
1803         mmio64_allocator: &mut AddressAllocator,
1804         resources: Option<Vec<Resource>>,
1805     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
1806         self.common
1807             .allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources)
1808     }
1809 
1810     fn free_bars(
1811         &mut self,
1812         allocator: &mut SystemAllocator,
1813         mmio32_allocator: &mut AddressAllocator,
1814         mmio64_allocator: &mut AddressAllocator,
1815     ) -> Result<(), PciDeviceError> {
1816         self.common
1817             .free_bars(allocator, mmio32_allocator, mmio64_allocator)
1818     }
1819 
1820     fn write_config_register(
1821         &mut self,
1822         reg_idx: usize,
1823         offset: u64,
1824         data: &[u8],
1825     ) -> Option<Arc<Barrier>> {
1826         self.common.write_config_register(reg_idx, offset, data)
1827     }
1828 
1829     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
1830         self.common.read_config_register(reg_idx)
1831     }
1832 
1833     fn detect_bar_reprogramming(
1834         &mut self,
1835         reg_idx: usize,
1836         data: &[u8],
1837     ) -> Option<BarReprogrammingParams> {
1838         self.common
1839             .configuration
1840             .detect_bar_reprogramming(reg_idx, data)
1841     }
1842 
1843     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
1844         self.common.read_bar(base, offset, data)
1845     }
1846 
1847     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
1848         self.common.write_bar(base, offset, data)
1849     }
1850 
1851     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), io::Error> {
1852         for region in self.common.mmio_regions.iter_mut() {
1853             if region.start.raw_value() == old_base {
1854                 region.start = GuestAddress(new_base);
1855 
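                     // A user memory region cannot be relocated in place: it
                     // is removed at its old guest address and re-created at
                     // the new one below.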
1856                 for user_memory_region in region.user_memory_regions.iter_mut() {
1857                     // Remove old region
1858                     let old_mem_region = self.vm.make_user_memory_region(
1859                         user_memory_region.slot,
1860                         user_memory_region.start,
1861                         user_memory_region.size,
1862                         user_memory_region.host_addr,
1863                         false,
1864                         false,
1865                     );
1866 
1867                     self.vm
1868                         .remove_user_memory_region(old_mem_region)
1869                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
1870 
1871                     // Update the user memory region with the correct start address.
1872                     if new_base > old_base {
1873                         user_memory_region.start += new_base - old_base;
1874                     } else {
1875                         user_memory_region.start -= old_base - new_base;
1876                     }
1877 
1878                     // Insert new region
1879                     let new_mem_region = self.vm.make_user_memory_region(
1880                         user_memory_region.slot,
1881                         user_memory_region.start,
1882                         user_memory_region.size,
1883                         user_memory_region.host_addr,
1884                         false,
1885                         false,
1886                     );
1887 
1888                     self.vm
1889                         .create_user_memory_region(new_mem_region)
1890                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
1891                 }
1892             }
1893         }
1894 
1895         Ok(())
1896     }
1897 
1898     fn as_any(&mut self) -> &mut dyn Any {
1899         self
1900     }
1901 
1902     fn id(&self) -> Option<String> {
1903         Some(self.id.clone())
1904     }
1905 }
1906 
1907 impl Pausable for VfioPciDevice {}
1908 
1909 impl Snapshottable for VfioPciDevice {
1910     fn id(&self) -> String {
1911         self.id.clone()
1912     }
1913 
1914     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
1915         let mut vfio_pci_dev_snapshot = Snapshot::default();
1916 
1917         // Snapshot VfioCommon
1918         vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
1919 
1920         Ok(vfio_pci_dev_snapshot)
1921     }
1922 }
1923 impl Transportable for VfioPciDevice {}
1924 impl Migratable for VfioPciDevice {}
1925 
1926 /// This structure implements the ExternalDmaMapping trait. It is meant
1927 /// to be used by callers that need a way to update the DMA mappings
1928 /// associated with a specific VFIO container.
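 ///
 /// When registered with a virtual IOMMU, `map` and `unmap` are expected to
 /// be invoked as the guest updates its IOMMU mappings, translating each
 /// guest physical address into a host virtual address before programming
 /// the container.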
1929 pub struct VfioDmaMapping<M: GuestAddressSpace> {
1930     container: Arc<VfioContainer>,
1931     memory: Arc<M>,
1932     mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
1933 }
1934 
1935 impl<M: GuestAddressSpace> VfioDmaMapping<M> {
1936     /// Create a VfioDmaMapping object.
1937     /// # Parameters
1938     /// * `container`: VFIO container object.
1939     /// * `memory`: guest memory used to translate GPAs into host addresses.
1940     /// * `mmio_regions`: MMIO regions to use when a GPA is not in guest RAM.
1941     pub fn new(
1942         container: Arc<VfioContainer>,
1943         memory: Arc<M>,
1944         mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
1945     ) -> Self {
1946         VfioDmaMapping {
1947             container,
1948             memory,
1949             mmio_regions,
1950         }
1951     }
1952 }
1953 
1954 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioDmaMapping<M> {
1955     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> {
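          // Translate the GPA into a host user address: first look it up in
          // guest RAM, then fall back to the MMIO regions of other
          // passthrough devices, which allows peer-to-peer DMA between them.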
1956         let mem = self.memory.memory();
1957         let guest_addr = GuestAddress(gpa);
1958         let user_addr = if mem.check_range(guest_addr, size as usize) {
1959             match mem.get_host_address(guest_addr) {
1960                 Ok(t) => t as u64,
1961                 Err(e) => {
1962                     return Err(io::Error::new(
1963                         io::ErrorKind::Other,
1964                         format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}")
1965                     ));
1966                 }
1967             }
1968         } else if self.mmio_regions.lock().unwrap().check_range(gpa, size) {
1969             self.mmio_regions.lock().unwrap().find_user_address(gpa)?
1970         } else {
1971             return Err(io::Error::new(
1972                 io::ErrorKind::Other,
1973                 format!("failed to locate guest address 0x{gpa:x} in guest memory"),
1974             ));
1975         };
1976 
1977         self.container
1978             .vfio_dma_map(iova, size, user_addr)
1979             .map_err(|e| {
1980                 io::Error::new(
1981                     io::ErrorKind::Other,
1982                     format!(
1983                         "failed to map memory for VFIO container, \
1984                          iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
1985                     ),
1986                 )
1987             })
1988     }
1989 
1990     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error> {
1991         self.container.vfio_dma_unmap(iova, size).map_err(|e| {
1992             io::Error::new(
1993                 io::ErrorKind::Other,
1994                 format!(
1995                     "failed to unmap memory for VFIO container, \
1996                      iova 0x{iova:x}, size 0x{size:x}: {e:?}"
1997                 ),
1998             )
1999         })
2000     }
2001 }
2002