xref: /cloud-hypervisor/vmm/src/memory_manager.rs (revision f67b3f79ea19c9a66e04074cbbf5d292f6529e43)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 #[cfg(target_arch = "x86_64")]
6 use crate::config::SgxEpcConfig;
7 use crate::config::{HotplugMethod, MemoryConfig, MemoryZoneConfig};
8 use crate::migration::url_to_path;
9 use crate::MEMORY_MANAGER_SNAPSHOT_ID;
10 use crate::{GuestMemoryMmap, GuestRegionMmap};
11 #[cfg(feature = "acpi")]
12 use acpi_tables::{aml, aml::Aml};
13 use anyhow::anyhow;
14 #[cfg(target_arch = "x86_64")]
15 use arch::x86_64::{SgxEpcRegion, SgxEpcSection};
16 use arch::{layout, RegionType};
17 #[cfg(target_arch = "x86_64")]
18 use devices::ioapic;
19 #[cfg(target_arch = "x86_64")]
20 use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE};
21 use std::collections::HashMap;
22 use std::convert::TryInto;
23 use std::ffi;
24 use std::fs::{File, OpenOptions};
25 use std::io;
26 use std::ops::Deref;
27 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
28 use std::path::PathBuf;
29 use std::result;
30 use std::sync::{Arc, Barrier, Mutex};
31 use versionize::{VersionMap, Versionize, VersionizeResult};
32 use versionize_derive::Versionize;
33 #[cfg(target_arch = "x86_64")]
34 use vm_allocator::GsiApic;
35 use vm_allocator::SystemAllocator;
36 use vm_device::BusDevice;
37 use vm_memory::guest_memory::FileOffset;
38 use vm_memory::{
39     mmap::MmapRegionError, Address, Bytes, Error as MmapError, GuestAddress, GuestAddressSpace,
40     GuestMemory, GuestMemoryAtomic, GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
41     GuestUsize, MmapRegion,
42 };
43 use vm_migration::{
44     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
45     SnapshotDataSection, Snapshottable, Transportable, VersionMapped,
46 };
47 
48 #[cfg(feature = "acpi")]
49 pub const MEMORY_MANAGER_ACPI_SIZE: usize = 0x18;
50 
51 const DEFAULT_MEMORY_ZONE: &str = "mem0";
52 
53 #[cfg(target_arch = "x86_64")]
54 const X86_64_IRQ_BASE: u32 = 5;
55 
56 const HOTPLUG_COUNT: usize = 8;
57 
58 // Memory policy constants
59 const MPOL_BIND: u32 = 2;
60 const MPOL_MF_STRICT: u32 = 1;
61 const MPOL_MF_MOVE: u32 = 1 << 1;
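// These values mirror the Linux UAPI definitions from
// include/uapi/linux/mempolicy.h and are passed straight through to the
// kernel by the mbind() wrapper below.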
62 
63 #[derive(Default)]
64 struct HotPlugState {
65     base: u64,
66     length: u64,
67     active: bool,
68     inserting: bool,
69     removing: bool,
70 }
71 
72 pub struct VirtioMemZone {
73     region: Arc<GuestRegionMmap>,
74     resize_handler: virtio_devices::Resize,
75     hotplugged_size: u64,
76     hugepages: bool,
77 }
78 
79 impl VirtioMemZone {
80     pub fn region(&self) -> &Arc<GuestRegionMmap> {
81         &self.region
82     }
83     pub fn resize_handler(&self) -> &virtio_devices::Resize {
84         &self.resize_handler
85     }
86     pub fn hotplugged_size(&self) -> u64 {
87         self.hotplugged_size
88     }
89     pub fn hugepages(&self) -> bool {
90         self.hugepages
91     }
92 }
93 
94 #[derive(Default)]
95 pub struct MemoryZone {
96     regions: Vec<Arc<GuestRegionMmap>>,
97     virtio_mem_zone: Option<VirtioMemZone>,
98 }
99 
100 impl MemoryZone {
101     pub fn regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
102         &self.regions
103     }
104     pub fn virtio_mem_zone(&self) -> &Option<VirtioMemZone> {
105         &self.virtio_mem_zone
106     }
107 }
108 
109 pub type MemoryZones = HashMap<String, MemoryZone>;
110 
111 struct GuestRamMapping {
112     slot: u32,
113     gpa: u64,
114     size: u64,
115 }
116 
117 pub struct MemoryManager {
118     boot_guest_memory: GuestMemoryMmap,
119     guest_memory: GuestMemoryAtomic<GuestMemoryMmap>,
120     next_memory_slot: u32,
121     start_of_device_area: GuestAddress,
122     end_of_device_area: GuestAddress,
123     pub vm: Arc<dyn hypervisor::Vm>,
124     hotplug_slots: Vec<HotPlugState>,
125     selected_slot: usize,
126     mergeable: bool,
127     allocator: Arc<Mutex<SystemAllocator>>,
128     hotplug_method: HotplugMethod,
129     boot_ram: u64,
130     current_ram: u64,
131     next_hotplug_slot: usize,
132     snapshot: Mutex<Option<GuestMemoryLoadGuard<GuestMemoryMmap>>>,
133     shared: bool,
134     hugepages: bool,
135     hugepage_size: Option<u64>,
136     #[cfg(target_arch = "x86_64")]
137     sgx_epc_region: Option<SgxEpcRegion>,
138     user_provided_zones: bool,
139     snapshot_memory_regions: Vec<MemoryRegion>,
140     memory_zones: MemoryZones,
141     log_dirty: bool, // Enable dirty logging for created RAM regions
142 
143     // Keep track of calls to create_userspace_mapping() for guest RAM.
144     // This is useful for getting the dirty pages as we need to know the
145     // slots that the mappings are created in.
146     guest_ram_mappings: Vec<GuestRamMapping>,
147 
148     #[cfg(feature = "acpi")]
149     pub acpi_address: GuestAddress,
150 }
151 
152 #[derive(Debug)]
153 pub enum Error {
154     /// Failed to create shared file.
155     SharedFileCreate(io::Error),
156 
157     /// Failed to set shared file length.
158     SharedFileSetLen(io::Error),
159 
160     /// Mmap backed guest memory error
161     GuestMemory(MmapError),
162 
163     /// Failed to allocate a memory range.
164     MemoryRangeAllocation,
165 
166     /// Error from region creation
167     GuestMemoryRegion(MmapRegionError),
168 
169     /// No ACPI slot available
170     NoSlotAvailable,
171 
172     /// Not enough space in the hotplug RAM region
173     InsufficientHotplugRam,
174 
175     /// The requested hotplug memory addition is not a valid size
176     InvalidSize,
177 
178     /// Failed to create the user memory region.
179     CreateUserMemoryRegion(hypervisor::HypervisorVmError),
180 
181     /// Failed to remove the user memory region.
182     RemoveUserMemoryRegion(hypervisor::HypervisorVmError),
183 
184     /// Failed to create an EventFd.
185     EventFdFail(io::Error),
186 
187     /// Eventfd write error
188     EventfdError(io::Error),
189 
190     /// Failed to virtio-mem resize
191     VirtioMemResizeFail(virtio_devices::mem::Error),
192 
193     /// Cannot restore VM
194     Restore(MigratableError),
195 
196     /// Cannot create the system allocator
197     CreateSystemAllocator,
198 
199     /// Invalid SGX EPC section size
200     #[cfg(target_arch = "x86_64")]
201     EpcSectionSizeInvalid,
202 
203     /// Failed allocating SGX EPC region
204     #[cfg(target_arch = "x86_64")]
205     SgxEpcRangeAllocation,
206 
207     /// Failed opening SGX virtual EPC device
208     #[cfg(target_arch = "x86_64")]
209     SgxVirtEpcOpen(io::Error),
210 
211     /// Failed setting the SGX virtual EPC section size
212     #[cfg(target_arch = "x86_64")]
213     SgxVirtEpcFileSetLen(io::Error),
214 
215     /// Failed opening SGX provisioning device
216     #[cfg(target_arch = "x86_64")]
217     SgxProvisionOpen(io::Error),
218 
219     /// Failed enabling SGX provisioning
220     #[cfg(target_arch = "x86_64")]
221     SgxEnableProvisioning(hypervisor::HypervisorVmError),
222 
223     /// Failed creating a new MmapRegion instance.
224     #[cfg(target_arch = "x86_64")]
225     NewMmapRegion(vm_memory::mmap::MmapRegionError),
226 
227     /// No memory zones found.
228     MissingMemoryZones,
229 
230     /// Memory configuration is not valid.
231     InvalidMemoryParameters,
232 
233     /// Forbidden operation. Impossible to resize guest memory if it is
234     /// backed by user defined memory regions.
235     InvalidResizeWithMemoryZones,
236 
237     /// It's invalid to try applying a NUMA policy to a memory zone that is
238     /// memory mapped with MAP_SHARED.
239     InvalidSharedMemoryZoneWithHostNuma,
240 
241     /// Failed applying NUMA memory policy.
242     ApplyNumaPolicy(io::Error),
243 
244     /// Memory zone identifier is not unique.
245     DuplicateZoneId,
246 
247     /// No virtio-mem resizing handler found.
248     MissingVirtioMemHandler,
249 
250     /// Unknown memory zone.
251     UnknownMemoryZone,
252 
253     /// Invalid size for resizing. The size can be anything except 0.
254     InvalidHotplugSize,
255 
256     /// Invalid hotplug method associated with memory zones resizing capability.
257     InvalidHotplugMethodWithMemoryZones,
258 
259     /// Could not find specified memory zone identifier from hash map.
260     MissingZoneIdentifier,
261 
262     /// Resizing the memory zone failed.
263     ResizeZone,
264 
265     /// Guest address overflow
266     GuestAddressOverFlow,
267 
268     /// Error opening snapshot file
269     SnapshotOpen(io::Error),
270 
271     /// Error copying snapshot into region
272     SnapshotCopy(GuestMemoryError),
273 
274     /// Failed to allocate MMIO address
275     AllocateMmioAddress,
276 }
277 
278 const ENABLE_FLAG: usize = 0;
279 const INSERTING_FLAG: usize = 1;
280 const REMOVING_FLAG: usize = 2;
281 const EJECT_FLAG: usize = 3;
282 
283 const BASE_OFFSET_LOW: u64 = 0;
284 const BASE_OFFSET_HIGH: u64 = 0x4;
285 const LENGTH_OFFSET_LOW: u64 = 0x8;
286 const LENGTH_OFFSET_HIGH: u64 = 0xC;
287 const STATUS_OFFSET: u64 = 0x14;
288 const SELECTION_OFFSET: u64 = 0;
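// Taken together, these offsets form a small slot-selection protocol,
// implemented by the BusDevice read()/write() handlers below: the guest's
// ACPI code writes a slot index to SELECTION_OFFSET, reads that slot's base,
// length and status, and acknowledges insert/remove events by writing the
// corresponding bits back to STATUS_OFFSET. A rough, illustrative sequence:
//
//   write(SELECTION_OFFSET, slot);               // pick a hotplug slot
//   base_lo = read(BASE_OFFSET_LOW);             // low 32 bits of DIMM base
//   base_hi = read(BASE_OFFSET_HIGH);            // high 32 bits of DIMM base
//   status  = read(STATUS_OFFSET);               // ENABLE/INSERTING/REMOVING
//   write(STATUS_OFFSET, 1 << INSERTING_FLAG);   // ack a pending insertion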
289 
290 // 64k is subtracted from the MMIO address space size. This is done for the
291 // following reasons:
292 //  - Reduce the addressable space size by at least 4k to work around a Linux
293 //    bug when the VMM allocates devices at the end of the addressable space
294 //  - Windows requires the addressable space size to be 64k aligned
295 fn mmio_address_space_size(phys_bits: u8) -> u64 {
296     (1 << phys_bits) - (1 << 16)
297 }
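// For illustration, with phys_bits = 40 this yields
// (1 << 40) - (1 << 16) = 0x100_0000_0000 - 0x1_0000 = 0xFF_FFFF_0000,
// i.e. 64k short of the full 1 TiB range and still 64k aligned, which is what
// the debug_assert_eq!() in MemoryManager::new() checks.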
298 
299 impl BusDevice for MemoryManager {
300     fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
301         if self.selected_slot < self.hotplug_slots.len() {
302             let state = &self.hotplug_slots[self.selected_slot];
303             match offset {
304                 BASE_OFFSET_LOW => {
305                     data.copy_from_slice(&state.base.to_le_bytes()[..4]);
306                 }
307                 BASE_OFFSET_HIGH => {
308                     data.copy_from_slice(&state.base.to_le_bytes()[4..]);
309                 }
310                 LENGTH_OFFSET_LOW => {
311                     data.copy_from_slice(&state.length.to_le_bytes()[..4]);
312                 }
313                 LENGTH_OFFSET_HIGH => {
314                     data.copy_from_slice(&state.length.to_le_bytes()[4..]);
315                 }
316                 STATUS_OFFSET => {
317                     // The Linux kernel, quite reasonably, doesn't zero the memory it gives us.
318                     data.fill(0);
319                     if state.active {
320                         data[0] |= 1 << ENABLE_FLAG;
321                     }
322                     if state.inserting {
323                         data[0] |= 1 << INSERTING_FLAG;
324                     }
325                     if state.removing {
326                         data[0] |= 1 << REMOVING_FLAG;
327                     }
328                 }
329                 _ => {
330                     warn!(
331                         "Unexpected offset for accessing memory manager device: {:#}",
332                         offset
333                     );
334                 }
335             }
336         } else {
337             warn!("Out of range memory slot: {}", self.selected_slot);
338         }
339     }
340 
341     fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
342         match offset {
343             SELECTION_OFFSET => {
344                 self.selected_slot = usize::from(data[0]);
345             }
346             STATUS_OFFSET => {
347                 if self.selected_slot < self.hotplug_slots.len() {
348                     let state = &mut self.hotplug_slots[self.selected_slot];
349                     // The ACPI code writes back a 1 to acknowledge the insertion
350                     if (data[0] & (1 << INSERTING_FLAG) == 1 << INSERTING_FLAG) && state.inserting {
351                         state.inserting = false;
352                     }
353                     // Ditto for removal
354                     if (data[0] & (1 << REMOVING_FLAG) == 1 << REMOVING_FLAG) && state.removing {
355                         state.removing = false;
356                     }
357                     // Trigger removal of "DIMM"
358                     if data[0] & (1 << EJECT_FLAG) == 1 << EJECT_FLAG {
359                         warn!("Ejection of memory not currently supported");
360                     }
361                 } else {
362                     warn!("Out of range memory slot: {}", self.selected_slot);
363                 }
364             }
365             _ => {
366                 warn!(
367                     "Unexpected offset for accessing memory manager device: {:#}",
368                     offset
369                 );
370             }
371         };
372         None
373     }
374 }
375 
376 impl MemoryManager {
377     /// Creates all memory regions based on the available RAM ranges defined
378     /// by `ram_regions`, and based on the description of the memory zones.
379     /// In practice, this function can perform multiple memory mappings of the
380     /// same backing file if there's a hole in the address space between two
381     /// RAM ranges.
382     /// One example might be ram_regions containing 2 regions (0-3G and 4G-6G)
383     /// and zones describing two memory zones (sizes 1G and 4G).
384     /// This function will create 3 resulting memory regions:
385     /// - The first one mapping the first memory zone entirely over the 0-1G range
386     /// - The second one mapping part of the second memory zone over the 1G-3G range
387     /// - The third one mapping the rest of the second memory zone over the 4G-6G range
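    /// Laid out explicitly, with the zone offsets the mapping loop computes,
    /// that illustrative example becomes:
    /// - Region #1: guest 0G-1G, backed by zone 1 at zone offset 0
    /// - Region #2: guest 1G-3G, backed by zone 2 at zone offset 0
    /// - Region #3: guest 4G-6G, backed by zone 2 at zone offset 2G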
388     fn create_memory_regions_from_zones(
389         ram_regions: &[(GuestAddress, usize)],
390         zones: &[MemoryZoneConfig],
391         prefault: bool,
392     ) -> Result<(Vec<Arc<GuestRegionMmap>>, MemoryZones), Error> {
393         let mut zones = zones.to_owned();
394         let mut mem_regions = Vec::new();
395         let mut zone = zones.remove(0);
396         let mut zone_offset = 0;
397         let mut memory_zones = HashMap::new();
398 
399         // Add zone id to the list of memory zones.
400         memory_zones.insert(zone.id.clone(), MemoryZone::default());
401 
402         for ram_region in ram_regions.iter() {
403             let mut ram_region_offset = 0;
404             let mut exit = false;
405 
406             loop {
407                 let mut ram_region_consumed = false;
408                 let mut pull_next_zone = false;
409 
410                 let ram_region_sub_size = ram_region.1 - ram_region_offset;
411                 let zone_sub_size = zone.size as usize - zone_offset;
412 
413                 let file_offset = zone_offset as u64;
414                 let region_start = ram_region
415                     .0
416                     .checked_add(ram_region_offset as u64)
417                     .ok_or(Error::GuestAddressOverFlow)?;
418                 let region_size = if zone_sub_size <= ram_region_sub_size {
419                     if zone_sub_size == ram_region_sub_size {
420                         ram_region_consumed = true;
421                     }
422 
423                     ram_region_offset += zone_sub_size;
424                     pull_next_zone = true;
425 
426                     zone_sub_size
427                 } else {
428                     zone_offset += ram_region_sub_size;
429                     ram_region_consumed = true;
430 
431                     ram_region_sub_size
432                 };
433 
434                 let region = MemoryManager::create_ram_region(
435                     &zone.file,
436                     file_offset,
437                     region_start,
438                     region_size,
439                     prefault,
440                     zone.shared,
441                     zone.hugepages,
442                     zone.hugepage_size,
443                     zone.host_numa_node,
444                 )?;
445 
446                 // Add region to the list of regions associated with the
447                 // current memory zone.
448                 if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
449                     memory_zone.regions.push(region.clone());
450                 }
451 
452                 mem_regions.push(region);
453 
454                 if pull_next_zone {
455                     // Get the next zone and reset the offset.
456                     zone_offset = 0;
457                     if zones.is_empty() {
458                         exit = true;
459                         break;
460                     }
461                     zone = zones.remove(0);
462 
463                     // Check if the zone id already exists. If it does, throw
464                     // an error as we need unique identifiers. Otherwise, add
465                     // the new zone id to the list of memory zones.
466                     if memory_zones.contains_key(&zone.id) {
467                         error!(
468                             "Memory zone identifier '{}' found more than once. \
469                             It must be unique",
470                             zone.id,
471                         );
472                         return Err(Error::DuplicateZoneId);
473                     }
474                     memory_zones.insert(zone.id.clone(), MemoryZone::default());
475                 }
476 
477                 if ram_region_consumed {
478                     break;
479                 }
480             }
481 
482             if exit {
483                 break;
484             }
485         }
486 
487         Ok((mem_regions, memory_zones))
488     }
489 
490     fn fill_saved_regions(&mut self, saved_regions: Vec<MemoryRegion>) -> Result<(), Error> {
491         for region in saved_regions {
492             if let Some(content) = region.content {
493                 // Open (read only) the snapshot file for the given region.
494                 let mut memory_region_file = OpenOptions::new()
495                     .read(true)
496                     .open(content)
497                     .map_err(Error::SnapshotOpen)?;
498 
499                 self.guest_memory
500                     .memory()
501                     .read_exact_from(
502                         GuestAddress(region.start_addr),
503                         &mut memory_region_file,
504                         region.size as usize,
505                     )
506                     .map_err(Error::SnapshotCopy)?;
507             }
508         }
509 
510         Ok(())
511     }
512 
513     pub fn new(
514         vm: Arc<dyn hypervisor::Vm>,
515         config: &MemoryConfig,
516         prefault: bool,
517         phys_bits: u8,
518         #[cfg(feature = "tdx")] tdx_enabled: bool,
519     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
520         let user_provided_zones = config.size == 0;
521         let mut allow_mem_hotplug: bool = false;
522 
523         let (ram_size, zones) = if !user_provided_zones {
524             if config.zones.is_some() {
525                 error!(
526                     "User defined memory regions can't be provided if the \
527                     memory size is not 0"
528                 );
529                 return Err(Error::InvalidMemoryParameters);
530             }
531 
532             if config.hotplug_size.is_some() {
533                 allow_mem_hotplug = true;
534             }
535 
536             if let Some(hotplugged_size) = config.hotplugged_size {
537                 if let Some(hotplug_size) = config.hotplug_size {
538                     if hotplugged_size > hotplug_size {
539                         error!(
540                             "'hotplugged_size' {} can't be bigger than \
541                             'hotplug_size' {}",
542                             hotplugged_size, hotplug_size,
543                         );
544                         return Err(Error::InvalidMemoryParameters);
545                     }
546                 } else {
547                     error!(
548                         "Invalid to define 'hotplugged_size' when there is\
549                         no 'hotplug_size'"
550                     );
551                     return Err(Error::InvalidMemoryParameters);
552                 }
553                 if config.hotplug_method == HotplugMethod::Acpi {
554                     error!(
555                         "Invalid to define 'hotplugged_size' with hotplug \
556                         method 'acpi'"
557                     );
558                     return Err(Error::InvalidMemoryParameters);
559                 }
560             }
561 
562             // Create a single zone from the global memory config. This lets
563             // us reuse the codepath for user defined memory zones.
564             let zones = vec![MemoryZoneConfig {
565                 id: String::from(DEFAULT_MEMORY_ZONE),
566                 size: config.size,
567                 file: None,
568                 shared: config.shared,
569                 hugepages: config.hugepages,
570                 hugepage_size: config.hugepage_size,
571                 host_numa_node: None,
572                 hotplug_size: config.hotplug_size,
573                 hotplugged_size: config.hotplugged_size,
574             }];
575 
576             (config.size, zones)
577         } else {
578             if config.zones.is_none() {
579                 error!(
580                     "User defined memory regions must be provided if the \
581                     memory size is 0"
582                 );
583                 return Err(Error::MissingMemoryZones);
584             }
585 
586             // Safe to unwrap as we checked right above that some zones
587             // were provided.
588             let zones = config.zones.clone().unwrap();
589             if zones.is_empty() {
590                 return Err(Error::MissingMemoryZones);
591             }
592 
593             let mut total_ram_size: u64 = 0;
594             for zone in zones.iter() {
595                 total_ram_size += zone.size;
596 
597                 if zone.shared && zone.file.is_some() && zone.host_numa_node.is_some() {
598                     error!(
599                         "Invalid to set host NUMA policy for a memory zone \
600                         backed by a regular file and mapped as 'shared'"
601                     );
602                     return Err(Error::InvalidSharedMemoryZoneWithHostNuma);
603                 }
604 
605                 if zone.hotplug_size.is_some() && config.hotplug_method == HotplugMethod::Acpi {
606                     error!("Invalid to set ACPI hotplug method for memory zones");
607                     return Err(Error::InvalidHotplugMethodWithMemoryZones);
608                 }
609 
610                 if let Some(hotplugged_size) = zone.hotplugged_size {
611                     if let Some(hotplug_size) = zone.hotplug_size {
612                         if hotplugged_size > hotplug_size {
613                             error!(
614                                 "'hotplugged_size' {} can't be bigger than \
615                                 'hotplug_size' {}",
616                                 hotplugged_size, hotplug_size,
617                             );
618                             return Err(Error::InvalidMemoryParameters);
619                         }
620                     } else {
621                         error!(
622                             "Invalid to define 'hotplugged_size' when there is\
623                             no 'hotplug_size' for a memory zone"
624                         );
625                         return Err(Error::InvalidMemoryParameters);
626                     }
627                     if config.hotplug_method == HotplugMethod::Acpi {
628                         error!(
629                             "Invalid to define 'hotplugged_size' with hotplug \
630                             method 'acpi'"
631                         );
632                         return Err(Error::InvalidMemoryParameters);
633                     }
634                 }
635             }
636 
637             (total_ram_size, zones)
638         };
639 
640         // Init guest memory
641         let arch_mem_regions = arch::arch_memory_regions(ram_size);
642 
643         let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
644             .iter()
645             .filter(|r| r.2 == RegionType::Ram)
646             .map(|r| (r.0, r.1))
647             .collect();
648 
649         let (mem_regions, mut memory_zones) =
650             Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault)?;
651 
652         let guest_memory =
653             GuestMemoryMmap::from_arc_regions(mem_regions).map_err(Error::GuestMemory)?;
654 
655         let boot_guest_memory = guest_memory.clone();
656 
657         let mmio_address_space_size = mmio_address_space_size(phys_bits);
658         debug_assert_eq!(
659             (((mmio_address_space_size) >> 16) << 16),
660             mmio_address_space_size
661         );
662         let end_of_device_area = GuestAddress(mmio_address_space_size - 1);
663 
664         let mut start_of_device_area =
665             MemoryManager::start_addr(guest_memory.last_addr(), allow_mem_hotplug)?;
666         let mut virtio_mem_regions: Vec<Arc<GuestRegionMmap>> = Vec::new();
667 
668         // Update list of memory zones for resize.
669         for zone in zones {
670             if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
671                 if let Some(hotplug_size) = zone.hotplug_size {
672                     if hotplug_size == 0 {
673                         error!("'hotplug_size' can't be 0");
674                         return Err(Error::InvalidHotplugSize);
675                     }
676 
677                     if !user_provided_zones && config.hotplug_method == HotplugMethod::Acpi {
678                         start_of_device_area = start_of_device_area
679                             .checked_add(hotplug_size)
680                             .ok_or(Error::GuestAddressOverFlow)?;
681                     } else {
682                         // Alignment must be "natural" i.e. same as size of block
683                         let start_addr = GuestAddress(
684                             (start_of_device_area.0 + virtio_devices::VIRTIO_MEM_ALIGN_SIZE - 1)
685                                 / virtio_devices::VIRTIO_MEM_ALIGN_SIZE
686                                 * virtio_devices::VIRTIO_MEM_ALIGN_SIZE,
687                         );
688 
689                         let region = MemoryManager::create_ram_region(
690                             &None,
691                             0,
692                             start_addr,
693                             hotplug_size as usize,
694                             false,
695                             zone.shared,
696                             zone.hugepages,
697                             zone.hugepage_size,
698                             zone.host_numa_node,
699                         )?;
700 
701                         virtio_mem_regions.push(region.clone());
702 
703                         memory_zone.virtio_mem_zone = Some(VirtioMemZone {
704                             region,
705                             resize_handler: virtio_devices::Resize::new()
706                                 .map_err(Error::EventFdFail)?,
707                             hotplugged_size: zone.hotplugged_size.unwrap_or(0),
708                             hugepages: zone.hugepages,
709                         });
710 
711                         start_of_device_area = start_addr
712                             .checked_add(hotplug_size)
713                             .ok_or(Error::GuestAddressOverFlow)?;
714                     }
715                 }
716             } else {
717                 return Err(Error::MissingZoneIdentifier);
718             }
719         }
720 
721         let guest_memory = GuestMemoryAtomic::new(guest_memory);
722 
723         let mut hotplug_slots = Vec::with_capacity(HOTPLUG_COUNT);
724         hotplug_slots.resize_with(HOTPLUG_COUNT, HotPlugState::default);
725 
726         // Both MMIO and PIO address spaces start at address 0.
727         let allocator = Arc::new(Mutex::new(
728             SystemAllocator::new(
729                 #[cfg(target_arch = "x86_64")]
730                 {
731                     GuestAddress(0)
732                 },
733                 #[cfg(target_arch = "x86_64")]
734                 {
735                     1 << 16
736                 },
737                 GuestAddress(0),
738                 mmio_address_space_size,
739                 layout::MEM_32BIT_DEVICES_START,
740                 layout::MEM_32BIT_DEVICES_SIZE,
741                 #[cfg(target_arch = "x86_64")]
742                 vec![GsiApic::new(
743                     X86_64_IRQ_BASE,
744                     ioapic::NUM_IOAPIC_PINS as u32 - X86_64_IRQ_BASE,
745                 )],
746             )
747             .ok_or(Error::CreateSystemAllocator)?,
748         ));
749 
750         #[cfg(feature = "acpi")]
751         let acpi_address = allocator
752             .lock()
753             .unwrap()
754             .allocate_mmio_addresses(None, MEMORY_MANAGER_ACPI_SIZE as u64, None)
755             .ok_or(Error::AllocateMmioAddress)?;
756 
757         #[cfg(not(feature = "tdx"))]
758         let log_dirty = true;
759         #[cfg(feature = "tdx")]
760         let log_dirty = !tdx_enabled; // Cannot log dirty pages on a TD
761 
762         let memory_manager = Arc::new(Mutex::new(MemoryManager {
763             boot_guest_memory,
764             guest_memory: guest_memory.clone(),
765             next_memory_slot: 0,
766             start_of_device_area,
767             end_of_device_area,
768             vm,
769             hotplug_slots,
770             selected_slot: 0,
771             mergeable: config.mergeable,
772             allocator: allocator.clone(),
773             hotplug_method: config.hotplug_method.clone(),
774             boot_ram: ram_size,
775             current_ram: ram_size,
776             next_hotplug_slot: 0,
777             snapshot: Mutex::new(None),
778             shared: config.shared,
779             hugepages: config.hugepages,
780             hugepage_size: config.hugepage_size,
781             #[cfg(target_arch = "x86_64")]
782             sgx_epc_region: None,
783             user_provided_zones,
784             snapshot_memory_regions: Vec::new(),
785             memory_zones,
786             guest_ram_mappings: Vec::new(),
787             #[cfg(feature = "acpi")]
788             acpi_address,
789             log_dirty,
790         }));
791 
792         for region in guest_memory.memory().iter() {
793             let mut mm = memory_manager.lock().unwrap();
794             let slot = mm.create_userspace_mapping(
795                 region.start_addr().raw_value(),
796                 region.len() as u64,
797                 region.as_ptr() as u64,
798                 config.mergeable,
799                 false,
800                 log_dirty,
801             )?;
802             mm.guest_ram_mappings.push(GuestRamMapping {
803                 gpa: region.start_addr().raw_value(),
804                 size: region.len(),
805                 slot,
806             });
807         }
808 
809         for region in virtio_mem_regions.drain(..) {
810             let mut mm = memory_manager.lock().unwrap();
811             let slot = mm.create_userspace_mapping(
812                 region.start_addr().raw_value(),
813                 region.len() as u64,
814                 region.as_ptr() as u64,
815                 config.mergeable,
816                 false,
817                 log_dirty,
818             )?;
819 
820             mm.guest_ram_mappings.push(GuestRamMapping {
821                 gpa: region.start_addr().raw_value(),
822                 size: region.len(),
823                 slot,
824             });
825             allocator
826                 .lock()
827                 .unwrap()
828                 .allocate_mmio_addresses(Some(region.start_addr()), region.len(), None)
829                 .ok_or(Error::MemoryRangeAllocation)?;
830             mm.add_region(region)?;
831         }
832 
833         // Allocate RAM and Reserved address ranges.
834         for region in arch_mem_regions.iter() {
835             allocator
836                 .lock()
837                 .unwrap()
838                 .allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
839                 .ok_or(Error::MemoryRangeAllocation)?;
840         }
841 
842         Ok(memory_manager)
843     }
844 
845     pub fn new_from_snapshot(
846         snapshot: &Snapshot,
847         vm: Arc<dyn hypervisor::Vm>,
848         config: &MemoryConfig,
849         source_url: Option<&str>,
850         prefault: bool,
851         phys_bits: u8,
852     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
853         let mm = MemoryManager::new(
854             vm,
855             config,
856             prefault,
857             phys_bits,
858             #[cfg(feature = "tdx")]
859             false,
860         )?;
861 
862         if let Some(source_url) = source_url {
863             let vm_snapshot_path = url_to_path(source_url).map_err(Error::Restore)?;
864 
865             let mem_snapshot: MemoryManagerSnapshotData = snapshot
866                 .to_versioned_state(MEMORY_MANAGER_SNAPSHOT_ID)
867                 .map_err(Error::Restore)?;
868 
869             // Here we turn the content file name into a content file path as
870             // this will be needed to copy the content of the saved memory
871             // region into the newly created memory region.
872             // We simply ignore the content files that are None, as they
873             // represent regions that have been directly saved by the user, with
874             // no need for saving into a dedicated external file. For these
875             // files, the VmConfig already contains the information on where to
876             // find them.
877             let mut saved_regions = mem_snapshot.memory_regions;
878             for region in saved_regions.iter_mut() {
879                 if let Some(content) = &mut region.content {
880                     let mut memory_region_path = vm_snapshot_path.clone();
881                     memory_region_path.push(content.clone());
882                     *content = memory_region_path.to_str().unwrap().to_owned();
883                 }
884             }
885 
886             mm.lock().unwrap().fill_saved_regions(saved_regions)?;
887         }
888 
889         Ok(mm)
890     }
891 
892     fn memfd_create(name: &ffi::CStr, flags: u32) -> Result<RawFd, io::Error> {
893         let res = unsafe { libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags) };
894 
895         if res < 0 {
896             Err(io::Error::last_os_error())
897         } else {
898             Ok(res as RawFd)
899         }
900     }
901 
902     fn mbind(
903         addr: *mut u8,
904         len: u64,
905         mode: u32,
906         nodemask: Vec<u64>,
907         maxnode: u64,
908         flags: u32,
909     ) -> Result<(), io::Error> {
910         let res = unsafe {
911             libc::syscall(
912                 libc::SYS_mbind,
913                 addr as *mut libc::c_void,
914                 len,
915                 mode,
916                 nodemask.as_ptr(),
917                 maxnode,
918                 flags,
919             )
920         };
921 
922         if res < 0 {
923             Err(io::Error::last_os_error())
924         } else {
925             Ok(())
926         }
927     }
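    // For illustration, binding a region to host_numa_node = 2 (see
    // create_ram_region() below) ends up calling mbind() with:
    //   nodemask = vec![0b100]    // bit 2 set in the first u64 word
    //   maxnode  = 2 + 1 + 1 = 4  // one extra for the kernel quirk noted below
    // while node 65 needs two words: idx = 65 / 64 = 1, shift = 65 % 64 = 1,
    // so nodemask = vec![0, 0b10] and maxnode = 67.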
928 
929     #[allow(clippy::too_many_arguments)]
930     fn create_ram_region(
931         backing_file: &Option<PathBuf>,
932         file_offset: u64,
933         start_addr: GuestAddress,
934         size: usize,
935         prefault: bool,
936         shared: bool,
937         hugepages: bool,
938         hugepage_size: Option<u64>,
939         host_numa_node: Option<u32>,
940     ) -> Result<Arc<GuestRegionMmap>, Error> {
941         let (f, f_off) = match backing_file {
942             Some(ref file) => {
943                 if file.is_dir() {
944                     // Override file offset as it does not apply in this case.
945                     info!(
946                         "Ignoring file offset since the backing file is a \
947                         temporary file created from the specified directory."
948                     );
949                     let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
950                     let fs = ffi::CString::new(fs_str).unwrap();
951                     let mut path = fs.as_bytes_with_nul().to_owned();
952                     let path_ptr = path.as_mut_ptr() as *mut _;
953                     let fd = unsafe { libc::mkstemp(path_ptr) };
954                     unsafe { libc::unlink(path_ptr) };
955                     let f = unsafe { File::from_raw_fd(fd) };
956                     f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
957 
958                     (f, 0)
959                 } else {
960                     let f = OpenOptions::new()
961                         .read(true)
962                         .write(true)
963                         .open(file)
964                         .map_err(Error::SharedFileCreate)?;
965 
966                     (f, file_offset)
967                 }
968             }
969             None => {
970                 let fd = Self::memfd_create(
971                     &ffi::CString::new("ch_ram").unwrap(),
972                     if hugepages {
973                         libc::MFD_HUGETLB
974                             | if let Some(hugepage_size) = hugepage_size {
975                                 /*
976                                  * From the Linux kernel:
977                                  * Several system calls take a flag to request "hugetlb" huge pages.
978                                  * Without further specification, these system calls will use the
979                                  * system's default huge page size.  If a system supports multiple
980                                  * huge page sizes, the desired huge page size can be specified in
981                                  * bits [26:31] of the flag arguments.  The value in these 6 bits
982                                  * will encode the log2 of the huge page size.
983                                  */
984 
985                                 hugepage_size.trailing_zeros() << 26
986                             } else {
987                                 // Use the system default huge page size
988                                 0
989                             }
990                     } else {
991                         0
992                     },
993                 )
994                 .map_err(Error::SharedFileCreate)?;
995 
996                 let f = unsafe { File::from_raw_fd(fd) };
997                 f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
998 
999                 (f, 0)
1000             }
1001         };
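        // To make the hugetlb flag encoding above concrete: assuming a
        // hugepage_size of 2 MiB (1 << 21), trailing_zeros() is 21 and the
        // extra flag becomes 21 << 26, i.e. the kernel's MFD_HUGE_2MB value;
        // a 1 GiB page (1 << 30) encodes as 30 << 26 (MFD_HUGE_1GB).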
1002 
1003         let mut mmap_flags = libc::MAP_NORESERVE
1004             | if shared {
1005                 libc::MAP_SHARED
1006             } else {
1007                 libc::MAP_PRIVATE
1008             };
1009         if prefault {
1010             mmap_flags |= libc::MAP_POPULATE;
1011         }
1012 
1013         let region = GuestRegionMmap::new(
1014             MmapRegion::build(
1015                 Some(FileOffset::new(f, f_off)),
1016                 size,
1017                 libc::PROT_READ | libc::PROT_WRITE,
1018                 mmap_flags,
1019             )
1020             .map_err(Error::GuestMemoryRegion)?,
1021             start_addr,
1022         )
1023         .map_err(Error::GuestMemory)?;
1024 
1025         // Apply NUMA policy if needed.
1026         if let Some(node) = host_numa_node {
1027             let addr = region.deref().as_ptr();
1028             let len = region.deref().size() as u64;
1029             let mode = MPOL_BIND;
1030             let mut nodemask: Vec<u64> = Vec::new();
1031             let flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
1032 
1033             // Linux is kind of buggy in the way it interprets maxnode as it
1034             // will cut off the last node. That's why we have to add 1 to what
1035             // we would consider as the proper maxnode value.
1036             let maxnode = node as u64 + 1 + 1;
1037 
1038             // Allocate the right size for the vector.
1039             nodemask.resize((node as usize / 64) + 1, 0);
1040 
1041             // Fill the global bitmask through the nodemask vector.
1042             let idx = (node / 64) as usize;
1043             let shift = node % 64;
1044             nodemask[idx] |= 1u64 << shift;
1045 
1046             // Policies are enforced by using MPOL_MF_MOVE flag as it will
1047             // force the kernel to move all pages that might have been already
1048             // allocated to the proper set of NUMA nodes. MPOL_MF_STRICT is
1049             // used to throw an error if MPOL_MF_MOVE didn't succeed.
1050             // MPOL_BIND is the selected mode as it specifies a strict policy
1051             // that restricts memory allocation to the nodes specified in the
1052             // nodemask.
1053             Self::mbind(addr, len, mode, nodemask, maxnode, flags)
1054                 .map_err(Error::ApplyNumaPolicy)?;
1055         }
1056 
1057         Ok(Arc::new(region))
1058     }
1059 
1060     // Update the GuestMemoryMmap with the new range
1061     fn add_region(&mut self, region: Arc<GuestRegionMmap>) -> Result<(), Error> {
1062         let guest_memory = self
1063             .guest_memory
1064             .memory()
1065             .insert_region(region)
1066             .map_err(Error::GuestMemory)?;
1067         self.guest_memory.lock().unwrap().replace(guest_memory);
1068 
1069         Ok(())
1070     }
1071 
1072     //
1073     // Calculate the start address of an area next to RAM.
1074     //
1075     // If memory hotplug is allowed, the start address needs to be aligned
1076     // (rounded up) to a 128MiB boundary.
1077     // If memory hotplug is not allowed, there is no alignment required.
1078     // On x86_64, it is bumped up to the 64-bit RAM start if RAM ends below the 32-bit reserved area.
1079     #[allow(clippy::let_and_return)]
1080     fn start_addr(mem_end: GuestAddress, allow_mem_hotplug: bool) -> Result<GuestAddress, Error> {
1081         let mut start_addr = if allow_mem_hotplug {
1082             GuestAddress(mem_end.0 | ((128 << 20) - 1))
1083         } else {
1084             mem_end
1085         };
1086 
1087         start_addr = start_addr
1088             .checked_add(1)
1089             .ok_or(Error::GuestAddressOverFlow)?;
1090 
1091         #[cfg(target_arch = "x86_64")]
1092         if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
1093             return Ok(arch::layout::RAM_64BIT_START);
1094         }
1095 
1096         Ok(start_addr)
1097     }
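    // For illustration, with hotplug allowed and RAM ending at
    // mem_end = 0xC123_4567:
    //   0xC123_4567 | 0x07FF_FFFF = 0xC7FF_FFFF, then + 1 = 0xC800_0000,
    // i.e. the next 128MiB boundary. Without hotplug the result is simply
    // mem_end + 1, subject to the x86_64 32-bit reserved range check above.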
1098 
1099     pub fn add_ram_region(
1100         &mut self,
1101         start_addr: GuestAddress,
1102         size: usize,
1103     ) -> Result<Arc<GuestRegionMmap>, Error> {
1104         // Allocate memory for the region
1105         let region = MemoryManager::create_ram_region(
1106             &None,
1107             0,
1108             start_addr,
1109             size,
1110             false,
1111             self.shared,
1112             self.hugepages,
1113             self.hugepage_size,
1114             None,
1115         )?;
1116 
1117         // Map it into the guest
1118         let slot = self.create_userspace_mapping(
1119             region.start_addr().0,
1120             region.len() as u64,
1121             region.as_ptr() as u64,
1122             self.mergeable,
1123             false,
1124             self.log_dirty,
1125         )?;
1126         self.guest_ram_mappings.push(GuestRamMapping {
1127             gpa: region.start_addr().raw_value(),
1128             size: region.len(),
1129             slot,
1130         });
1131 
1132         self.add_region(Arc::clone(&region))?;
1133 
1134         Ok(region)
1135     }
1136 
1137     fn hotplug_ram_region(&mut self, size: usize) -> Result<Arc<GuestRegionMmap>, Error> {
1138         info!("Hotplugging new RAM: {}", size);
1139 
1140         // Check that there is a free slot
1141         if self.next_hotplug_slot >= HOTPLUG_COUNT {
1142             return Err(Error::NoSlotAvailable);
1143         }
1144 
1145         // "Inserted" DIMM must have a size that is a multiple of 128MiB
1146         if size % (128 << 20) != 0 {
1147             return Err(Error::InvalidSize);
1148         }
1149 
1150         let start_addr = MemoryManager::start_addr(self.guest_memory.memory().last_addr(), true)?;
1151 
1152         if start_addr.checked_add(size.try_into().unwrap()).unwrap() > self.start_of_device_area() {
1153             return Err(Error::InsufficientHotplugRam);
1154         }
1155 
1156         let region = self.add_ram_region(start_addr, size)?;
1157 
1158         // Add region to the list of regions associated with the default
1159         // memory zone.
1160         if let Some(memory_zone) = self.memory_zones.get_mut(DEFAULT_MEMORY_ZONE) {
1161             memory_zone.regions.push(Arc::clone(&region));
1162         }
1163 
1164         // Tell the allocator
1165         self.allocator
1166             .lock()
1167             .unwrap()
1168             .allocate_mmio_addresses(Some(start_addr), size as GuestUsize, None)
1169             .ok_or(Error::MemoryRangeAllocation)?;
1170 
1171         // Update the slot so that it can be queried via the I/O port
1172         let mut slot = &mut self.hotplug_slots[self.next_hotplug_slot];
1173         slot.active = true;
1174         slot.inserting = true;
1175         slot.base = region.start_addr().0;
1176         slot.length = region.len() as u64;
1177 
1178         self.next_hotplug_slot += 1;
1179 
1180         Ok(region)
1181     }
1182 
1183     pub fn guest_memory(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
1184         self.guest_memory.clone()
1185     }
1186 
1187     pub fn boot_guest_memory(&self) -> GuestMemoryMmap {
1188         self.boot_guest_memory.clone()
1189     }
1190 
1191     pub fn allocator(&self) -> Arc<Mutex<SystemAllocator>> {
1192         self.allocator.clone()
1193     }
1194 
1195     pub fn start_of_device_area(&self) -> GuestAddress {
1196         self.start_of_device_area
1197     }
1198 
1199     pub fn end_of_device_area(&self) -> GuestAddress {
1200         self.end_of_device_area
1201     }
1202 
1203     pub fn allocate_memory_slot(&mut self) -> u32 {
1204         let slot_id = self.next_memory_slot;
1205         self.next_memory_slot += 1;
1206         slot_id
1207     }
1208 
1209     pub fn create_userspace_mapping(
1210         &mut self,
1211         guest_phys_addr: u64,
1212         memory_size: u64,
1213         userspace_addr: u64,
1214         mergeable: bool,
1215         readonly: bool,
1216         log_dirty: bool,
1217     ) -> Result<u32, Error> {
1218         let slot = self.allocate_memory_slot();
1219         let mem_region = self.vm.make_user_memory_region(
1220             slot,
1221             guest_phys_addr,
1222             memory_size,
1223             userspace_addr,
1224             readonly,
1225             log_dirty,
1226         );
1227 
1228         self.vm
1229             .create_user_memory_region(mem_region)
1230             .map_err(Error::CreateUserMemoryRegion)?;
1231 
1232         // Mark the pages as mergeable if explicitly asked for.
1233         if mergeable {
1234             // Safe because the address and size are valid since the
1235             // mmap succeeded.
1236             let ret = unsafe {
1237                 libc::madvise(
1238                     userspace_addr as *mut libc::c_void,
1239                     memory_size as libc::size_t,
1240                     libc::MADV_MERGEABLE,
1241                 )
1242             };
1243             if ret != 0 {
1244                 let err = io::Error::last_os_error();
1245                 // Safe to unwrap because the error is constructed with
1246                 // last_os_error(), which ensures the output will be Some().
1247                 let errno = err.raw_os_error().unwrap();
1248                 if errno == libc::EINVAL {
1249                     warn!("kernel not configured with CONFIG_KSM");
1250                 } else {
1251                     warn!("madvise error: {}", err);
1252                 }
1253                 warn!("failed to mark pages as mergeable");
1254             }
1255         }
1256 
1257         info!(
1258             "Created userspace mapping: {:x} -> {:x} {:x}",
1259             guest_phys_addr, userspace_addr, memory_size
1260         );
1261 
1262         Ok(slot)
1263     }
1264 
1265     pub fn remove_userspace_mapping(
1266         &mut self,
1267         guest_phys_addr: u64,
1268         memory_size: u64,
1269         userspace_addr: u64,
1270         mergeable: bool,
1271         slot: u32,
1272     ) -> Result<(), Error> {
1273         let mem_region = self.vm.make_user_memory_region(
1274             slot,
1275             guest_phys_addr,
1276             memory_size,
1277             userspace_addr,
1278             false, /* readonly -- don't care */
1279             false, /* log dirty */
1280         );
1281 
1282         self.vm
1283             .remove_user_memory_region(mem_region)
1284             .map_err(Error::RemoveUserMemoryRegion)?;
1285 
1286         // Mark the pages as unmergeable if they were previously marked as
1287         // mergeable.
1288         if mergeable {
1289             // Safe because the address and size are valid as the region was
1290             // previously advised.
1291             let ret = unsafe {
1292                 libc::madvise(
1293                     userspace_addr as *mut libc::c_void,
1294                     memory_size as libc::size_t,
1295                     libc::MADV_UNMERGEABLE,
1296                 )
1297             };
1298             if ret != 0 {
1299                 let err = io::Error::last_os_error();
1300                 // Safe to unwrap because the error is constructed with
1301                 // last_os_error(), which ensures the output will be Some().
1302                 let errno = err.raw_os_error().unwrap();
1303                 if errno == libc::EINVAL {
1304                     warn!("kernel not configured with CONFIG_KSM");
1305                 } else {
1306                     warn!("madvise error: {}", err);
1307                 }
1308                 warn!("failed to mark pages as unmergeable");
1309             }
1310         }
1311 
1312         info!(
1313             "Removed userspace mapping: {:x} -> {:x} {:x}",
1314             guest_phys_addr, userspace_addr, memory_size
1315         );
1316 
1317         Ok(())
1318     }
1319 
1320     pub fn virtio_mem_resize(&mut self, id: &str, size: u64) -> Result<(), Error> {
1321         if let Some(memory_zone) = self.memory_zones.get_mut(id) {
1322             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
1323                 virtio_mem_zone
1324                     .resize_handler()
1325                     .work(size)
1326                     .map_err(Error::VirtioMemResizeFail)?;
1327             } else {
1328                 error!("Failed resizing virtio-mem region: No virtio-mem handler");
1329                 return Err(Error::MissingVirtioMemHandler);
1330             }
1331 
1332             return Ok(());
1333         }
1334 
1335         error!("Failed resizing virtio-mem region: Unknown memory zone");
1336         Err(Error::UnknownMemoryZone)
1337     }
1338 
1339     /// In case this function resulted in adding a new memory region to the
1340     /// guest memory, the new region is returned to the caller. The virtio-mem
1341     /// use case never adds a new region as the whole hotpluggable memory has
1342     /// already been allocated at boot time.
1343     pub fn resize(&mut self, desired_ram: u64) -> Result<Option<Arc<GuestRegionMmap>>, Error> {
1344         if self.user_provided_zones {
1345             error!(
1346                 "Not allowed to resize guest memory when backed with user \
1347                 defined memory zones."
1348             );
1349             return Err(Error::InvalidResizeWithMemoryZones);
1350         }
1351 
1352         let mut region: Option<Arc<GuestRegionMmap>> = None;
1353         match self.hotplug_method {
1354             HotplugMethod::VirtioMem => {
1355                 if desired_ram >= self.boot_ram {
1356                     self.virtio_mem_resize(DEFAULT_MEMORY_ZONE, desired_ram - self.boot_ram)?;
1357                     self.current_ram = desired_ram;
1358                 }
1359             }
1360             HotplugMethod::Acpi => {
1361                 if desired_ram > self.current_ram {
1362                     region =
1363                         Some(self.hotplug_ram_region((desired_ram - self.current_ram) as usize)?);
1364                     self.current_ram = desired_ram;
1365                 }
1366             }
1367         }
1368         Ok(region)
1369     }
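    // A rough usage sketch with illustrative values: with boot_ram = 1 GiB
    // and hotplug_method = VirtioMem, resize(3 GiB) asks the default zone's
    // virtio-mem device to expose an extra 2 GiB and returns Ok(None); with
    // hotplug_method = Acpi, the same call hotplugs a new 2 GiB "DIMM" region
    // and returns it as Ok(Some(region)).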
1370 
1371     pub fn resize_zone(&mut self, id: &str, virtio_mem_size: u64) -> Result<(), Error> {
1372         if !self.user_provided_zones {
1373             error!(
1374                 "Not allowed to resize guest memory zone when no zone is \
1375                 defined."
1376             );
1377             return Err(Error::ResizeZone);
1378         }
1379 
1380         self.virtio_mem_resize(id, virtio_mem_size)
1381     }
1382 
1383     #[cfg(target_arch = "x86_64")]
1384     pub fn setup_sgx(
1385         &mut self,
1386         sgx_epc_config: Vec<SgxEpcConfig>,
1387         vm: &Arc<dyn hypervisor::Vm>,
1388     ) -> Result<(), Error> {
1389         let file = OpenOptions::new()
1390             .read(true)
1391             .open("/dev/sgx_provision")
1392             .map_err(Error::SgxProvisionOpen)?;
1393         vm.enable_sgx_attribute(file)
1394             .map_err(Error::SgxEnableProvisioning)?;
1395 
1396         // Go over each EPC section and verify its size is a 4k multiple. At
1397         // the same time, calculate the total size needed for the contiguous
1398         // EPC region.
1399         let mut epc_region_size = 0;
1400         for epc_section in sgx_epc_config.iter() {
1401             if epc_section.size == 0 {
1402                 return Err(Error::EpcSectionSizeInvalid);
1403             }
1404             if epc_section.size & 0x0fff != 0 {
1405                 return Err(Error::EpcSectionSizeInvalid);
1406             }
1407 
1408             epc_region_size += epc_section.size;
1409         }
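        // For example, a section size of 0x80_0000 (8 MiB) passes the checks
        // above, while 0x1800 is rejected since 0x1800 & 0x0fff == 0x800.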
1410 
1411         // Now that we know about the total size for the EPC region, we can
1412         // proceed with the allocation of the entire range. The EPC region
1413         // must be 4kiB aligned.
1414         let epc_region_start = self
1415             .allocator
1416             .lock()
1417             .unwrap()
1418             .allocate_mmio_addresses(None, epc_region_size as GuestUsize, Some(0x1000))
1419             .ok_or(Error::SgxEpcRangeAllocation)?;
1420 
1421         let mut sgx_epc_region = SgxEpcRegion::new(epc_region_start, epc_region_size as GuestUsize);
1422 
1423         // Each section can be memory mapped into the allocated region.
1424         let mut epc_section_start = epc_region_start.raw_value();
1425         for epc_section in sgx_epc_config.iter() {
1426             let file = OpenOptions::new()
1427                 .read(true)
1428                 .write(true)
1429                 .open("/dev/sgx_vepc")
1430                 .map_err(Error::SgxVirtEpcOpen)?;
1431 
1432             let prot = PROT_READ | PROT_WRITE;
1433             let mut flags = MAP_NORESERVE | MAP_SHARED;
1434             if epc_section.prefault {
1435                 flags |= MAP_POPULATE;
1436             }
1437 
1438             // We can't use the vm-memory crate to perform the memory mapping
1439             // here as it would try to ensure the size of the backing file
1440             // matches the size of the expected mapping. The /dev/sgx_vepc
1441             // device does not work that way: it provides a file descriptor
1442             // whose size does not match the mapping size, as it's just a way to
1443             // let KVM know that an EPC section is being created for the guest.
1444             let host_addr = unsafe {
1445                 libc::mmap(
1446                     std::ptr::null_mut(),
1447                     epc_section.size as usize,
1448                     prot,
1449                     flags,
1450                     file.as_raw_fd(),
1451                     0,
1452                 )
1453             } as u64;
1454 
1455             let _mem_slot = self.create_userspace_mapping(
1456                 epc_section_start,
1457                 epc_section.size,
1458                 host_addr,
1459                 false,
1460                 false,
1461                 false,
1462             )?;
1463 
1464             sgx_epc_region.insert(
1465                 epc_section.id.clone(),
1466                 SgxEpcSection::new(
1467                     GuestAddress(epc_section_start),
1468                     epc_section.size as GuestUsize,
1469                 ),
1470             );
1471 
1472             epc_section_start += epc_section.size;
1473         }
1474 
1475         self.sgx_epc_region = Some(sgx_epc_region);
1476 
1477         Ok(())
1478     }
1479 
1480     #[cfg(target_arch = "x86_64")]
1481     pub fn sgx_epc_region(&self) -> &Option<SgxEpcRegion> {
1482         &self.sgx_epc_region
1483     }
1484 
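    // Despite its name, this helper simply reports whether the backing file
    // still has at least one link on the host filesystem (st_nlink > 0), i.e.
    // it is a regular, user-accessible file rather than an anonymous or
    // already-unlinked one.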
1485     pub fn is_hardlink(f: &File) -> bool {
1486         let mut stat = std::mem::MaybeUninit::<libc::stat>::uninit();
1487         let ret = unsafe { libc::fstat(f.as_raw_fd(), stat.as_mut_ptr()) };
1488         if ret != 0 {
1489             error!("Couldn't fstat the backing file");
1490             return false;
1491         }
1492 
1493         unsafe { (*stat.as_ptr()).st_nlink as usize > 0 }
1494     }
1495 
1496     pub fn memory_zones(&self) -> &MemoryZones {
1497         &self.memory_zones
1498     }
1499 }
1500 
1501 #[cfg(feature = "acpi")]
1502 struct MemoryNotify {
1503     slot_id: usize,
1504 }
1505 
1506 #[cfg(feature = "acpi")]
1507 impl Aml for MemoryNotify {
1508     fn to_aml_bytes(&self) -> Vec<u8> {
1509         let object = aml::Path::new(&format!("M{:03}", self.slot_id));
1510         aml::If::new(
1511             &aml::Equal::new(&aml::Arg(0), &self.slot_id),
1512             vec![&aml::Notify::new(&object, &aml::Arg(1))],
1513         )
1514         .to_aml_bytes()
1515     }
1516 }
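// One MemoryNotify entry is generated per hotplug slot; the MTFY method built
// from them below dispatches Notify(Mxxx, <event>) to the slot device whose
// index matches its first argument.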
1517 
1518 #[cfg(feature = "acpi")]
1519 struct MemorySlot {
1520     slot_id: usize,
1521 }
1522 
1523 #[cfg(feature = "acpi")]
1524 impl Aml for MemorySlot {
1525     fn to_aml_bytes(&self) -> Vec<u8> {
1526         aml::Device::new(
1527             format!("M{:03}", self.slot_id).as_str().into(),
1528             vec![
1529                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C80")),
1530                 &aml::Name::new("_UID".into(), &self.slot_id),
1531                 /*
1532                 _STA return value:
1533                 Bit [0] – Set if the device is present.
1534                 Bit [1] – Set if the device is enabled and decoding its resources.
1535                 Bit [2] – Set if the device should be shown in the UI.
1536                 Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics).
1537                 Bit [4] – Set if the battery is present.
1538                 Bits [31:5] – Reserved (must be cleared).
1539                 */
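                // MSTA returns either 0x0 (absent) or 0xf, i.e. bits [3:0] set:
                // present, enabled, shown in UI and functioning.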
1540                 &aml::Method::new(
1541                     "_STA".into(),
1542                     0,
1543                     false,
1544                     // Call into MSTA method which will interrogate device
1545                     vec![&aml::Return::new(&aml::MethodCall::new(
1546                         "MSTA".into(),
1547                         vec![&self.slot_id],
1548                     ))],
1549                 ),
1550                 // Get details of memory
1551                 &aml::Method::new(
1552                     "_CRS".into(),
1553                     0,
1554                     false,
1555                     // Call into MCRS which provides actual memory details
1556                     vec![&aml::Return::new(&aml::MethodCall::new(
1557                         "MCRS".into(),
1558                         vec![&self.slot_id],
1559                     ))],
1560                 ),
1561             ],
1562         )
1563         .to_aml_bytes()
1564     }
1565 }
1566 
1567 #[cfg(feature = "acpi")]
1568 struct MemorySlots {
1569     slots: usize,
1570 }
1571 
1572 #[cfg(feature = "acpi")]
1573 impl Aml for MemorySlots {
1574     fn to_aml_bytes(&self) -> Vec<u8> {
1575         let mut bytes = Vec::new();
1576 
1577         for slot_id in 0..self.slots {
1578             bytes.extend_from_slice(&MemorySlot { slot_id }.to_aml_bytes());
1579         }
1580 
1581         bytes
1582     }
1583 }
1584 
1585 #[cfg(feature = "acpi")]
1586 struct MemoryMethods {
1587     slots: usize,
1588 }
1589 
1590 #[cfg(feature = "acpi")]
1591 impl Aml for MemoryMethods {
1592     fn to_aml_bytes(&self) -> Vec<u8> {
1593         let mut bytes = Vec::new();
1594         // Add "MTFY" notification method
1595         let mut memory_notifies = Vec::new();
1596         for slot_id in 0..self.slots {
1597             memory_notifies.push(MemoryNotify { slot_id });
1598         }
1599 
1600         let mut memory_notifies_refs: Vec<&dyn aml::Aml> = Vec::new();
1601         for memory_notifier in memory_notifies.iter() {
1602             memory_notifies_refs.push(memory_notifier);
1603         }
1604 
1605         bytes.extend_from_slice(
1606             &aml::Method::new("MTFY".into(), 2, true, memory_notifies_refs).to_aml_bytes(),
1607         );
1608 
1609         // MSCN method
1610         bytes.extend_from_slice(
1611             &aml::Method::new(
1612                 "MSCN".into(),
1613                 0,
1614                 true,
1615                 vec![
1616                     // Take lock defined above
1617                     &aml::Acquire::new("MLCK".into(), 0xffff),
1618                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1619                     &aml::While::new(
1620                         &aml::LessThan::new(&aml::Local(0), &self.slots),
1621                         vec![
1622                             // Write the current slot number (Local0) to the MSEL selector field
1623                             &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Local(0)),
1624                             // Check if MINS bit is set (inserting)
1625                             &aml::If::new(
1626                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MINS"), &aml::ONE),
1627                                 // Notify device if it is
1628                                 vec![
1629                                     &aml::MethodCall::new(
1630                                         "MTFY".into(),
1631                                         vec![&aml::Local(0), &aml::ONE],
1632                                     ),
1633                                     // Reset MINS bit (writing 1 back acknowledges and clears it)
1634                                     &aml::Store::new(
1635                                         &aml::Path::new("\\_SB_.MHPC.MINS"),
1636                                         &aml::ONE,
1637                                     ),
1638                                 ],
1639                             ),
1640                             // Check if MRMV bit is set
1641                             &aml::If::new(
1642                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MRMV"), &aml::ONE),
1643                                 // Notify device if it is (with the eject constant 0x3)
1644                                 vec![
1645                                     &aml::MethodCall::new(
1646                                         "MTFY".into(),
1647                                         vec![&aml::Local(0), &3u8],
1648                                     ),
1649                                     // Reset MRMV bit (writing 1 back acknowledges and clears it)
1650                                     &aml::Store::new(
1651                                         &aml::Path::new("\\_SB_.MHPC.MRMV"),
1652                                         &aml::ONE,
1653                                     ),
1654                                 ],
1655                             ),
1656                             &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE),
1657                         ],
1658                     ),
1659                     // Release lock
1660                     &aml::Release::new("MLCK".into()),
1661                 ],
1662             )
1663             .to_aml_bytes(),
1664         );
1665 
1666         bytes.extend_from_slice(
1667             // Memory status method
1668             &aml::Method::new(
1669                 "MSTA".into(),
1670                 1,
1671                 true,
1672                 vec![
1673                     // Take lock defined above
1674                     &aml::Acquire::new("MLCK".into(), 0xffff),
1675                     // Write slot number (in first argument) to the MSEL selector field
1676                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1677                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1678                     // Check if the MEN_ bit is set; if so, set the local variable to 0xf (see _STA for the meaning of each bit)
1679                     &aml::If::new(
1680                         &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MEN_"), &aml::ONE),
1681                         vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
1682                     ),
1683                     // Release lock
1684                     &aml::Release::new("MLCK".into()),
1685                     // Return 0 or 0xf
1686                     &aml::Return::new(&aml::Local(0)),
1687                 ],
1688             )
1689             .to_aml_bytes(),
1690         );
1691 
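        // The MCRS method below fills the MR64 resource with the base and
        // length read from the selected slot and derives the inclusive end
        // address as base + length - 1; the L/H field pairs and the carry
        // check handle the split across low and high dwords. For example
        // (illustrative values), a base of 0x1_0000_0000 with a length of
        // 0x4000_0000 yields an end address of 0x1_3FFF_FFFF.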
1692         bytes.extend_from_slice(
1693             // Memory range method
1694             &aml::Method::new(
1695                 "MCRS".into(),
1696                 1,
1697                 true,
1698                 vec![
1699                     // Take lock defined above
1700                     &aml::Acquire::new("MLCK".into(), 0xffff),
1701                     // Write slot number (in first argument) to the MSEL selector field
1702                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1703                     &aml::Name::new(
1704                         "MR64".into(),
1705                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1706                             aml::AddressSpaceCachable::Cacheable,
1707                             true,
1708                             0x0000_0000_0000_0000u64,
1709                             0xFFFF_FFFF_FFFF_FFFEu64,
1710                         )]),
1711                     ),
1712                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &14usize, "MINL".into()),
1713                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &18usize, "MINH".into()),
1714                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &22usize, "MAXL".into()),
1715                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &26usize, "MAXH".into()),
1716                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &38usize, "LENL".into()),
1717                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &42usize, "LENH".into()),
1718                     &aml::Store::new(&aml::Path::new("MINL"), &aml::Path::new("\\_SB_.MHPC.MHBL")),
1719                     &aml::Store::new(&aml::Path::new("MINH"), &aml::Path::new("\\_SB_.MHPC.MHBH")),
1720                     &aml::Store::new(&aml::Path::new("LENL"), &aml::Path::new("\\_SB_.MHPC.MHLL")),
1721                     &aml::Store::new(&aml::Path::new("LENH"), &aml::Path::new("\\_SB_.MHPC.MHLH")),
1722                     &aml::Add::new(
1723                         &aml::Path::new("MAXL"),
1724                         &aml::Path::new("MINL"),
1725                         &aml::Path::new("LENL"),
1726                     ),
1727                     &aml::Add::new(
1728                         &aml::Path::new("MAXH"),
1729                         &aml::Path::new("MINH"),
1730                         &aml::Path::new("LENH"),
1731                     ),
1732                     &aml::If::new(
1733                         &aml::LessThan::new(&aml::Path::new("MAXL"), &aml::Path::new("MINL")),
1734                         vec![&aml::Add::new(
1735                             &aml::Path::new("MAXH"),
1736                             &aml::ONE,
1737                             &aml::Path::new("MAXH"),
1738                         )],
1739                     ),
1740                     &aml::Subtract::new(
1741                         &aml::Path::new("MAXL"),
1742                         &aml::Path::new("MAXL"),
1743                         &aml::ONE,
1744                     ),
1745                     // Release lock
1746                     &aml::Release::new("MLCK".into()),
1747                     &aml::Return::new(&aml::Path::new("MR64")),
1748                 ],
1749             )
1750             .to_aml_bytes(),
1751         );
1752         bytes
1753     }
1754 }
1755 
1756 #[cfg(feature = "acpi")]
1757 impl Aml for MemoryManager {
1758     fn to_aml_bytes(&self) -> Vec<u8> {
1759         let mut bytes = Vec::new();
1760 
1761         // Memory Hotplug Controller
1762         bytes.extend_from_slice(
1763             &aml::Device::new(
1764                 "_SB_.MHPC".into(),
1765                 vec![
1766                     &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
1767                     &aml::Name::new("_UID".into(), &"Memory Hotplug Controller"),
1768                     // Mutex to protect concurrent access, as we write to choose the slot and then read back the status
1769                     &aml::Mutex::new("MLCK".into(), 0),
1770                     &aml::Name::new(
1771                         "_CRS".into(),
1772                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1773                             aml::AddressSpaceCachable::NotCacheable,
1774                             true,
1775                             self.acpi_address.0 as u64,
1776                             self.acpi_address.0 + MEMORY_MANAGER_ACPI_SIZE as u64 - 1,
1777                         )]),
1778                     ),
1779                     // OpRegion and Fields map MMIO range into individual field values
1780                     &aml::OpRegion::new(
1781                         "MHPR".into(),
1782                         aml::OpRegionSpace::SystemMemory,
1783                         self.acpi_address.0 as usize,
1784                         MEMORY_MANAGER_ACPI_SIZE,
1785                     ),
1786                     &aml::Field::new(
1787                         "MHPR".into(),
1788                         aml::FieldAccessType::DWord,
1789                         aml::FieldUpdateRule::Preserve,
1790                         vec![
1791                             aml::FieldEntry::Named(*b"MHBL", 32), // Base (low 4 bytes)
1792                             aml::FieldEntry::Named(*b"MHBH", 32), // Base (high 4 bytes)
1793                             aml::FieldEntry::Named(*b"MHLL", 32), // Length (low 4 bytes)
1794                             aml::FieldEntry::Named(*b"MHLH", 32), // Length (high 4 bytes)
1795                         ],
1796                     ),
1797                     &aml::Field::new(
1798                         "MHPR".into(),
1799                         aml::FieldAccessType::DWord,
1800                         aml::FieldUpdateRule::Preserve,
1801                         vec![
1802                             aml::FieldEntry::Reserved(128),
1803                             aml::FieldEntry::Named(*b"MHPX", 32), // PXM
1804                         ],
1805                     ),
1806                     &aml::Field::new(
1807                         "MHPR".into(),
1808                         aml::FieldAccessType::Byte,
1809                         aml::FieldUpdateRule::WriteAsZeroes,
1810                         vec![
1811                             aml::FieldEntry::Reserved(160),
1812                             aml::FieldEntry::Named(*b"MEN_", 1), // Enabled
1813                             aml::FieldEntry::Named(*b"MINS", 1), // Inserting
1814                             aml::FieldEntry::Named(*b"MRMV", 1), // Removing
1815                             aml::FieldEntry::Named(*b"MEJ0", 1), // Ejecting
1816                         ],
1817                     ),
1818                     &aml::Field::new(
1819                         "MHPR".into(),
1820                         aml::FieldAccessType::DWord,
1821                         aml::FieldUpdateRule::Preserve,
1822                         vec![
1823                             aml::FieldEntry::Named(*b"MSEL", 32), // Selector
1824                             aml::FieldEntry::Named(*b"MOEV", 32), // Event
1825                             aml::FieldEntry::Named(*b"MOSC", 32), // OSC
1826                         ],
1827                     ),
1828                     &MemoryMethods {
1829                         slots: self.hotplug_slots.len(),
1830                     },
1831                     &MemorySlots {
1832                         slots: self.hotplug_slots.len(),
1833                     },
1834                 ],
1835             )
1836             .to_aml_bytes(),
1837         );
1838 
1839         #[cfg(target_arch = "x86_64")]
1840         {
1841             if let Some(sgx_epc_region) = &self.sgx_epc_region {
1842                 let min = sgx_epc_region.start().raw_value() as u64;
1843                 let max = min + sgx_epc_region.size() as u64 - 1;
1844                 // SGX EPC region
1845                 bytes.extend_from_slice(
1846                     &aml::Device::new(
1847                         "_SB_.EPC_".into(),
1848                         vec![
1849                             &aml::Name::new("_HID".into(), &aml::EisaName::new("INT0E0C")),
1850                             // QWORD describing the EPC region start and size
1851                             &aml::Name::new(
1852                                 "_CRS".into(),
1853                                 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1854                                     aml::AddressSpaceCachable::NotCacheable,
1855                                     true,
1856                                     min,
1857                                     max,
1858                                 )]),
1859                             ),
1860                             &aml::Method::new(
1861                                 "_STA".into(),
1862                                 0,
1863                                 false,
1864                                 vec![&aml::Return::new(&0xfu8)],
1865                             ),
1866                         ],
1867                     )
1868                     .to_aml_bytes(),
1869                 );
1870             }
1871         }
1872 
1873         bytes
1874     }
1875 }
1876 
1877 impl Pausable for MemoryManager {}
1878 
1879 #[derive(Clone, Versionize)]
1880 pub struct MemoryRegion {
1881     content: Option<String>,
1882     start_addr: u64,
1883     size: u64,
1884 }
1885 
1886 #[derive(Versionize)]
1887 pub struct MemoryManagerSnapshotData {
1888     memory_regions: Vec<MemoryRegion>,
1889 }
1890 
1891 impl VersionMapped for MemoryManagerSnapshotData {}
1892 
1893 impl Snapshottable for MemoryManager {
1894     fn id(&self) -> String {
1895         MEMORY_MANAGER_SNAPSHOT_ID.to_string()
1896     }
1897 
1898     fn snapshot(&mut self) -> result::Result<Snapshot, MigratableError> {
1899         let mut memory_manager_snapshot = Snapshot::new(MEMORY_MANAGER_SNAPSHOT_ID);
1900         let guest_memory = self.guest_memory.memory();
1901 
1902         let mut memory_regions: Vec<MemoryRegion> = Vec::new();
1903 
1904         for (index, region) in guest_memory.iter().enumerate() {
1905             if region.len() == 0 {
1906                 return Err(MigratableError::Snapshot(anyhow!("Zero length region")));
1907             }
1908 
1909             let mut content = Some(PathBuf::from(format!("memory-region-{}", index)));
1910             if let Some(file_offset) = region.file_offset() {
1911                 if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED)
1912                     && Self::is_hardlink(file_offset.file())
1913                 {
1914                     // In this very specific case, we know the memory region
1915                     // is backed by a file on the host filesystem that can be
1916                     // accessed by the user, and additionally the mapping is
1917                     // shared, which means that modifications to the content
1918                     // are written to the actual file.
1919                     // When these conditions are met, we can skip copying the
1920                     // memory content for this specific region, as we can
1921                     // assume the user already has it saved through the backing
1922                     // file.
1923                     content = None;
1924                 }
1925             }
1926 
1927             memory_regions.push(MemoryRegion {
1928                 content: content.map(|p| p.to_str().unwrap().to_owned()),
1929                 start_addr: region.start_addr().0,
1930                 size: region.len(),
1931             });
1932         }
1933 
1934         // Store this list of regions locally, as it will be used by the
1935         // Transportable::send() implementation. The point is to avoid
1936         // duplicating the code that creates the path for each region. The
1937         // 'snapshot' step builds the list of memory regions, including the
1938         // information about whether each region's content needs to be
1939         // copied. This saves the 'send' step from going through the same
1940         // process; instead it can directly proceed with storing the content
1941         // of the regions that require it.
1942         self.snapshot_memory_regions = memory_regions.clone();
1943 
1944         memory_manager_snapshot.add_data_section(SnapshotDataSection::new_from_versioned_state(
1945             MEMORY_MANAGER_SNAPSHOT_ID,
1946             &MemoryManagerSnapshotData { memory_regions },
1947         )?);
1948 
1949         let mut memory_snapshot = self.snapshot.lock().unwrap();
1950         *memory_snapshot = Some(guest_memory);
1951 
1952         Ok(memory_manager_snapshot)
1953     }
1954 }
1955 
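// Transportable::send() writes, for each region recorded by snapshot(), the
// guest memory content to a file named after that region under the directory
// derived from the destination URL. A minimal sketch of the expected flow
// (the URL is illustrative and assumed to be of a form url_to_path() accepts):
//
//     let snapshot = memory_manager.lock().unwrap().snapshot()?;
//     memory_manager.lock().unwrap().send(&snapshot, "file:///tmp/vm-snapshot")?;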
1956 impl Transportable for MemoryManager {
1957     fn send(
1958         &self,
1959         _snapshot: &Snapshot,
1960         destination_url: &str,
1961     ) -> result::Result<(), MigratableError> {
1962         let vm_memory_snapshot_path = url_to_path(destination_url)?;
1963 
1964         if let Some(guest_memory) = &*self.snapshot.lock().unwrap() {
1965             for region in self.snapshot_memory_regions.iter() {
1966                 if let Some(content) = &region.content {
1967                     let mut memory_region_path = vm_memory_snapshot_path.clone();
1968                     memory_region_path.push(content);
1969 
1970                     // Create the snapshot file for the region
1971                     let mut memory_region_file = OpenOptions::new()
1972                         .read(true)
1973                         .write(true)
1974                         .create_new(true)
1975                         .open(memory_region_path)
1976                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
1977 
1978                     guest_memory
1979                         .write_all_to(
1980                             GuestAddress(region.start_addr),
1981                             &mut memory_region_file,
1982                             region.size as usize,
1983                         )
1984                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
1985                 }
1986             }
1987         }
1988         Ok(())
1989     }
1990 }
1991 
1992 impl Migratable for MemoryManager {
1993     // Start the dirty log in the hypervisor (kvm/mshv).
1994     // Also, reset the dirty bitmap logged by the vmm.
1995     // Just before we do a bulk copy we want to start/clear the dirty log so that
1996     // pages touched during our bulk copy are tracked.
1997     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
1998         self.vm.start_dirty_log().map_err(|e| {
1999             MigratableError::MigrateSend(anyhow!("Error starting VM dirty log {}", e))
2000         })?;
2001 
2002         for r in self.guest_memory.memory().iter() {
2003             r.bitmap().reset();
2004         }
2005 
2006         Ok(())
2007     }
2008 
2009     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
2010         self.vm.stop_dirty_log().map_err(|e| {
2011             MigratableError::MigrateSend(anyhow!("Error stopping VM dirty log {}", e))
2012         })?;
2013 
2014         Ok(())
2015     }
2016 
2017     // Generate a table for the pages that are dirty. The dirty pages are collapsed
2018     // together in the table if they are contiguous.
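    // For example (illustrative addresses, 4 KiB pages): dirty pages at GPAs
    // 0x1000, 0x2000 and 0x3000 plus a dirty page at 0x6000 would produce two
    // entries, one covering 0x1000..0x4000 and one covering 0x6000..0x7000.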
2019     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
2020         let mut table = MemoryRangeTable::default();
2021         for r in &self.guest_ram_mappings {
2022             let vm_dirty_bitmap = self.vm.get_dirty_log(r.slot, r.gpa, r.size).map_err(|e| {
2023                 MigratableError::MigrateSend(anyhow!("Error getting VM dirty log {}", e))
2024             })?;
2025             let vmm_dirty_bitmap = match self.guest_memory.memory().find_region(GuestAddress(r.gpa))
2026             {
2027                 Some(region) => {
2028                     assert_eq!(region.start_addr().raw_value(), r.gpa);
2029                     assert_eq!(region.len(), r.size);
2030                     region.bitmap().get_and_reset()
2031                 }
2032                 None => {
2033                     return Err(MigratableError::MigrateSend(anyhow!(
2034                         "Error finding 'guest memory region' with address {:x}",
2035                         r.gpa
2036                     )))
2037                 }
2038             };
2039 
2040             let dirty_bitmap: Vec<u64> = vm_dirty_bitmap
2041                 .iter()
2042                 .zip(vmm_dirty_bitmap.iter())
2043                 .map(|(x, y)| x | y)
2044                 .collect();
2045 
2046             let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa);
2047 
2048             if sub_table.regions().is_empty() {
2049                 info!("Dirty Memory Range Table is empty");
2050             } else {
2051                 info!("Dirty Memory Range Table:");
2052                 for range in sub_table.regions() {
2053                     info!("GPA: {:x} size: {} KiB", range.gpa, range.length / 1024);
2054                 }
2055             }
2056 
2057             table.extend(sub_table);
2058         }
2059         Ok(table)
2060     }
2061 }
2062