xref: /cloud-hypervisor/vmm/src/memory_manager.rs (revision f7f2f25a574b1b2dba22c094fc8226d404157d15)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 #[cfg(target_arch = "x86_64")]
6 use crate::config::SgxEpcConfig;
7 use crate::config::{HotplugMethod, MemoryConfig, MemoryZoneConfig};
8 use crate::migration::url_to_path;
9 use crate::MEMORY_MANAGER_SNAPSHOT_ID;
10 use crate::{GuestMemoryMmap, GuestRegionMmap};
11 #[cfg(feature = "acpi")]
12 use acpi_tables::{aml, aml::Aml};
13 use anyhow::anyhow;
14 #[cfg(target_arch = "x86_64")]
15 use arch::x86_64::{SgxEpcRegion, SgxEpcSection};
16 use arch::{layout, RegionType};
17 #[cfg(target_arch = "x86_64")]
18 use devices::ioapic;
19 #[cfg(target_arch = "x86_64")]
20 use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE};
21 use std::collections::HashMap;
22 use std::convert::TryInto;
23 use std::ffi;
24 use std::fs::{File, OpenOptions};
25 use std::io;
26 use std::ops::Deref;
27 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
28 use std::path::PathBuf;
29 use std::result;
30 use std::sync::{Arc, Barrier, Mutex};
31 use versionize::{VersionMap, Versionize, VersionizeResult};
32 use versionize_derive::Versionize;
33 #[cfg(target_arch = "x86_64")]
34 use vm_allocator::GsiApic;
35 use vm_allocator::SystemAllocator;
36 use vm_device::BusDevice;
37 use vm_memory::guest_memory::FileOffset;
38 use vm_memory::{
39     mmap::MmapRegionError, Address, Bytes, Error as MmapError, GuestAddress, GuestAddressSpace,
40     GuestMemory, GuestMemoryAtomic, GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
41     GuestUsize, MmapRegion,
42 };
43 use vm_migration::{
44     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
45     SnapshotDataSection, Snapshottable, Transportable, VersionMapped,
46 };
47 
48 #[cfg(feature = "acpi")]
49 pub const MEMORY_MANAGER_ACPI_SIZE: usize = 0x18;
50 
51 const DEFAULT_MEMORY_ZONE: &str = "mem0";
52 
53 #[cfg(target_arch = "x86_64")]
54 const X86_64_IRQ_BASE: u32 = 5;
55 
56 const HOTPLUG_COUNT: usize = 8;
57 
58 // Memory policy constants
59 const MPOL_BIND: u32 = 2;
60 const MPOL_MF_STRICT: u32 = 1;
61 const MPOL_MF_MOVE: u32 = 1 << 1;
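// These mirror the Linux UAPI values from <linux/mempolicy.h>; they are passed
// unchanged to the mbind(2) syscall wrapper defined further down in this file.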
62 
63 #[derive(Default)]
64 struct HotPlugState {
65     base: u64,
66     length: u64,
67     active: bool,
68     inserting: bool,
69     removing: bool,
70 }
71 
72 pub struct VirtioMemZone {
73     region: Arc<GuestRegionMmap>,
74     resize_handler: virtio_devices::Resize,
75     hotplugged_size: u64,
76     hugepages: bool,
77 }
78 
79 impl VirtioMemZone {
80     pub fn region(&self) -> &Arc<GuestRegionMmap> {
81         &self.region
82     }
83     pub fn resize_handler(&self) -> &virtio_devices::Resize {
84         &self.resize_handler
85     }
86     pub fn hotplugged_size(&self) -> u64 {
87         self.hotplugged_size
88     }
89     pub fn hugepages(&self) -> bool {
90         self.hugepages
91     }
92 }
93 
94 #[derive(Default)]
95 pub struct MemoryZone {
96     regions: Vec<Arc<GuestRegionMmap>>,
97     virtio_mem_zone: Option<VirtioMemZone>,
98 }
99 
100 impl MemoryZone {
101     pub fn regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
102         &self.regions
103     }
104     pub fn virtio_mem_zone(&self) -> &Option<VirtioMemZone> {
105         &self.virtio_mem_zone
106     }
107 }
108 
109 pub type MemoryZones = HashMap<String, MemoryZone>;
110 
111 struct GuestRamMapping {
112     slot: u32,
113     gpa: u64,
114     size: u64,
115 }
116 
117 pub struct MemoryManager {
118     boot_guest_memory: GuestMemoryMmap,
119     guest_memory: GuestMemoryAtomic<GuestMemoryMmap>,
120     next_memory_slot: u32,
121     start_of_device_area: GuestAddress,
122     end_of_device_area: GuestAddress,
123     pub vm: Arc<dyn hypervisor::Vm>,
124     hotplug_slots: Vec<HotPlugState>,
125     selected_slot: usize,
126     mergeable: bool,
127     allocator: Arc<Mutex<SystemAllocator>>,
128     hotplug_method: HotplugMethod,
129     boot_ram: u64,
130     current_ram: u64,
131     next_hotplug_slot: usize,
132     snapshot: Mutex<Option<GuestMemoryLoadGuard<GuestMemoryMmap>>>,
133     shared: bool,
134     hugepages: bool,
135     hugepage_size: Option<u64>,
136     #[cfg(target_arch = "x86_64")]
137     sgx_epc_region: Option<SgxEpcRegion>,
138     user_provided_zones: bool,
139     snapshot_memory_regions: Vec<MemoryRegion>,
140     memory_zones: MemoryZones,
141     log_dirty: bool, // Enable dirty logging for created RAM regions
142 
143     // Keep track of calls to create_userspace_mapping() for guest RAM.
144     // This is useful for getting the dirty pages as we need to know the
145     // slots that the mappings were created in.
146     guest_ram_mappings: Vec<GuestRamMapping>,
147 
148     #[cfg(feature = "acpi")]
149     pub acpi_address: GuestAddress,
150 }
151 
152 #[derive(Debug)]
153 pub enum Error {
154     /// Failed to create shared file.
155     SharedFileCreate(io::Error),
156 
157     /// Failed to set shared file length.
158     SharedFileSetLen(io::Error),
159 
160     /// Mmap backed guest memory error
161     GuestMemory(MmapError),
162 
163     /// Failed to allocate a memory range.
164     MemoryRangeAllocation,
165 
166     /// Error from region creation
167     GuestMemoryRegion(MmapRegionError),
168 
169     /// No ACPI slot available
170     NoSlotAvailable,
171 
172     /// Not enough space in the hotplug RAM region
173     InsufficientHotplugRam,
174 
175     /// The requested hotplug memory addition is not a valid size
176     InvalidSize,
177 
178     /// Failed to create the user memory region.
179     CreateUserMemoryRegion(hypervisor::HypervisorVmError),
180 
181     /// Failed to remove the user memory region.
182     RemoveUserMemoryRegion(hypervisor::HypervisorVmError),
183 
184     /// Failed to EventFd.
185     EventFdFail(io::Error),
186 
187     /// Eventfd write error
188     EventfdError(io::Error),
189 
190     /// Failed to resize virtio-mem region
191     VirtioMemResizeFail(virtio_devices::mem::Error),
192 
193     /// Cannot restore VM
194     Restore(MigratableError),
195 
196     /// Cannot create the system allocator
197     CreateSystemAllocator,
198 
199     /// Invalid SGX EPC section size
200     #[cfg(target_arch = "x86_64")]
201     EpcSectionSizeInvalid,
202 
203     /// Failed allocating SGX EPC region
204     #[cfg(target_arch = "x86_64")]
205     SgxEpcRangeAllocation,
206 
207     /// Failed opening SGX virtual EPC device
208     #[cfg(target_arch = "x86_64")]
209     SgxVirtEpcOpen(io::Error),
210 
211     /// Failed setting the SGX virtual EPC section size
212     #[cfg(target_arch = "x86_64")]
213     SgxVirtEpcFileSetLen(io::Error),
214 
215     /// Failed opening SGX provisioning device
216     #[cfg(target_arch = "x86_64")]
217     SgxProvisionOpen(io::Error),
218 
219     /// Failed enabling SGX provisioning
220     #[cfg(target_arch = "x86_64")]
221     SgxEnableProvisioning(hypervisor::HypervisorVmError),
222 
223     /// Failed creating a new MmapRegion instance.
224     #[cfg(target_arch = "x86_64")]
225     NewMmapRegion(vm_memory::mmap::MmapRegionError),
226 
227     /// No memory zones found.
228     MissingMemoryZones,
229 
230     /// Memory configuration is not valid.
231     InvalidMemoryParameters,
232 
233     /// Forbidden operation. Impossible to resize guest memory if it is
234     /// backed by user defined memory regions.
235     InvalidResizeWithMemoryZones,
236 
237     /// It's invalid to try applying a NUMA policy to a memory zone that is
238     /// memory mapped with MAP_SHARED.
239     InvalidSharedMemoryZoneWithHostNuma,
240 
241     /// Failed applying NUMA memory policy.
242     ApplyNumaPolicy(io::Error),
243 
244     /// Memory zone identifier is not unique.
245     DuplicateZoneId,
246 
247     /// No virtio-mem resizing handler found.
248     MissingVirtioMemHandler,
249 
250     /// Unknown memory zone.
251     UnknownMemoryZone,
252 
253     /// Invalid size for resizing. Can be anything except 0.
254     InvalidHotplugSize,
255 
256     /// Invalid hotplug method associated with memory zones resizing capability.
257     InvalidHotplugMethodWithMemoryZones,
258 
259     /// Could not find specified memory zone identifier from hash map.
260     MissingZoneIdentifier,
261 
262     /// Resizing the memory zone failed.
263     ResizeZone,
264 
265     /// Guest address overflow
266     GuestAddressOverFlow,
267 
268     /// Error opening snapshot file
269     SnapshotOpen(io::Error),
270 
271     /// Error copying snapshot into region
272     SnapshotCopy(GuestMemoryError),
273 
274     /// Failed to allocate MMIO address
275     AllocateMmioAddress,
276 }
277 
278 const ENABLE_FLAG: usize = 0;
279 const INSERTING_FLAG: usize = 1;
280 const REMOVING_FLAG: usize = 2;
281 const EJECT_FLAG: usize = 3;
282 
283 const BASE_OFFSET_LOW: u64 = 0;
284 const BASE_OFFSET_HIGH: u64 = 0x4;
285 const LENGTH_OFFSET_LOW: u64 = 0x8;
286 const LENGTH_OFFSET_HIGH: u64 = 0xC;
287 const STATUS_OFFSET: u64 = 0x14;
288 const SELECTION_OFFSET: u64 = 0;
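// Typical interaction with this register block, as implemented by the read()
// and write() handlers below: the guest's ACPI code first writes a slot index
// to SELECTION_OFFSET, then reads the BASE/LENGTH/STATUS registers for that
// slot, and finally writes the INSERTING or REMOVING bit back to STATUS_OFFSET
// to acknowledge the event. A status byte of 0b0000_0011, for instance, reports
// the selected slot as enabled with an insertion pending.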
289 
290 // 64k is subtracted from the MMIO address space size. This is done for the
291 // following reasons:
292 //  - Reduce the addressable space size by at least 4k to work around a Linux
293 //    bug when the VMM allocates devices at the end of the addressable space
294 //  - Windows requires the addressable space size to be 64k aligned
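// For example (illustrative arithmetic): with phys_bits = 40 the function
// below returns (1 << 40) - (1 << 16) = 0xFF_FFFF_0000, which is 64k aligned
// and leaves well over 4k unused at the top of the addressable range.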
295 fn mmio_address_space_size(phys_bits: u8) -> u64 {
296     (1 << phys_bits) - (1 << 16)
297 }
298 
299 impl BusDevice for MemoryManager {
300     fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
301         if self.selected_slot < self.hotplug_slots.len() {
302             let state = &self.hotplug_slots[self.selected_slot];
303             match offset {
304                 BASE_OFFSET_LOW => {
305                     data.copy_from_slice(&state.base.to_le_bytes()[..4]);
306                 }
307                 BASE_OFFSET_HIGH => {
308                     data.copy_from_slice(&state.base.to_le_bytes()[4..]);
309                 }
310                 LENGTH_OFFSET_LOW => {
311                     data.copy_from_slice(&state.length.to_le_bytes()[..4]);
312                 }
313                 LENGTH_OFFSET_HIGH => {
314                     data.copy_from_slice(&state.length.to_le_bytes()[4..]);
315                 }
316                 STATUS_OFFSET => {
317                     // The Linux kernel, quite reasonably, doesn't zero the memory it gives us.
318                     data.copy_from_slice(&[0; 8][0..data.len()]);
319                     if state.active {
320                         data[0] |= 1 << ENABLE_FLAG;
321                     }
322                     if state.inserting {
323                         data[0] |= 1 << INSERTING_FLAG;
324                     }
325                     if state.removing {
326                         data[0] |= 1 << REMOVING_FLAG;
327                     }
328                 }
329                 _ => {
330                     warn!(
331                         "Unexpected offset for accessing memory manager device: {:#}",
332                         offset
333                     );
334                 }
335             }
336         }
337     }
338 
339     fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
340         match offset {
341             SELECTION_OFFSET => {
342                 self.selected_slot = usize::from(data[0]);
343             }
344             STATUS_OFFSET => {
345                 let state = &mut self.hotplug_slots[self.selected_slot];
346                 // The ACPI code writes back a 1 to acknowledge the insertion
347                 if (data[0] & (1 << INSERTING_FLAG) == 1 << INSERTING_FLAG) && state.inserting {
348                     state.inserting = false;
349                 }
350                 // Ditto for removal
351                 if (data[0] & (1 << REMOVING_FLAG) == 1 << REMOVING_FLAG) && state.removing {
352                     state.removing = false;
353                 }
354                 // Trigger removal of "DIMM"
355                 if data[0] & (1 << EJECT_FLAG) == 1 << EJECT_FLAG {
356                     warn!("Ejection of memory not currently supported");
357                 }
358             }
359             _ => {
360                 warn!(
361                     "Unexpected offset for accessing memory manager device: {:#}",
362                     offset
363                 );
364             }
365         };
366         None
367     }
368 }
369 
370 impl MemoryManager {
371     /// Creates all memory regions based on the available RAM ranges defined
372     /// by `ram_regions`, and based on the description of the memory zones.
373     /// In practice, this function can perform multiple memory mappings of the
374     /// same backing file if there's a hole in the address space between two
375     /// RAM ranges.
376     /// For example, ram_regions might contain 2 regions (0-3G and 4G-6G)
377     /// while zones describes two zones (sized 1G and 4G).
378     /// This function will create 3 resulting memory regions:
379     /// - First one mapping entirely the first memory zone on 0-1G range
380     /// - Second one mapping partially the second memory zone on 1G-3G range
381     /// - Third one mapping partially the second memory zone on 4G-6G range
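    ///
    /// A minimal sketch of such a call (hypothetical values, and a hypothetical
    /// `zone_cfg` helper standing in for building a MemoryZoneConfig; marked
    /// `ignore` since this is a private function):
    ///
    /// ```ignore
    /// let ram_regions = vec![(GuestAddress(0), 3 << 30), (GuestAddress(4 << 30), 2 << 30)];
    /// let zones = vec![zone_cfg("mem0", 1 << 30), zone_cfg("mem1", 4 << 30)];
    /// let (regions, memory_zones) =
    ///     MemoryManager::create_memory_regions_from_zones(&ram_regions, &zones, false)?;
    /// assert_eq!(regions.len(), 3); // 0-1G, 1G-3G and 4G-6G
    /// ```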
382     fn create_memory_regions_from_zones(
383         ram_regions: &[(GuestAddress, usize)],
384         zones: &[MemoryZoneConfig],
385         prefault: bool,
386     ) -> Result<(Vec<Arc<GuestRegionMmap>>, MemoryZones), Error> {
387         let mut zones = zones.to_owned();
388         let mut mem_regions = Vec::new();
389         let mut zone = zones.remove(0);
390         let mut zone_offset = 0;
391         let mut memory_zones = HashMap::new();
392 
393         // Add zone id to the list of memory zones.
394         memory_zones.insert(zone.id.clone(), MemoryZone::default());
395 
396         for ram_region in ram_regions.iter() {
397             let mut ram_region_offset = 0;
398             let mut exit = false;
399 
400             loop {
401                 let mut ram_region_consumed = false;
402                 let mut pull_next_zone = false;
403 
404                 let ram_region_sub_size = ram_region.1 - ram_region_offset;
405                 let zone_sub_size = zone.size as usize - zone_offset;
406 
407                 let file_offset = zone_offset as u64;
408                 let region_start = ram_region
409                     .0
410                     .checked_add(ram_region_offset as u64)
411                     .ok_or(Error::GuestAddressOverFlow)?;
412                 let region_size = if zone_sub_size <= ram_region_sub_size {
413                     if zone_sub_size == ram_region_sub_size {
414                         ram_region_consumed = true;
415                     }
416 
417                     ram_region_offset += zone_sub_size;
418                     pull_next_zone = true;
419 
420                     zone_sub_size
421                 } else {
422                     zone_offset += ram_region_sub_size;
423                     ram_region_consumed = true;
424 
425                     ram_region_sub_size
426                 };
427 
428                 let region = MemoryManager::create_ram_region(
429                     &zone.file,
430                     file_offset,
431                     region_start,
432                     region_size,
433                     prefault,
434                     zone.shared,
435                     zone.hugepages,
436                     zone.hugepage_size,
437                     zone.host_numa_node,
438                 )?;
439 
440                 // Add region to the list of regions associated with the
441                 // current memory zone.
442                 if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
443                     memory_zone.regions.push(region.clone());
444                 }
445 
446                 mem_regions.push(region);
447 
448                 if pull_next_zone {
449                     // Get the next zone and reset the offset.
450                     zone_offset = 0;
451                     if zones.is_empty() {
452                         exit = true;
453                         break;
454                     }
455                     zone = zones.remove(0);
456 
457                     // Check if the zone id already exists. If it does, return
458                     // an error as we need unique identifiers. Otherwise, add
459                     // the new zone id to the list of memory zones.
460                     if memory_zones.contains_key(&zone.id) {
461                         error!(
462                             "Memory zone identifier '{}' found more than once. \
463                             It must be unique",
464                             zone.id,
465                         );
466                         return Err(Error::DuplicateZoneId);
467                     }
468                     memory_zones.insert(zone.id.clone(), MemoryZone::default());
469                 }
470 
471                 if ram_region_consumed {
472                     break;
473                 }
474             }
475 
476             if exit {
477                 break;
478             }
479         }
480 
481         Ok((mem_regions, memory_zones))
482     }
483 
484     fn fill_saved_regions(&mut self, saved_regions: Vec<MemoryRegion>) -> Result<(), Error> {
485         for region in saved_regions {
486             if let Some(content) = region.content {
487                 // Open (read only) the snapshot file for the given region.
488                 let mut memory_region_file = OpenOptions::new()
489                     .read(true)
490                     .open(content)
491                     .map_err(Error::SnapshotOpen)?;
492 
493                 self.guest_memory
494                     .memory()
495                     .read_exact_from(
496                         GuestAddress(region.start_addr),
497                         &mut memory_region_file,
498                         region.size as usize,
499                     )
500                     .map_err(Error::SnapshotCopy)?;
501             }
502         }
503 
504         Ok(())
505     }
506 
507     pub fn new(
508         vm: Arc<dyn hypervisor::Vm>,
509         config: &MemoryConfig,
510         prefault: bool,
511         phys_bits: u8,
512         #[cfg(feature = "tdx")] tdx_enabled: bool,
513     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
514         let user_provided_zones = config.size == 0;
515         let mut allow_mem_hotplug: bool = false;
516 
517         let (ram_size, zones) = if !user_provided_zones {
518             if config.zones.is_some() {
519                 error!(
520                     "User defined memory regions can't be provided if the \
521                     memory size is not 0"
522                 );
523                 return Err(Error::InvalidMemoryParameters);
524             }
525 
526             if config.hotplug_size.is_some() {
527                 allow_mem_hotplug = true;
528             }
529 
530             if let Some(hotplugged_size) = config.hotplugged_size {
531                 if let Some(hotplug_size) = config.hotplug_size {
532                     if hotplugged_size > hotplug_size {
533                         error!(
534                             "'hotplugged_size' {} can't be bigger than \
535                             'hotplug_size' {}",
536                             hotplugged_size, hotplug_size,
537                         );
538                         return Err(Error::InvalidMemoryParameters);
539                     }
540                 } else {
541                     error!(
542                         "Invalid to define 'hotplugged_size' when there is \
543                         no 'hotplug_size'"
544                     );
545                     return Err(Error::InvalidMemoryParameters);
546                 }
547                 if config.hotplug_method == HotplugMethod::Acpi {
548                     error!(
549                         "Invalid to define 'hotplugged_size' with hotplug \
550                         method 'acpi'"
551                     );
552                     return Err(Error::InvalidMemoryParameters);
553                 }
554             }
555 
556             // Create a single zone from the global memory config. This lets
557             // us reuse the codepath for user defined memory zones.
558             let zones = vec![MemoryZoneConfig {
559                 id: String::from(DEFAULT_MEMORY_ZONE),
560                 size: config.size,
561                 file: None,
562                 shared: config.shared,
563                 hugepages: config.hugepages,
564                 hugepage_size: config.hugepage_size,
565                 host_numa_node: None,
566                 hotplug_size: config.hotplug_size,
567                 hotplugged_size: config.hotplugged_size,
568             }];
569 
570             (config.size, zones)
571         } else {
572             if config.zones.is_none() {
573                 error!(
574                     "User defined memory regions must be provided if the \
575                     memory size is 0"
576                 );
577                 return Err(Error::MissingMemoryZones);
578             }
579 
580             // Safe to unwrap as we checked right above that some zones
581             // were provided.
582             let zones = config.zones.clone().unwrap();
583             if zones.is_empty() {
584                 return Err(Error::MissingMemoryZones);
585             }
586 
587             let mut total_ram_size: u64 = 0;
588             for zone in zones.iter() {
589                 total_ram_size += zone.size;
590 
591                 if zone.shared && zone.file.is_some() && zone.host_numa_node.is_some() {
592                     error!(
593                         "Invalid to set host NUMA policy for a memory zone \
594                         backed by a regular file and mapped as 'shared'"
595                     );
596                     return Err(Error::InvalidSharedMemoryZoneWithHostNuma);
597                 }
598 
599                 if zone.hotplug_size.is_some() && config.hotplug_method == HotplugMethod::Acpi {
600                     error!("Invalid to set ACPI hotplug method for memory zones");
601                     return Err(Error::InvalidHotplugMethodWithMemoryZones);
602                 }
603 
604                 if let Some(hotplugged_size) = zone.hotplugged_size {
605                     if let Some(hotplug_size) = zone.hotplug_size {
606                         if hotplugged_size > hotplug_size {
607                             error!(
608                                 "'hotplugged_size' {} can't be bigger than \
609                                 'hotplug_size' {}",
610                                 hotplugged_size, hotplug_size,
611                             );
612                             return Err(Error::InvalidMemoryParameters);
613                         }
614                     } else {
615                         error!(
616                             "Invalid to define 'hotplugged_size' when there is \
617                             no 'hotplug_size' for a memory zone"
618                         );
619                         return Err(Error::InvalidMemoryParameters);
620                     }
621                     if config.hotplug_method == HotplugMethod::Acpi {
622                         error!(
623                             "Invalid to define 'hotplugged_size' with hotplug \
624                             method 'acpi'"
625                         );
626                         return Err(Error::InvalidMemoryParameters);
627                     }
628                 }
629             }
630 
631             (total_ram_size, zones)
632         };
633 
634         // Init guest memory
635         let arch_mem_regions = arch::arch_memory_regions(ram_size);
636 
637         let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
638             .iter()
639             .filter(|r| r.2 == RegionType::Ram)
640             .map(|r| (r.0, r.1))
641             .collect();
642 
643         let (mem_regions, mut memory_zones) =
644             Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault)?;
645 
646         let guest_memory =
647             GuestMemoryMmap::from_arc_regions(mem_regions).map_err(Error::GuestMemory)?;
648 
649         let boot_guest_memory = guest_memory.clone();
650 
651         let mmio_address_space_size = mmio_address_space_size(phys_bits);
652         debug_assert_eq!(
653             (((mmio_address_space_size) >> 16) << 16),
654             mmio_address_space_size
655         );
656         let end_of_device_area = GuestAddress(mmio_address_space_size - 1);
657 
658         let mut start_of_device_area =
659             MemoryManager::start_addr(guest_memory.last_addr(), allow_mem_hotplug)?;
660         let mut virtio_mem_regions: Vec<Arc<GuestRegionMmap>> = Vec::new();
661 
662         // Update list of memory zones for resize.
663         for zone in zones {
664             if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
665                 if let Some(hotplug_size) = zone.hotplug_size {
666                     if hotplug_size == 0 {
667                         error!("'hotplug_size' can't be 0");
668                         return Err(Error::InvalidHotplugSize);
669                     }
670 
671                     if !user_provided_zones && config.hotplug_method == HotplugMethod::Acpi {
672                         start_of_device_area = start_of_device_area
673                             .checked_add(hotplug_size)
674                             .ok_or(Error::GuestAddressOverFlow)?;
675                     } else {
676                         // Alignment must be "natural" i.e. same as size of block
677                         let start_addr = GuestAddress(
678                             (start_of_device_area.0 + virtio_devices::VIRTIO_MEM_ALIGN_SIZE - 1)
679                                 / virtio_devices::VIRTIO_MEM_ALIGN_SIZE
680                                 * virtio_devices::VIRTIO_MEM_ALIGN_SIZE,
681                         );
682 
683                         let region = MemoryManager::create_ram_region(
684                             &None,
685                             0,
686                             start_addr,
687                             hotplug_size as usize,
688                             false,
689                             zone.shared,
690                             zone.hugepages,
691                             zone.hugepage_size,
692                             zone.host_numa_node,
693                         )?;
694 
695                         virtio_mem_regions.push(region.clone());
696 
697                         memory_zone.virtio_mem_zone = Some(VirtioMemZone {
698                             region,
699                             resize_handler: virtio_devices::Resize::new()
700                                 .map_err(Error::EventFdFail)?,
701                             hotplugged_size: zone.hotplugged_size.unwrap_or(0),
702                             hugepages: zone.hugepages,
703                         });
704 
705                         start_of_device_area = start_addr
706                             .checked_add(hotplug_size)
707                             .ok_or(Error::GuestAddressOverFlow)?;
708                     }
709                 }
710             } else {
711                 return Err(Error::MissingZoneIdentifier);
712             }
713         }
714 
715         let guest_memory = GuestMemoryAtomic::new(guest_memory);
716 
717         let mut hotplug_slots = Vec::with_capacity(HOTPLUG_COUNT);
718         hotplug_slots.resize_with(HOTPLUG_COUNT, HotPlugState::default);
719 
720         // Both MMIO and PIO address spaces start at address 0.
721         let allocator = Arc::new(Mutex::new(
722             SystemAllocator::new(
723                 #[cfg(target_arch = "x86_64")]
724                 {
725                     GuestAddress(0)
726                 },
727                 #[cfg(target_arch = "x86_64")]
728                 {
729                     1 << 16
730                 },
731                 GuestAddress(0),
732                 mmio_address_space_size,
733                 layout::MEM_32BIT_DEVICES_START,
734                 layout::MEM_32BIT_DEVICES_SIZE,
735                 #[cfg(target_arch = "x86_64")]
736                 vec![GsiApic::new(
737                     X86_64_IRQ_BASE,
738                     ioapic::NUM_IOAPIC_PINS as u32 - X86_64_IRQ_BASE,
739                 )],
740             )
741             .ok_or(Error::CreateSystemAllocator)?,
742         ));
743 
744         #[cfg(feature = "acpi")]
745         let acpi_address = allocator
746             .lock()
747             .unwrap()
748             .allocate_mmio_addresses(None, MEMORY_MANAGER_ACPI_SIZE as u64, None)
749             .ok_or(Error::AllocateMmioAddress)?;
750 
751         #[cfg(not(feature = "tdx"))]
752         let log_dirty = true;
753         #[cfg(feature = "tdx")]
754         let log_dirty = !tdx_enabled; // Cannot log dirty pages on a TD
755 
756         let memory_manager = Arc::new(Mutex::new(MemoryManager {
757             boot_guest_memory,
758             guest_memory: guest_memory.clone(),
759             next_memory_slot: 0,
760             start_of_device_area,
761             end_of_device_area,
762             vm,
763             hotplug_slots,
764             selected_slot: 0,
765             mergeable: config.mergeable,
766             allocator: allocator.clone(),
767             hotplug_method: config.hotplug_method.clone(),
768             boot_ram: ram_size,
769             current_ram: ram_size,
770             next_hotplug_slot: 0,
771             snapshot: Mutex::new(None),
772             shared: config.shared,
773             hugepages: config.hugepages,
774             hugepage_size: config.hugepage_size,
775             #[cfg(target_arch = "x86_64")]
776             sgx_epc_region: None,
777             user_provided_zones,
778             snapshot_memory_regions: Vec::new(),
779             memory_zones,
780             guest_ram_mappings: Vec::new(),
781             #[cfg(feature = "acpi")]
782             acpi_address,
783             log_dirty,
784         }));
785 
786         for region in guest_memory.memory().iter() {
787             let mut mm = memory_manager.lock().unwrap();
788             let slot = mm.create_userspace_mapping(
789                 region.start_addr().raw_value(),
790                 region.len() as u64,
791                 region.as_ptr() as u64,
792                 config.mergeable,
793                 false,
794                 log_dirty,
795             )?;
796             mm.guest_ram_mappings.push(GuestRamMapping {
797                 gpa: region.start_addr().raw_value(),
798                 size: region.len(),
799                 slot,
800             });
801         }
802 
803         for region in virtio_mem_regions.drain(..) {
804             let mut mm = memory_manager.lock().unwrap();
805             let slot = mm.create_userspace_mapping(
806                 region.start_addr().raw_value(),
807                 region.len() as u64,
808                 region.as_ptr() as u64,
809                 config.mergeable,
810                 false,
811                 log_dirty,
812             )?;
813 
814             mm.guest_ram_mappings.push(GuestRamMapping {
815                 gpa: region.start_addr().raw_value(),
816                 size: region.len(),
817                 slot,
818             });
819             allocator
820                 .lock()
821                 .unwrap()
822                 .allocate_mmio_addresses(Some(region.start_addr()), region.len(), None)
823                 .ok_or(Error::MemoryRangeAllocation)?;
824             mm.add_region(region)?;
825         }
826 
827         // Allocate RAM and Reserved address ranges.
828         for region in arch_mem_regions.iter() {
829             allocator
830                 .lock()
831                 .unwrap()
832                 .allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
833                 .ok_or(Error::MemoryRangeAllocation)?;
834         }
835 
836         Ok(memory_manager)
837     }
838 
839     pub fn new_from_snapshot(
840         snapshot: &Snapshot,
841         vm: Arc<dyn hypervisor::Vm>,
842         config: &MemoryConfig,
843         source_url: Option<&str>,
844         prefault: bool,
845         phys_bits: u8,
846     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
847         let mm = MemoryManager::new(
848             vm,
849             config,
850             prefault,
851             phys_bits,
852             #[cfg(feature = "tdx")]
853             false,
854         )?;
855 
856         if let Some(source_url) = source_url {
857             let vm_snapshot_path = url_to_path(source_url).map_err(Error::Restore)?;
858 
859             let mem_snapshot: MemoryManagerSnapshotData = snapshot
860                 .to_versioned_state(MEMORY_MANAGER_SNAPSHOT_ID)
861                 .map_err(Error::Restore)?;
862 
863             // Here we turn the content file name into a content file path as
864             // this will be needed to copy the content of the saved memory
865             // region into the newly created memory region.
866             // We simply ignore the content files that are None, as they
867             // represent regions that have been directly saved by the user, with
868             // no need for saving into a dedicated external file. For these
869             // files, the VmConfig already contains the information on where to
870             // find them.
871             let mut saved_regions = mem_snapshot.memory_regions;
872             for region in saved_regions.iter_mut() {
873                 if let Some(content) = &mut region.content {
874                     let mut memory_region_path = vm_snapshot_path.clone();
875                     memory_region_path.push(content.clone());
876                     *content = memory_region_path.to_str().unwrap().to_owned();
877                 }
878             }
879 
880             mm.lock().unwrap().fill_saved_regions(saved_regions)?;
881 
882             Ok(mm)
883         } else {
884             Ok(mm)
885         }
886     }
887 
888     fn memfd_create(name: &ffi::CStr, flags: u32) -> Result<RawFd, io::Error> {
889         let res = unsafe { libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags) };
890 
891         if res < 0 {
892             Err(io::Error::last_os_error())
893         } else {
894             Ok(res as RawFd)
895         }
896     }
897 
898     fn mbind(
899         addr: *mut u8,
900         len: u64,
901         mode: u32,
902         nodemask: Vec<u64>,
903         maxnode: u64,
904         flags: u32,
905     ) -> Result<(), io::Error> {
906         let res = unsafe {
907             libc::syscall(
908                 libc::SYS_mbind,
909                 addr as *mut libc::c_void,
910                 len,
911                 mode,
912                 nodemask.as_ptr(),
913                 maxnode,
914                 flags,
915             )
916         };
917 
918         if res < 0 {
919             Err(io::Error::last_os_error())
920         } else {
921             Ok(())
922         }
923     }
924 
925     #[allow(clippy::too_many_arguments)]
926     fn create_ram_region(
927         backing_file: &Option<PathBuf>,
928         file_offset: u64,
929         start_addr: GuestAddress,
930         size: usize,
931         prefault: bool,
932         shared: bool,
933         hugepages: bool,
934         hugepage_size: Option<u64>,
935         host_numa_node: Option<u32>,
936     ) -> Result<Arc<GuestRegionMmap>, Error> {
937         let (f, f_off) = match backing_file {
938             Some(ref file) => {
939                 if file.is_dir() {
940                     // Override file offset as it does not apply in this case.
941                     info!(
942                         "Ignoring file offset since the backing file is a \
943                         temporary file created from the specified directory."
944                     );
945                     let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
946                     let fs = ffi::CString::new(fs_str).unwrap();
947                     let mut path = fs.as_bytes_with_nul().to_owned();
948                     let path_ptr = path.as_mut_ptr() as *mut _;
949                     let fd = unsafe { libc::mkstemp(path_ptr) };
950                     unsafe { libc::unlink(path_ptr) };
951                     let f = unsafe { File::from_raw_fd(fd) };
952                     f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
953 
954                     (f, 0)
955                 } else {
956                     let f = OpenOptions::new()
957                         .read(true)
958                         .write(true)
959                         .open(file)
960                         .map_err(Error::SharedFileCreate)?;
961 
962                     (f, file_offset)
963                 }
964             }
965             None => {
966                 let fd = Self::memfd_create(
967                     &ffi::CString::new("ch_ram").unwrap(),
968                     if hugepages {
969                         libc::MFD_HUGETLB
970                             | if let Some(hugepage_size) = hugepage_size {
971                                 /*
972                                  * From the Linux kernel:
973                                  * Several system calls take a flag to request "hugetlb" huge pages.
974                                  * Without further specification, these system calls will use the
975                                  * system's default huge page size.  If a system supports multiple
976                                  * huge page sizes, the desired huge page size can be specified in
977                                  * bits [26:31] of the flag arguments.  The value in these 6 bits
978                                  * will encode the log2 of the huge page size.
979                                  */
980 
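                                // Illustrative arithmetic (assuming 2 MiB huge
                                // pages): 2 MiB = 1 << 21, trailing_zeros() is
                                // 21, and 21 << 26 matches the kernel's
                                // MFD_HUGE_2MB encoding.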
981                                 hugepage_size.trailing_zeros() << 26
982                             } else {
983                                 // Use the system default huge page size
984                                 0
985                             }
986                     } else {
987                         0
988                     },
989                 )
990                 .map_err(Error::SharedFileCreate)?;
991 
992                 let f = unsafe { File::from_raw_fd(fd) };
993                 f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
994 
995                 (f, 0)
996             }
997         };
998 
999         let mut mmap_flags = libc::MAP_NORESERVE
1000             | if shared {
1001                 libc::MAP_SHARED
1002             } else {
1003                 libc::MAP_PRIVATE
1004             };
1005         if prefault {
1006             mmap_flags |= libc::MAP_POPULATE;
1007         }
1008 
1009         let region = GuestRegionMmap::new(
1010             MmapRegion::build(
1011                 Some(FileOffset::new(f, f_off)),
1012                 size,
1013                 libc::PROT_READ | libc::PROT_WRITE,
1014                 mmap_flags,
1015             )
1016             .map_err(Error::GuestMemoryRegion)?,
1017             start_addr,
1018         )
1019         .map_err(Error::GuestMemory)?;
1020 
1021         // Apply NUMA policy if needed.
1022         if let Some(node) = host_numa_node {
1023             let addr = region.deref().as_ptr();
1024             let len = region.deref().size() as u64;
1025             let mode = MPOL_BIND;
1026             let mut nodemask: Vec<u64> = Vec::new();
1027             let flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
1028 
1029             // Linux interprets maxnode in a way that cuts off the last node,
1030             // so we have to add 1 to what we would otherwise consider as the
1031             // proper maxnode value.
1032             let maxnode = node as u64 + 1 + 1;
1033 
1034             // Allocate the right size for the vector.
1035             nodemask.resize((node as usize / 64) + 1, 0);
1036 
1037             // Fill the global bitmask through the nodemask vector.
1038             let idx = (node / 64) as usize;
1039             let shift = node % 64;
1040             nodemask[idx] |= 1u64 << shift;
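            // Worked example (illustrative): for host_numa_node = 70 the vector
            // above holds two u64 words, idx = 1, shift = 6, so bit 6 of the
            // second word gets set, and maxnode is 70 + 1 + 1 = 72.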
1041 
1042             // Policies are enforced by using MPOL_MF_MOVE flag as it will
1043             // force the kernel to move all pages that might have been already
1044             // allocated to the proper set of NUMA nodes. MPOL_MF_STRICT is
1045             // used to throw an error if MPOL_MF_MOVE didn't succeed.
1046             // MPOL_BIND is the selected mode as it specifies a strict policy
1047             // that restricts memory allocation to the nodes specified in the
1048             // nodemask.
1049             Self::mbind(addr, len, mode, nodemask, maxnode, flags)
1050                 .map_err(Error::ApplyNumaPolicy)?;
1051         }
1052 
1053         Ok(Arc::new(region))
1054     }
1055 
1056     // Update the GuestMemoryMmap with the new range
1057     fn add_region(&mut self, region: Arc<GuestRegionMmap>) -> Result<(), Error> {
1058         let guest_memory = self
1059             .guest_memory
1060             .memory()
1061             .insert_region(region)
1062             .map_err(Error::GuestMemory)?;
1063         self.guest_memory.lock().unwrap().replace(guest_memory);
1064 
1065         Ok(())
1066     }
1067 
1068     //
1069     // Calculate the start address of an area next to RAM.
1070     //
1071     // If memory hotplug is allowed, the start address needs to be aligned
1072     // (rounded up) to a 128 MiB boundary.
1073     // If memory hotplug is not allowed, there is no alignment required.
1074     // On x86_64, it must also start at or above the 64-bit RAM start.
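    //
    // For example (illustrative): with hotplug allowed and mem_end at
    // 0x1_2345_FFFF, OR-ing with (128 << 20) - 1 gives 0x1_27FF_FFFF, and the
    // subsequent checked_add(1) yields 0x1_2800_0000, the next 128 MiB boundary.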
1075     #[allow(clippy::let_and_return)]
1076     fn start_addr(mem_end: GuestAddress, allow_mem_hotplug: bool) -> Result<GuestAddress, Error> {
1077         let mut start_addr = if allow_mem_hotplug {
1078             GuestAddress(mem_end.0 | ((128 << 20) - 1))
1079         } else {
1080             mem_end
1081         };
1082 
1083         start_addr = start_addr
1084             .checked_add(1)
1085             .ok_or(Error::GuestAddressOverFlow)?;
1086 
1087         #[cfg(target_arch = "x86_64")]
1088         if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
1089             return Ok(arch::layout::RAM_64BIT_START);
1090         }
1091 
1092         Ok(start_addr)
1093     }
1094 
1095     pub fn add_ram_region(
1096         &mut self,
1097         start_addr: GuestAddress,
1098         size: usize,
1099     ) -> Result<Arc<GuestRegionMmap>, Error> {
1100         // Allocate memory for the region
1101         let region = MemoryManager::create_ram_region(
1102             &None,
1103             0,
1104             start_addr,
1105             size,
1106             false,
1107             self.shared,
1108             self.hugepages,
1109             self.hugepage_size,
1110             None,
1111         )?;
1112 
1113         // Map it into the guest
1114         let slot = self.create_userspace_mapping(
1115             region.start_addr().0,
1116             region.len() as u64,
1117             region.as_ptr() as u64,
1118             self.mergeable,
1119             false,
1120             self.log_dirty,
1121         )?;
1122         self.guest_ram_mappings.push(GuestRamMapping {
1123             gpa: region.start_addr().raw_value(),
1124             size: region.len(),
1125             slot,
1126         });
1127 
1128         self.add_region(Arc::clone(&region))?;
1129 
1130         Ok(region)
1131     }
1132 
1133     fn hotplug_ram_region(&mut self, size: usize) -> Result<Arc<GuestRegionMmap>, Error> {
1134         info!("Hotplugging new RAM: {}", size);
1135 
1136         // Check that there is a free slot
1137         if self.next_hotplug_slot >= HOTPLUG_COUNT {
1138             return Err(Error::NoSlotAvailable);
1139         }
1140 
1141         // "Inserted" DIMM must have a size that is a multiple of 128MiB
1142         if size % (128 << 20) != 0 {
1143             return Err(Error::InvalidSize);
1144         }
1145 
1146         let start_addr = MemoryManager::start_addr(self.guest_memory.memory().last_addr(), true)?;
1147 
1148         if start_addr.checked_add(size.try_into().unwrap()).unwrap() > self.start_of_device_area() {
1149             return Err(Error::InsufficientHotplugRam);
1150         }
1151 
1152         let region = self.add_ram_region(start_addr, size)?;
1153 
1154         // Add region to the list of regions associated with the default
1155         // memory zone.
1156         if let Some(memory_zone) = self.memory_zones.get_mut(DEFAULT_MEMORY_ZONE) {
1157             memory_zone.regions.push(Arc::clone(&region));
1158         }
1159 
1160         // Tell the allocator
1161         self.allocator
1162             .lock()
1163             .unwrap()
1164             .allocate_mmio_addresses(Some(start_addr), size as GuestUsize, None)
1165             .ok_or(Error::MemoryRangeAllocation)?;
1166 
1167         // Update the slot so that it can be queried via the I/O port
1168         let mut slot = &mut self.hotplug_slots[self.next_hotplug_slot];
1169         slot.active = true;
1170         slot.inserting = true;
1171         slot.base = region.start_addr().0;
1172         slot.length = region.len() as u64;
1173 
1174         self.next_hotplug_slot += 1;
1175 
1176         Ok(region)
1177     }
1178 
1179     pub fn guest_memory(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
1180         self.guest_memory.clone()
1181     }
1182 
1183     pub fn boot_guest_memory(&self) -> GuestMemoryMmap {
1184         self.boot_guest_memory.clone()
1185     }
1186 
1187     pub fn allocator(&self) -> Arc<Mutex<SystemAllocator>> {
1188         self.allocator.clone()
1189     }
1190 
1191     pub fn start_of_device_area(&self) -> GuestAddress {
1192         self.start_of_device_area
1193     }
1194 
1195     pub fn end_of_device_area(&self) -> GuestAddress {
1196         self.end_of_device_area
1197     }
1198 
1199     pub fn allocate_memory_slot(&mut self) -> u32 {
1200         let slot_id = self.next_memory_slot;
1201         self.next_memory_slot += 1;
1202         slot_id
1203     }
1204 
1205     pub fn create_userspace_mapping(
1206         &mut self,
1207         guest_phys_addr: u64,
1208         memory_size: u64,
1209         userspace_addr: u64,
1210         mergeable: bool,
1211         readonly: bool,
1212         log_dirty: bool,
1213     ) -> Result<u32, Error> {
1214         let slot = self.allocate_memory_slot();
1215         let mem_region = self.vm.make_user_memory_region(
1216             slot,
1217             guest_phys_addr,
1218             memory_size,
1219             userspace_addr,
1220             readonly,
1221             log_dirty,
1222         );
1223 
1224         self.vm
1225             .create_user_memory_region(mem_region)
1226             .map_err(Error::CreateUserMemoryRegion)?;
1227 
1228         // Mark the pages as mergeable if explicitly asked for.
1229         if mergeable {
1230             // Safe because the address and size are valid since the
1231             // mmap succeeded.
1232             let ret = unsafe {
1233                 libc::madvise(
1234                     userspace_addr as *mut libc::c_void,
1235                     memory_size as libc::size_t,
1236                     libc::MADV_MERGEABLE,
1237                 )
1238             };
1239             if ret != 0 {
1240                 let err = io::Error::last_os_error();
1241                 // Safe to unwrap because the error is constructed with
1242                 // last_os_error(), which ensures the output will be Some().
1243                 let errno = err.raw_os_error().unwrap();
1244                 if errno == libc::EINVAL {
1245                     warn!("kernel not configured with CONFIG_KSM");
1246                 } else {
1247                     warn!("madvise error: {}", err);
1248                 }
1249                 warn!("failed to mark pages as mergeable");
1250             }
1251         }
1252 
1253         info!(
1254             "Created userspace mapping: {:x} -> {:x} {:x}",
1255             guest_phys_addr, userspace_addr, memory_size
1256         );
1257 
1258         Ok(slot)
1259     }
1260 
1261     pub fn remove_userspace_mapping(
1262         &mut self,
1263         guest_phys_addr: u64,
1264         memory_size: u64,
1265         userspace_addr: u64,
1266         mergeable: bool,
1267         slot: u32,
1268     ) -> Result<(), Error> {
1269         let mem_region = self.vm.make_user_memory_region(
1270             slot,
1271             guest_phys_addr,
1272             memory_size,
1273             userspace_addr,
1274             false, /* readonly -- don't care */
1275             false, /* log dirty */
1276         );
1277 
1278         self.vm
1279             .remove_user_memory_region(mem_region)
1280             .map_err(Error::RemoveUserMemoryRegion)?;
1281 
1282         // Mark the pages as unmergeable if they were previously marked as
1283         // mergeable.
1284         if mergeable {
1285             // Safe because the address and size are valid as the region was
1286             // previously advised.
1287             let ret = unsafe {
1288                 libc::madvise(
1289                     userspace_addr as *mut libc::c_void,
1290                     memory_size as libc::size_t,
1291                     libc::MADV_UNMERGEABLE,
1292                 )
1293             };
1294             if ret != 0 {
1295                 let err = io::Error::last_os_error();
1296                 // Safe to unwrap because the error is constructed with
1297                 // last_os_error(), which ensures the output will be Some().
1298                 let errno = err.raw_os_error().unwrap();
1299                 if errno == libc::EINVAL {
1300                     warn!("kernel not configured with CONFIG_KSM");
1301                 } else {
1302                     warn!("madvise error: {}", err);
1303                 }
1304                 warn!("failed to mark pages as unmergeable");
1305             }
1306         }
1307 
1308         info!(
1309             "Removed userspace mapping: {:x} -> {:x} {:x}",
1310             guest_phys_addr, userspace_addr, memory_size
1311         );
1312 
1313         Ok(())
1314     }
1315 
1316     pub fn virtio_mem_resize(&mut self, id: &str, size: u64) -> Result<(), Error> {
1317         if let Some(memory_zone) = self.memory_zones.get_mut(id) {
1318             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
1319                 virtio_mem_zone
1320                     .resize_handler()
1321                     .work(size)
1322                     .map_err(Error::VirtioMemResizeFail)?;
1323             } else {
1324                 error!("Failed resizing virtio-mem region: No virtio-mem handler");
1325                 return Err(Error::MissingVirtioMemHandler);
1326             }
1327 
1328             return Ok(());
1329         }
1330 
1331         error!("Failed resizing virtio-mem region: Unknown memory zone");
1332         Err(Error::UnknownMemoryZone)
1333     }
1334 
1335     /// If this function results in adding a new memory region to the
1336     /// guest memory, the new region is returned to the caller. The virtio-mem
1337     /// use case never adds a new region as the whole hotpluggable memory has
1338     /// already been allocated at boot time.
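    ///
    /// Sketch of a hypothetical caller, assuming a 1 GiB boot configuration
    /// (`ignore` because it relies on a fully constructed MemoryManager):
    ///
    /// ```ignore
    /// // With the ACPI method this hotplugs an extra 1 GiB and returns the new
    /// // region; with virtio-mem it resizes the pre-allocated region and
    /// // returns None.
    /// let new_region = memory_manager.lock().unwrap().resize(2 << 30)?;
    /// ```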
1339     pub fn resize(&mut self, desired_ram: u64) -> Result<Option<Arc<GuestRegionMmap>>, Error> {
1340         if self.user_provided_zones {
1341             error!(
1342                 "Not allowed to resize guest memory when backed with user \
1343                 defined memory zones."
1344             );
1345             return Err(Error::InvalidResizeWithMemoryZones);
1346         }
1347 
1348         let mut region: Option<Arc<GuestRegionMmap>> = None;
1349         match self.hotplug_method {
1350             HotplugMethod::VirtioMem => {
1351                 if desired_ram >= self.boot_ram {
1352                     self.virtio_mem_resize(DEFAULT_MEMORY_ZONE, desired_ram - self.boot_ram)?;
1353                     self.current_ram = desired_ram;
1354                 }
1355             }
1356             HotplugMethod::Acpi => {
1357                 if desired_ram > self.current_ram {
1358                     region =
1359                         Some(self.hotplug_ram_region((desired_ram - self.current_ram) as usize)?);
1360                     self.current_ram = desired_ram;
1361                 }
1362             }
1363         }
1364         Ok(region)
1365     }
1366 
1367     pub fn resize_zone(&mut self, id: &str, virtio_mem_size: u64) -> Result<(), Error> {
1368         if !self.user_provided_zones {
1369             error!(
1370                 "Not allowed to resize guest memory zone when no zone is \
1371                 defined."
1372             );
1373             return Err(Error::ResizeZone);
1374         }
1375 
1376         self.virtio_mem_resize(id, virtio_mem_size)
1377     }
1378 
1379     #[cfg(target_arch = "x86_64")]
1380     pub fn setup_sgx(
1381         &mut self,
1382         sgx_epc_config: Vec<SgxEpcConfig>,
1383         vm: &Arc<dyn hypervisor::Vm>,
1384     ) -> Result<(), Error> {
1385         let file = OpenOptions::new()
1386             .read(true)
1387             .open("/dev/sgx_provision")
1388             .map_err(Error::SgxProvisionOpen)?;
1389         vm.enable_sgx_attribute(file)
1390             .map_err(Error::SgxEnableProvisioning)?;
1391 
1392         // Go over each EPC section and verify its size is a 4k multiple. At
1393         // the same time, calculate the total size needed for the contiguous
1394         // EPC region.
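        // For instance (illustrative sizes): an 8 MiB (0x80_0000) section passes
        // the alignment check below, while a 10 KiB (0x2800) one is rejected.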
1395         let mut epc_region_size = 0;
1396         for epc_section in sgx_epc_config.iter() {
1397             if epc_section.size == 0 {
1398                 return Err(Error::EpcSectionSizeInvalid);
1399             }
1400             if epc_section.size & 0x0fff != 0 {
1401                 return Err(Error::EpcSectionSizeInvalid);
1402             }
1403 
1404             epc_region_size += epc_section.size;
1405         }
1406 
1407         // Now that we know about the total size for the EPC region, we can
1408         // proceed with the allocation of the entire range. The EPC region
1409         // must be 4kiB aligned.
1410         let epc_region_start = self
1411             .allocator
1412             .lock()
1413             .unwrap()
1414             .allocate_mmio_addresses(None, epc_region_size as GuestUsize, Some(0x1000))
1415             .ok_or(Error::SgxEpcRangeAllocation)?;
1416 
1417         let mut sgx_epc_region = SgxEpcRegion::new(epc_region_start, epc_region_size as GuestUsize);
1418 
1419         // Each section can be memory mapped into the allocated region.
1420         let mut epc_section_start = epc_region_start.raw_value();
1421         for epc_section in sgx_epc_config.iter() {
1422             let file = OpenOptions::new()
1423                 .read(true)
1424                 .write(true)
1425                 .open("/dev/sgx_vepc")
1426                 .map_err(Error::SgxVirtEpcOpen)?;
1427 
1428             let prot = PROT_READ | PROT_WRITE;
1429             let mut flags = MAP_NORESERVE | MAP_SHARED;
1430             if epc_section.prefault {
1431                 flags |= MAP_POPULATE;
1432             }
1433 
1434             // We can't use the vm-memory crate to perform the memory mapping
1435             // here as it would try to ensure the size of the backing file is
1436             // matching the size of the expected mapping. The /dev/sgx_vepc
1437             // device does not work that way; it provides a file descriptor
1438             // whose size does not match the mapping size, as it's just a way to
1439             // let KVM know that an EPC section is being created for the guest.
1440             let host_addr = unsafe {
1441                 libc::mmap(
1442                     std::ptr::null_mut(),
1443                     epc_section.size as usize,
1444                     prot,
1445                     flags,
1446                     file.as_raw_fd(),
1447                     0,
1448                 )
1449             } as u64;
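                  // Note (illustrative sketch, not part of this revision):
                  // libc::mmap reports failure by returning MAP_FAILED (-1), so
                  // the cast above could be guarded along these lines, where
                  // Error::SgxVirtEpcMmap is a hypothetical variant named here
                  // purely for illustration:
                  //
                  //     if host_addr == libc::MAP_FAILED as u64 {
                  //         return Err(Error::SgxVirtEpcMmap);
                  //     }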
1450 
1451             let _mem_slot = self.create_userspace_mapping(
1452                 epc_section_start,
1453                 epc_section.size,
1454                 host_addr,
1455                 false,
1456                 false,
1457                 false,
1458             )?;
1459 
1460             sgx_epc_region.insert(
1461                 epc_section.id.clone(),
1462                 SgxEpcSection::new(
1463                     GuestAddress(epc_section_start),
1464                     epc_section.size as GuestUsize,
1465                 ),
1466             );
1467 
1468             epc_section_start += epc_section.size;
1469         }
1470 
1471         self.sgx_epc_region = Some(sgx_epc_region);
1472 
1473         Ok(())
1474     }
1475 
1476     #[cfg(target_arch = "x86_64")]
1477     pub fn sgx_epc_region(&self) -> &Option<SgxEpcRegion> {
1478         &self.sgx_epc_region
1479     }
1480 
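     /// Returns true when the backing file still has at least one link on the
     /// host filesystem (st_nlink > 0), i.e. it has not been unlinked.
     /// snapshot() below relies on this to decide whether a region's content
     /// must be written out or can be recovered from the backing file.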
1481     pub fn is_hardlink(f: &File) -> bool {
1482         let mut stat = std::mem::MaybeUninit::<libc::stat>::uninit();
1483         let ret = unsafe { libc::fstat(f.as_raw_fd(), stat.as_mut_ptr()) };
1484         if ret != 0 {
1485             error!("Couldn't fstat the backing file");
1486             return false;
1487         }
1488 
1489         unsafe { (*stat.as_ptr()).st_nlink as usize > 0 }
1490     }
1491 
1492     pub fn memory_zones(&self) -> &MemoryZones {
1493         &self.memory_zones
1494     }
1495 }
1496 
1497 #[cfg(feature = "acpi")]
1498 struct MemoryNotify {
1499     slot_id: usize,
1500 }
1501 
1502 #[cfg(feature = "acpi")]
1503 impl Aml for MemoryNotify {
1504     fn to_aml_bytes(&self) -> Vec<u8> {
1505         let object = aml::Path::new(&format!("M{:03}", self.slot_id));
1506         aml::If::new(
1507             &aml::Equal::new(&aml::Arg(0), &self.slot_id),
1508             vec![&aml::Notify::new(&object, &aml::Arg(1))],
1509         )
1510         .to_aml_bytes()
1511     }
1512 }
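
     // Illustrative only: for slot_id == 3 the If block above serializes to
     // ASL roughly equivalent to `If (Arg0 == 3) { Notify (M003, Arg1) }`.
     // The guest interprets a notification value of 1 as a device check and
     // 3 as an eject request.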
1513 
1514 #[cfg(feature = "acpi")]
1515 struct MemorySlot {
1516     slot_id: usize,
1517 }
1518 
1519 #[cfg(feature = "acpi")]
1520 impl Aml for MemorySlot {
1521     fn to_aml_bytes(&self) -> Vec<u8> {
1522         aml::Device::new(
1523             format!("M{:03}", self.slot_id).as_str().into(),
1524             vec![
1525                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C80")),
1526                 &aml::Name::new("_UID".into(), &self.slot_id),
1527                 /*
1528                 _STA return value:
1529                 Bit [0] – Set if the device is present.
1530                 Bit [1] – Set if the device is enabled and decoding its resources.
1531                 Bit [2] – Set if the device should be shown in the UI.
1532                 Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics).
1533                 Bit [4] – Set if the battery is present.
1534                 Bits [31:5] – Reserved (must be cleared).
1535                 */
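                      // For example, the MSTA method below returns 0xf (bits 0-3
                      // set: present, enabled, shown in UI, functioning) for an
                      // enabled slot, and 0 otherwise.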
1536                 &aml::Method::new(
1537                     "_STA".into(),
1538                     0,
1539                     false,
1540                     // Call into the MSTA method, which will interrogate the device
1541                     vec![&aml::Return::new(&aml::MethodCall::new(
1542                         "MSTA".into(),
1543                         vec![&self.slot_id],
1544                     ))],
1545                 ),
1546                 // Get details of memory
1547                 &aml::Method::new(
1548                     "_CRS".into(),
1549                     0,
1550                     false,
1551                     // Call into MCRS, which provides the actual memory details
1552                     vec![&aml::Return::new(&aml::MethodCall::new(
1553                         "MCRS".into(),
1554                         vec![&self.slot_id],
1555                     ))],
1556                 ),
1557             ],
1558         )
1559         .to_aml_bytes()
1560     }
1561 }
1562 
1563 #[cfg(feature = "acpi")]
1564 struct MemorySlots {
1565     slots: usize,
1566 }
1567 
1568 #[cfg(feature = "acpi")]
1569 impl Aml for MemorySlots {
1570     fn to_aml_bytes(&self) -> Vec<u8> {
1571         let mut bytes = Vec::new();
1572 
1573         for slot_id in 0..self.slots {
1574             bytes.extend_from_slice(&MemorySlot { slot_id }.to_aml_bytes());
1575         }
1576 
1577         bytes
1578     }
1579 }
1580 
1581 #[cfg(feature = "acpi")]
1582 struct MemoryMethods {
1583     slots: usize,
1584 }
1585 
1586 #[cfg(feature = "acpi")]
1587 impl Aml for MemoryMethods {
1588     fn to_aml_bytes(&self) -> Vec<u8> {
1589         let mut bytes = Vec::new();
1590         // Add "MTFY" notification method
1591         let mut memory_notifies = Vec::new();
1592         for slot_id in 0..self.slots {
1593             memory_notifies.push(MemoryNotify { slot_id });
1594         }
1595 
1596         let mut memory_notifies_refs: Vec<&dyn aml::Aml> = Vec::new();
1597         for memory_notifier in memory_notifies.iter() {
1598             memory_notifies_refs.push(memory_notifier);
1599         }
1600 
1601         bytes.extend_from_slice(
1602             &aml::Method::new("MTFY".into(), 2, true, memory_notifies_refs).to_aml_bytes(),
1603         );
1604 
1605         // MSCN method
1606         bytes.extend_from_slice(
1607             &aml::Method::new(
1608                 "MSCN".into(),
1609                 0,
1610                 true,
1611                 vec![
1612                     // Take lock defined above
1613                     &aml::Acquire::new("MLCK".into(), 0xffff),
1614                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1615                     &aml::While::new(
1616                         &aml::LessThan::new(&aml::Local(0), &self.slots),
1617                         vec![
1618                             // Write the current slot number (the loop counter held in Local0) to the MMIO selector field
1619                             &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Local(0)),
1620                             // Check if MINS bit is set (inserting)
1621                             &aml::If::new(
1622                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MINS"), &aml::ONE),
1623                                 // Notify device if it is
1624                                 vec![
1625                                     &aml::MethodCall::new(
1626                                         "MTFY".into(),
1627                                         vec![&aml::Local(0), &aml::ONE],
1628                                     ),
1629                                     // Reset MINS bit
1630                                     &aml::Store::new(
1631                                         &aml::Path::new("\\_SB_.MHPC.MINS"),
1632                                         &aml::ONE,
1633                                     ),
1634                                 ],
1635                             ),
1636                             // Check if MRMV bit is set
1637                             &aml::If::new(
1638                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MRMV"), &aml::ONE),
1639                                 // Notify device if it is (with the eject constant 0x3)
1640                                 vec![
1641                                     &aml::MethodCall::new(
1642                                         "MTFY".into(),
1643                                         vec![&aml::Local(0), &3u8],
1644                                     ),
1645                                     // Reset MRMV bit
1646                                     &aml::Store::new(
1647                                         &aml::Path::new("\\_SB_.MHPC.MRMV"),
1648                                         &aml::ONE,
1649                                     ),
1650                                 ],
1651                             ),
1652                             &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE),
1653                         ],
1654                     ),
1655                     // Release lock
1656                     &aml::Release::new("MLCK".into()),
1657                 ],
1658             )
1659             .to_aml_bytes(),
1660         );
1661 
1662         bytes.extend_from_slice(
1663             // Memory status method
1664             &aml::Method::new(
1665                 "MSTA".into(),
1666                 1,
1667                 true,
1668                 vec![
1669                     // Take lock defined above
1670                     &aml::Acquire::new("MLCK".into(), 0xffff),
1671                     // Write the slot number (in the first argument) to the MMIO selector field
1672                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1673                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1674                     // Check if the MEN_ bit is set; if so, set the local variable to 0xf (see _STA for the meaning of this value)
1675                     &aml::If::new(
1676                         &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MEN_"), &aml::ONE),
1677                         vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
1678                     ),
1679                     // Release lock
1680                     &aml::Release::new("MLCK".into()),
1681                     // Return 0 or 0xf
1682                     &aml::Return::new(&aml::Local(0)),
1683                 ],
1684             )
1685             .to_aml_bytes(),
1686         );
1687 
1688         bytes.extend_from_slice(
1689             // Memory range method
1690             &aml::Method::new(
1691                 "MCRS".into(),
1692                 1,
1693                 true,
1694                 vec![
1695                     // Take lock defined above
1696                     &aml::Acquire::new("MLCK".into(), 0xffff),
1697                     // Write the slot number (in the first argument) to the MMIO selector field
1698                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1699                     &aml::Name::new(
1700                         "MR64".into(),
1701                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1702                             aml::AddressSpaceCachable::Cacheable,
1703                             true,
1704                             0x0000_0000_0000_0000u64,
1705                             0xFFFF_FFFF_FFFF_FFFEu64,
1706                         )]),
1707                     ),
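                          // The byte offsets used below index into the QWORD
                          // address space descriptor serialized in MR64 above:
                          // the range minimum starts at byte 14, the range
                          // maximum at byte 22 and the address length at byte
                          // 38, each written as low/high halves so they can be
                          // filled from the 32-bit MHBL/MHBH/MHLL/MHLH fields.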
1708                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &14usize, "MINL".into()),
1709                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &18usize, "MINH".into()),
1710                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &22usize, "MAXL".into()),
1711                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &26usize, "MAXH".into()),
1712                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &38usize, "LENL".into()),
1713                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &42usize, "LENH".into()),
1714                     &aml::Store::new(&aml::Path::new("MINL"), &aml::Path::new("\\_SB_.MHPC.MHBL")),
1715                     &aml::Store::new(&aml::Path::new("MINH"), &aml::Path::new("\\_SB_.MHPC.MHBH")),
1716                     &aml::Store::new(&aml::Path::new("LENL"), &aml::Path::new("\\_SB_.MHPC.MHLL")),
1717                     &aml::Store::new(&aml::Path::new("LENH"), &aml::Path::new("\\_SB_.MHPC.MHLH")),
1718                     &aml::Add::new(
1719                         &aml::Path::new("MAXL"),
1720                         &aml::Path::new("MINL"),
1721                         &aml::Path::new("LENL"),
1722                     ),
1723                     &aml::Add::new(
1724                         &aml::Path::new("MAXH"),
1725                         &aml::Path::new("MINH"),
1726                         &aml::Path::new("LENH"),
1727                     ),
1728                     &aml::If::new(
1729                         &aml::LessThan::new(&aml::Path::new("MAXL"), &aml::Path::new("MINL")),
1730                         vec![&aml::Add::new(
1731                             &aml::Path::new("MAXH"),
1732                             &aml::ONE,
1733                             &aml::Path::new("MAXH"),
1734                         )],
1735                     ),
1736                     &aml::Subtract::new(
1737                         &aml::Path::new("MAXL"),
1738                         &aml::Path::new("MAXL"),
1739                         &aml::ONE,
1740                     ),
1741                     // Release lock
1742                     &aml::Release::new("MLCK".into()),
1743                     &aml::Return::new(&aml::Path::new("MR64")),
1744                 ],
1745             )
1746             .to_aml_bytes(),
1747         );
1748         bytes
1749     }
1750 }
1751 
1752 #[cfg(feature = "acpi")]
1753 impl Aml for MemoryManager {
1754     fn to_aml_bytes(&self) -> Vec<u8> {
1755         let mut bytes = Vec::new();
1756 
1757         // Memory Hotplug Controller
1758         bytes.extend_from_slice(
1759             &aml::Device::new(
1760                 "_SB_.MHPC".into(),
1761                 vec![
1762                     &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
1763                     &aml::Name::new("_UID".into(), &"Memory Hotplug Controller"),
1764                     // Mutex to protect concurrent access, as we write to choose the slot and then read back its status
1765                     &aml::Mutex::new("MLCK".into(), 0),
1766                     &aml::Name::new(
1767                         "_CRS".into(),
1768                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1769                             aml::AddressSpaceCachable::NotCacheable,
1770                             true,
1771                             self.acpi_address.0 as u64,
1772                             self.acpi_address.0 + MEMORY_MANAGER_ACPI_SIZE as u64 - 1,
1773                         )]),
1774                     ),
1775                     // OpRegion and Fields map MMIO range into individual field values
1776                     &aml::OpRegion::new(
1777                         "MHPR".into(),
1778                         aml::OpRegionSpace::SystemMemory,
1779                         self.acpi_address.0 as usize,
1780                         MEMORY_MANAGER_ACPI_SIZE,
1781                     ),
1782                     &aml::Field::new(
1783                         "MHPR".into(),
1784                         aml::FieldAccessType::DWord,
1785                         aml::FieldUpdateRule::Preserve,
1786                         vec![
1787                             aml::FieldEntry::Named(*b"MHBL", 32), // Base (low 4 bytes)
1788                             aml::FieldEntry::Named(*b"MHBH", 32), // Base (high 4 bytes)
1789                             aml::FieldEntry::Named(*b"MHLL", 32), // Length (low 4 bytes)
1790                             aml::FieldEntry::Named(*b"MHLH", 32), // Length (high 4 bytes)
1791                         ],
1792                     ),
1793                     &aml::Field::new(
1794                         "MHPR".into(),
1795                         aml::FieldAccessType::DWord,
1796                         aml::FieldUpdateRule::Preserve,
1797                         vec![
1798                             aml::FieldEntry::Reserved(128),
1799                             aml::FieldEntry::Named(*b"MHPX", 32), // PXM
1800                         ],
1801                     ),
1802                     &aml::Field::new(
1803                         "MHPR".into(),
1804                         aml::FieldAccessType::Byte,
1805                         aml::FieldUpdateRule::WriteAsZeroes,
1806                         vec![
1807                             aml::FieldEntry::Reserved(160),
1808                             aml::FieldEntry::Named(*b"MEN_", 1), // Enabled
1809                             aml::FieldEntry::Named(*b"MINS", 1), // Inserting
1810                             aml::FieldEntry::Named(*b"MRMV", 1), // Removing
1811                             aml::FieldEntry::Named(*b"MEJ0", 1), // Ejecting
1812                         ],
1813                     ),
1814                     &aml::Field::new(
1815                         "MHPR".into(),
1816                         aml::FieldAccessType::DWord,
1817                         aml::FieldUpdateRule::Preserve,
1818                         vec![
1819                             aml::FieldEntry::Named(*b"MSEL", 32), // Selector
1820                             aml::FieldEntry::Named(*b"MOEV", 32), // Event
1821                             aml::FieldEntry::Named(*b"MOSC", 32), // OSC
1822                         ],
1823                     ),
1824                     &MemoryMethods {
1825                         slots: self.hotplug_slots.len(),
1826                     },
1827                     &MemorySlots {
1828                         slots: self.hotplug_slots.len(),
1829                     },
1830                 ],
1831             )
1832             .to_aml_bytes(),
1833         );
1834 
1835         #[cfg(target_arch = "x86_64")]
1836         {
1837             if let Some(sgx_epc_region) = &self.sgx_epc_region {
1838                 let min = sgx_epc_region.start().raw_value() as u64;
1839                 let max = min + sgx_epc_region.size() as u64 - 1;
1840                 // SGX EPC region
1841                 bytes.extend_from_slice(
1842                     &aml::Device::new(
1843                         "_SB_.EPC_".into(),
1844                         vec![
1845                             &aml::Name::new("_HID".into(), &aml::EisaName::new("INT0E0C")),
1846                             // QWORD describing the EPC region start and size
1847                             &aml::Name::new(
1848                                 "_CRS".into(),
1849                                 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1850                                     aml::AddressSpaceCachable::NotCacheable,
1851                                     true,
1852                                     min,
1853                                     max,
1854                                 )]),
1855                             ),
1856                             &aml::Method::new(
1857                                 "_STA".into(),
1858                                 0,
1859                                 false,
1860                                 vec![&aml::Return::new(&0xfu8)],
1861                             ),
1862                         ],
1863                     )
1864                     .to_aml_bytes(),
1865                 );
1866             }
1867         }
1868 
1869         bytes
1870     }
1871 }
1872 
1873 impl Pausable for MemoryManager {}
1874 
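     // Describes one guest memory region in a snapshot: `content` holds the
     // name of the file (relative to the snapshot destination) that will
     // contain the region's bytes, or None when the region is already
     // persisted through a shared, user-accessible backing file.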
1875 #[derive(Clone, Versionize)]
1876 pub struct MemoryRegion {
1877     content: Option<String>,
1878     start_addr: u64,
1879     size: u64,
1880 }
1881 
1882 #[derive(Versionize)]
1883 pub struct MemoryManagerSnapshotData {
1884     memory_regions: Vec<MemoryRegion>,
1885 }
1886 
1887 impl VersionMapped for MemoryManagerSnapshotData {}
1888 
1889 impl Snapshottable for MemoryManager {
1890     fn id(&self) -> String {
1891         MEMORY_MANAGER_SNAPSHOT_ID.to_string()
1892     }
1893 
1894     fn snapshot(&mut self) -> result::Result<Snapshot, MigratableError> {
1895         let mut memory_manager_snapshot = Snapshot::new(MEMORY_MANAGER_SNAPSHOT_ID);
1896         let guest_memory = self.guest_memory.memory();
1897 
1898         let mut memory_regions: Vec<MemoryRegion> = Vec::new();
1899 
1900         for (index, region) in guest_memory.iter().enumerate() {
1901             if region.len() == 0 {
1902                 return Err(MigratableError::Snapshot(anyhow!("Zero length region")));
1903             }
1904 
1905             let mut content = Some(PathBuf::from(format!("memory-region-{}", index)));
1906             if let Some(file_offset) = region.file_offset() {
1907                 if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED)
1908                     && Self::is_hardlink(file_offset.file())
1909                 {
1910                     // In this very specific case, we know the memory region
1911                     // is backed by a file on the host filesystem that can be
1912                     // accessed by the user, and additionally the mapping is
1913                     // shared, which means that modifications to the content
1914                     // are written to the actual file.
1915                     // When these conditions are met, we can skip copying the
1916                     // memory content for this specific region, as we can
1917                     // assume the user already has it saved through the
1918                     // backing file.
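                          // For example, a zone explicitly backed by a regular
                          // file on the host (rather than an anonymous mapping)
                          // and mapped with MAP_SHARED falls in this category.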
1919                     content = None;
1920                 }
1921             }
1922 
1923             memory_regions.push(MemoryRegion {
1924                 content: content.map(|p| p.to_str().unwrap().to_owned()),
1925                 start_addr: region.start_addr().0,
1926                 size: region.len(),
1927             });
1928         }
1929 
1930         // Store this list of regions locally as it will be used through the
1931         // Transportable::send() implementation. The point is to avoid
1932         // duplicating the code that builds the path for each region. The
1933         // 'snapshot' step creates the list of memory regions, including
1934         // information about whether each memory region needs to be copied
1935         // or not. This saves the 'send' step from having to go through the
1936         // same process, and instead it can directly proceed with storing
1937         // the memory region content for the regions that require it.
1938         self.snapshot_memory_regions = memory_regions.clone();
1939 
1940         memory_manager_snapshot.add_data_section(SnapshotDataSection::new_from_versioned_state(
1941             MEMORY_MANAGER_SNAPSHOT_ID,
1942             &MemoryManagerSnapshotData { memory_regions },
1943         )?);
1944 
1945         let mut memory_snapshot = self.snapshot.lock().unwrap();
1946         *memory_snapshot = Some(guest_memory);
1947 
1948         Ok(memory_manager_snapshot)
1949     }
1950 }
1951 
1952 impl Transportable for MemoryManager {
1953     fn send(
1954         &self,
1955         _snapshot: &Snapshot,
1956         destination_url: &str,
1957     ) -> result::Result<(), MigratableError> {
1958         let vm_memory_snapshot_path = url_to_path(destination_url)?;
1959 
1960         if let Some(guest_memory) = &*self.snapshot.lock().unwrap() {
1961             for region in self.snapshot_memory_regions.iter() {
1962                 if let Some(content) = &region.content {
1963                     let mut memory_region_path = vm_memory_snapshot_path.clone();
1964                     memory_region_path.push(content);
1965 
1966                     // Create the snapshot file for the region
1967                     let mut memory_region_file = OpenOptions::new()
1968                         .read(true)
1969                         .write(true)
1970                         .create_new(true)
1971                         .open(memory_region_path)
1972                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
1973 
1974                     guest_memory
1975                         .write_all_to(
1976                             GuestAddress(region.start_addr),
1977                             &mut memory_region_file,
1978                             region.size as usize,
1979                         )
1980                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
1981                 }
1982             }
1983         }
1984         Ok(())
1985     }
1986 }
1987 
1988 impl Migratable for MemoryManager {
1989     // Start the dirty log in the hypervisor (kvm/mshv).
1990     // Also, reset the dirty bitmap logged by the vmm.
1991     // Just before we do a bulk copy we want to start/clear the dirty log so that
1992     // pages touched during our bulk copy are tracked.
1993     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
1994         self.vm.start_dirty_log().map_err(|e| {
1995             MigratableError::MigrateSend(anyhow!("Error starting VM dirty log {}", e))
1996         })?;
1997 
1998         for r in self.guest_memory.memory().iter() {
1999             r.bitmap().reset();
2000         }
2001 
2002         Ok(())
2003     }
2004 
2005     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
2006         self.vm.stop_dirty_log().map_err(|e| {
2007             MigratableError::MigrateSend(anyhow!("Error stopping VM dirty log {}", e))
2008         })?;
2009 
2010         Ok(())
2011     }
2012 
2013     // Generate a table for the pages that are dirty. The dirty pages are collapsed
2014     // together in the table if they are contiguous.
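     // For example, if the hypervisor bitmap marks pages 0 and 1 dirty while
     // the VMM bitmap marks pages 1 and 2, the two bitmaps are ORed together
     // and the result is reported as a single range covering pages 0 through 2.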
2015     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
2016         let mut table = MemoryRangeTable::default();
2017         for r in &self.guest_ram_mappings {
2018             let vm_dirty_bitmap = self.vm.get_dirty_log(r.slot, r.gpa, r.size).map_err(|e| {
2019                 MigratableError::MigrateSend(anyhow!("Error getting VM dirty log {}", e))
2020             })?;
2021             let vmm_dirty_bitmap = match self.guest_memory.memory().find_region(GuestAddress(r.gpa))
2022             {
2023                 Some(region) => {
2024                     assert!(region.start_addr().raw_value() == r.gpa);
2025                     assert!(region.len() == r.size);
2026                     region.bitmap().get_and_reset()
2027                 }
2028                 None => {
2029                     return Err(MigratableError::MigrateSend(anyhow!(
2030                         "Error finding 'guest memory region' with address {:x}",
2031                         r.gpa
2032                     )))
2033                 }
2034             };
2035 
2036             let dirty_bitmap: Vec<u64> = vm_dirty_bitmap
2037                 .iter()
2038                 .zip(vmm_dirty_bitmap.iter())
2039                 .map(|(x, y)| x | y)
2040                 .collect();
2041 
2042             let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa);
2043 
2044             if sub_table.regions().is_empty() {
2045                 info!("Dirty Memory Range Table is empty");
2046             } else {
2047                 info!("Dirty Memory Range Table:");
2048                 for range in sub_table.regions() {
2049                     info!("GPA: {:x} size: {} (KiB)", range.gpa, range.length / 1024);
2050                 }
2051             }
2052 
2053             table.extend(sub_table);
2054         }
2055         Ok(table)
2056     }
2057 }
2058