xref: /cloud-hypervisor/vmm/src/memory_manager.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 #[cfg(target_arch = "x86_64")]
6 use crate::config::SgxEpcConfig;
7 use crate::config::{HotplugMethod, MemoryConfig, MemoryZoneConfig};
8 use crate::migration::url_to_path;
9 use crate::MEMORY_MANAGER_SNAPSHOT_ID;
10 use crate::{GuestMemoryMmap, GuestRegionMmap};
11 #[cfg(feature = "acpi")]
12 use acpi_tables::{aml, aml::Aml};
13 use anyhow::anyhow;
14 #[cfg(target_arch = "x86_64")]
15 use arch::x86_64::{SgxEpcRegion, SgxEpcSection};
16 use arch::{layout, RegionType};
17 #[cfg(target_arch = "x86_64")]
18 use devices::ioapic;
19 #[cfg(target_arch = "x86_64")]
20 use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE};
21 use std::collections::HashMap;
22 use std::convert::TryInto;
23 use std::ffi;
24 use std::fs::{File, OpenOptions};
25 use std::io;
26 use std::ops::Deref;
27 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
28 use std::path::PathBuf;
29 use std::result;
30 use std::sync::{Arc, Barrier, Mutex};
31 use versionize::{VersionMap, Versionize, VersionizeResult};
32 use versionize_derive::Versionize;
33 #[cfg(target_arch = "x86_64")]
34 use vm_allocator::GsiApic;
35 use vm_allocator::SystemAllocator;
36 use vm_device::BusDevice;
37 use vm_memory::guest_memory::FileOffset;
38 use vm_memory::{
39     mmap::MmapRegionError, Address, Bytes, Error as MmapError, GuestAddress, GuestAddressSpace,
40     GuestMemory, GuestMemoryAtomic, GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
41     GuestUsize, MmapRegion,
42 };
43 use vm_migration::{
44     protocol::{MemoryRange, MemoryRangeTable},
45     Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
46     Transportable, VersionMapped,
47 };
48 
49 #[cfg(feature = "acpi")]
50 pub const MEMORY_MANAGER_ACPI_SIZE: usize = 0x18;
51 
52 const DEFAULT_MEMORY_ZONE: &str = "mem0";
53 
54 #[cfg(target_arch = "x86_64")]
55 const X86_64_IRQ_BASE: u32 = 5;
56 
57 const HOTPLUG_COUNT: usize = 8;
58 
59 // Memory policy constants (values from the Linux kernel's <linux/mempolicy.h>, used by the mbind() wrapper below)
60 const MPOL_BIND: u32 = 2;
61 const MPOL_MF_STRICT: u32 = 1;
62 const MPOL_MF_MOVE: u32 = 1 << 1;
63 
64 #[derive(Default)]
65 struct HotPlugState {
66     base: u64,
67     length: u64,
68     active: bool,
69     inserting: bool,
70     removing: bool,
71 }
72 
73 pub struct VirtioMemZone {
74     region: Arc<GuestRegionMmap>,
75     resize_handler: virtio_devices::Resize,
76     hotplugged_size: u64,
77     hugepages: bool,
78 }
79 
80 impl VirtioMemZone {
81     pub fn region(&self) -> &Arc<GuestRegionMmap> {
82         &self.region
83     }
84     pub fn resize_handler(&self) -> &virtio_devices::Resize {
85         &self.resize_handler
86     }
87     pub fn hotplugged_size(&self) -> u64 {
88         self.hotplugged_size
89     }
90     pub fn hugepages(&self) -> bool {
91         self.hugepages
92     }
93 }
94 
95 #[derive(Default)]
96 pub struct MemoryZone {
97     regions: Vec<Arc<GuestRegionMmap>>,
98     virtio_mem_zone: Option<VirtioMemZone>,
99 }
100 
101 impl MemoryZone {
102     pub fn regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
103         &self.regions
104     }
105     pub fn virtio_mem_zone(&self) -> &Option<VirtioMemZone> {
106         &self.virtio_mem_zone
107     }
108 }
109 
110 pub type MemoryZones = HashMap<String, MemoryZone>;
111 
112 struct GuestRamMapping {
113     slot: u32,
114     gpa: u64,
115     size: u64,
116 }
117 
118 pub struct MemoryManager {
119     boot_guest_memory: GuestMemoryMmap,
120     guest_memory: GuestMemoryAtomic<GuestMemoryMmap>,
121     next_memory_slot: u32,
122     start_of_device_area: GuestAddress,
123     end_of_device_area: GuestAddress,
124     pub vm: Arc<dyn hypervisor::Vm>,
125     hotplug_slots: Vec<HotPlugState>,
126     selected_slot: usize,
127     mergeable: bool,
128     allocator: Arc<Mutex<SystemAllocator>>,
129     hotplug_method: HotplugMethod,
130     boot_ram: u64,
131     current_ram: u64,
132     next_hotplug_slot: usize,
133     snapshot: Mutex<Option<GuestMemoryLoadGuard<GuestMemoryMmap>>>,
134     shared: bool,
135     hugepages: bool,
136     hugepage_size: Option<u64>,
137     #[cfg(target_arch = "x86_64")]
138     sgx_epc_region: Option<SgxEpcRegion>,
139     user_provided_zones: bool,
140     snapshot_memory_regions: Vec<MemoryRegion>,
141     memory_zones: MemoryZones,
142     log_dirty: bool, // Enable dirty logging for created RAM regions
143 
144     // Keep track of calls to create_userspace_mapping() for guest RAM.
145     // This is useful for getting the dirty pages as we need to know the
146     // slots that the mapping is created in.
147     guest_ram_mappings: Vec<GuestRamMapping>,
148 
149     #[cfg(feature = "acpi")]
150     pub acpi_address: GuestAddress,
151 }
152 
153 #[derive(Debug)]
154 pub enum Error {
155     /// Failed to create shared file.
156     SharedFileCreate(io::Error),
157 
158     /// Failed to set shared file length.
159     SharedFileSetLen(io::Error),
160 
161     /// Mmap backed guest memory error
162     GuestMemory(MmapError),
163 
164     /// Failed to allocate a memory range.
165     MemoryRangeAllocation,
166 
167     /// Error from region creation
168     GuestMemoryRegion(MmapRegionError),
169 
170     /// No ACPI slot available
171     NoSlotAvailable,
172 
173     /// Not enough space in the hotplug RAM region
174     InsufficientHotplugRam,
175 
176     /// The requested hotplug memory addition is not a valid size
177     InvalidSize,
178 
179     /// Failed to create the user memory region.
180     CreateUserMemoryRegion(hypervisor::HypervisorVmError),
181 
182     /// Failed to remove the user memory region.
183     RemoveUserMemoryRegion(hypervisor::HypervisorVmError),
184 
185     /// Failed to create an EventFd.
186     EventFdFail(io::Error),
187 
188     /// Eventfd write error
189     EventfdError(io::Error),
190 
191     /// Failed to resize the virtio-mem region
192     VirtioMemResizeFail(virtio_devices::mem::Error),
193 
194     /// Cannot restore VM
195     Restore(MigratableError),
196 
197     /// Cannot create the system allocator
198     CreateSystemAllocator,
199 
200     /// Invalid SGX EPC section size
201     #[cfg(target_arch = "x86_64")]
202     EpcSectionSizeInvalid,
203 
204     /// Failed allocating SGX EPC region
205     #[cfg(target_arch = "x86_64")]
206     SgxEpcRangeAllocation,
207 
208     /// Failed opening SGX virtual EPC device
209     #[cfg(target_arch = "x86_64")]
210     SgxVirtEpcOpen(io::Error),
211 
212     /// Failed setting the SGX virtual EPC section size
213     #[cfg(target_arch = "x86_64")]
214     SgxVirtEpcFileSetLen(io::Error),
215 
216     /// Failed opening SGX provisioning device
217     #[cfg(target_arch = "x86_64")]
218     SgxProvisionOpen(io::Error),
219 
220     /// Failed enabling SGX provisioning
221     #[cfg(target_arch = "x86_64")]
222     SgxEnableProvisioning(hypervisor::HypervisorVmError),
223 
224     /// Failed creating a new MmapRegion instance.
225     #[cfg(target_arch = "x86_64")]
226     NewMmapRegion(vm_memory::mmap::MmapRegionError),
227 
228     /// No memory zones found.
229     MissingMemoryZones,
230 
231     /// Memory configuration is not valid.
232     InvalidMemoryParameters,
233 
234     /// Forbidden operation. Impossible to resize guest memory if it is
235     /// backed by user defined memory regions.
236     InvalidResizeWithMemoryZones,
237 
238     /// It's invalid to try applying a NUMA policy to a memory zone that is
239     /// memory mapped with MAP_SHARED.
240     InvalidSharedMemoryZoneWithHostNuma,
241 
242     /// Failed applying NUMA memory policy.
243     ApplyNumaPolicy(io::Error),
244 
245     /// Memory zone identifier is not unique.
246     DuplicateZoneId,
247 
248     /// No virtio-mem resizing handler found.
249     MissingVirtioMemHandler,
250 
251     /// Unknown memory zone.
252     UnknownMemoryZone,
253 
254     /// Invalid size for resizing. The size can be any value except 0.
255     InvalidHotplugSize,
256 
257     /// Invalid hotplug method associated with memory zones resizing capability.
258     InvalidHotplugMethodWithMemoryZones,
259 
260     /// Could not find specified memory zone identifier from hash map.
261     MissingZoneIdentifier,
262 
263     /// Resizing the memory zone failed.
264     ResizeZone,
265 
266     /// Guest address overflow
267     GuestAddressOverFlow,
268 
269     /// Error opening snapshot file
270     SnapshotOpen(io::Error),
271 
272     /// Error copying snapshot into region
273     SnapshotCopy(GuestMemoryError),
274 
275     /// Failed to allocate MMIO address
276     AllocateMmioAddress,
277 }
278 
279 const ENABLE_FLAG: usize = 0;
280 const INSERTING_FLAG: usize = 1;
281 const REMOVING_FLAG: usize = 2;
282 const EJECT_FLAG: usize = 3;
283 
284 const BASE_OFFSET_LOW: u64 = 0;
285 const BASE_OFFSET_HIGH: u64 = 0x4;
286 const LENGTH_OFFSET_LOW: u64 = 0x8;
287 const LENGTH_OFFSET_HIGH: u64 = 0xC;
288 const STATUS_OFFSET: u64 = 0x14;
289 const SELECTION_OFFSET: u64 = 0;
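// Taken together, these constants describe the register layout of the memory
// hotplug device exposed to the guest ACPI code: offset 0 selects a hotplug
// slot, the selected slot's base address and length are read back as 32-bit
// low/high halves, and the status register at offset 0x14 is a bitfield using
// the *_FLAG bit positions defined above.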
290 
291 // The MMIO address space size is reduced by 64KiB. This is done for the
292 // following reasons:
293 //  - Reduce the addressable space size by at least 4KiB to work around a Linux
294 //    bug when the VMM allocates devices at the end of the addressable space
295 //  - Windows requires the addressable space size to be 64KiB aligned
296 fn mmio_address_space_size(phys_bits: u8) -> u64 {
297     (1 << phys_bits) - (1 << 16)
298 }
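// A minimal sanity-check sketch for the function above (an illustrative
// addition): with 40 physical address bits the MMIO address space is
// 1TiB - 64KiB, which satisfies the 64KiB alignment requirement.
#[cfg(test)]
mod mmio_address_space_size_sketch {
    use super::mmio_address_space_size;

    #[test]
    fn size_is_64k_aligned() {
        let size = mmio_address_space_size(40);
        assert_eq!(size, (1u64 << 40) - (1u64 << 16));
        assert_eq!(size & 0xffff, 0);
    }
}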
299 
300 impl BusDevice for MemoryManager {
301     fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
302         if self.selected_slot < self.hotplug_slots.len() {
303             let state = &self.hotplug_slots[self.selected_slot];
304             match offset {
305                 BASE_OFFSET_LOW => {
306                     data.copy_from_slice(&state.base.to_le_bytes()[..4]);
307                 }
308                 BASE_OFFSET_HIGH => {
309                     data.copy_from_slice(&state.base.to_le_bytes()[4..]);
310                 }
311                 LENGTH_OFFSET_LOW => {
312                     data.copy_from_slice(&state.length.to_le_bytes()[..4]);
313                 }
314                 LENGTH_OFFSET_HIGH => {
315                     data.copy_from_slice(&state.length.to_le_bytes()[4..]);
316                 }
317                 STATUS_OFFSET => {
318                     // The Linux kernel, quite reasonably, doesn't zero the memory it gives us.
319                     data.copy_from_slice(&[0; 8][0..data.len()]);
320                     if state.active {
321                         data[0] |= 1 << ENABLE_FLAG;
322                     }
323                     if state.inserting {
324                         data[0] |= 1 << INSERTING_FLAG;
325                     }
326                     if state.removing {
327                         data[0] |= 1 << REMOVING_FLAG;
328                     }
329                 }
330                 _ => {
331                     warn!(
332                         "Unexpected offset for accessing memory manager device: {:#}",
333                         offset
334                     );
335                 }
336             }
337         }
338     }
339 
340     fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
341         match offset {
342             SELECTION_OFFSET => {
343                 self.selected_slot = usize::from(data[0]);
344             }
345             STATUS_OFFSET => {
346                 let state = &mut self.hotplug_slots[self.selected_slot];
347                 // The ACPI code writes back a 1 to acknowledge the insertion
348                 if (data[0] & (1 << INSERTING_FLAG) == 1 << INSERTING_FLAG) && state.inserting {
349                     state.inserting = false;
350                 }
351                 // Ditto for removal
352                 if (data[0] & (1 << REMOVING_FLAG) == 1 << REMOVING_FLAG) && state.removing {
353                     state.removing = false;
354                 }
355                 // Trigger removal of "DIMM"
356                 if data[0] & (1 << EJECT_FLAG) == 1 << EJECT_FLAG {
357                     warn!("Ejection of memory not currently supported");
358                 }
359             }
360             _ => {
361                 warn!(
362                     "Unexpected offset for accessing memory manager device: {:#}",
363                     offset
364                 );
365             }
366         };
367         None
368     }
369 }
370 
371 impl MemoryManager {
372     /// Creates all memory regions based on the available RAM ranges defined
373     /// by `ram_regions`, and based on the description of the memory zones.
374     /// In practice, this function can perform multiple memory mappings of the
375     /// same backing file if there's a hole in the address space between two
376     /// RAM ranges.
377     /// One example might be ram_regions containing 2 regions (0-3G and 4G-6G)
378     /// and zones containing two zones (size 1G and size 4G).
379     /// This function will create 3 resulting memory regions:
380     /// - First one mapping entirely the first memory zone on 0-1G range
381     /// - Second one mapping partially the second memory zone on 1G-3G range
382     /// - Third one mapping partially the second memory zone on 4G-6G range
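    /// In that example, if the second zone is file backed, the second region
    /// maps file offsets 0-2G onto guest addresses 1G-3G and the third region
    /// maps file offsets 2G-4G onto guest addresses 4G-6G, i.e. the same
    /// backing file is mapped twice around the hole in the address space.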
383     fn create_memory_regions_from_zones(
384         ram_regions: &[(GuestAddress, usize)],
385         zones: &[MemoryZoneConfig],
386         prefault: bool,
387     ) -> Result<(Vec<Arc<GuestRegionMmap>>, MemoryZones), Error> {
388         let mut zones = zones.to_owned();
389         let mut mem_regions = Vec::new();
390         let mut zone = zones.remove(0);
391         let mut zone_offset = 0;
392         let mut memory_zones = HashMap::new();
393 
394         // Add zone id to the list of memory zones.
395         memory_zones.insert(zone.id.clone(), MemoryZone::default());
396 
397         for ram_region in ram_regions.iter() {
398             let mut ram_region_offset = 0;
399             let mut exit = false;
400 
401             loop {
402                 let mut ram_region_consumed = false;
403                 let mut pull_next_zone = false;
404 
405                 let ram_region_sub_size = ram_region.1 - ram_region_offset;
406                 let zone_sub_size = zone.size as usize - zone_offset;
407 
408                 let file_offset = zone_offset as u64;
409                 let region_start = ram_region
410                     .0
411                     .checked_add(ram_region_offset as u64)
412                     .ok_or(Error::GuestAddressOverFlow)?;
413                 let region_size = if zone_sub_size <= ram_region_sub_size {
414                     if zone_sub_size == ram_region_sub_size {
415                         ram_region_consumed = true;
416                     }
417 
418                     ram_region_offset += zone_sub_size;
419                     pull_next_zone = true;
420 
421                     zone_sub_size
422                 } else {
423                     zone_offset += ram_region_sub_size;
424                     ram_region_consumed = true;
425 
426                     ram_region_sub_size
427                 };
428 
429                 let region = MemoryManager::create_ram_region(
430                     &zone.file,
431                     file_offset,
432                     region_start,
433                     region_size,
434                     prefault,
435                     zone.shared,
436                     zone.hugepages,
437                     zone.hugepage_size,
438                     zone.host_numa_node,
439                 )?;
440 
441                 // Add region to the list of regions associated with the
442                 // current memory zone.
443                 if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
444                     memory_zone.regions.push(region.clone());
445                 }
446 
447                 mem_regions.push(region);
448 
449                 if pull_next_zone {
450                     // Get the next zone and reset the offset.
451                     zone_offset = 0;
452                     if zones.is_empty() {
453                         exit = true;
454                         break;
455                     }
456                     zone = zones.remove(0);
457 
458                     // Check if the zone id already exists. In case it does, return
459                     // an error as we need unique identifiers. Otherwise, add
460                     // the new zone id to the list of memory zones.
461                     if memory_zones.contains_key(&zone.id) {
462                         error!(
463                             "Memory zone identifier '{}' found more than once. \
464                             It must be unique",
465                             zone.id,
466                         );
467                         return Err(Error::DuplicateZoneId);
468                     }
469                     memory_zones.insert(zone.id.clone(), MemoryZone::default());
470                 }
471 
472                 if ram_region_consumed {
473                     break;
474                 }
475             }
476 
477             if exit {
478                 break;
479             }
480         }
481 
482         Ok((mem_regions, memory_zones))
483     }
484 
485     fn fill_saved_regions(&mut self, saved_regions: Vec<MemoryRegion>) -> Result<(), Error> {
486         for region in saved_regions {
487             if let Some(content) = region.content {
488                 // Open (read only) the snapshot file for the given region.
489                 let mut memory_region_file = OpenOptions::new()
490                     .read(true)
491                     .open(content)
492                     .map_err(Error::SnapshotOpen)?;
493 
494                 self.guest_memory
495                     .memory()
496                     .read_exact_from(
497                         GuestAddress(region.start_addr),
498                         &mut memory_region_file,
499                         region.size as usize,
500                     )
501                     .map_err(Error::SnapshotCopy)?;
502             }
503         }
504 
505         Ok(())
506     }
507 
508     pub fn new(
509         vm: Arc<dyn hypervisor::Vm>,
510         config: &MemoryConfig,
511         prefault: bool,
512         phys_bits: u8,
513         #[cfg(feature = "tdx")] tdx_enabled: bool,
514     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
515         let user_provided_zones = config.size == 0;
516         let mut allow_mem_hotplug: bool = false;
517 
518         let (ram_size, zones) = if !user_provided_zones {
519             if config.zones.is_some() {
520                 error!(
521                     "User defined memory regions can't be provided if the \
522                     memory size is not 0"
523                 );
524                 return Err(Error::InvalidMemoryParameters);
525             }
526 
527             if config.hotplug_size.is_some() {
528                 allow_mem_hotplug = true;
529             }
530 
531             if let Some(hotplugged_size) = config.hotplugged_size {
532                 if let Some(hotplug_size) = config.hotplug_size {
533                     if hotplugged_size > hotplug_size {
534                         error!(
535                             "'hotplugged_size' {} can't be bigger than \
536                             'hotplug_size' {}",
537                             hotplugged_size, hotplug_size,
538                         );
539                         return Err(Error::InvalidMemoryParameters);
540                     }
541                 } else {
542                     error!(
543                         "Invalid to define 'hotplugged_size' when there is \
544                         no 'hotplug_size'"
545                     );
546                     return Err(Error::InvalidMemoryParameters);
547                 }
548                 if config.hotplug_method == HotplugMethod::Acpi {
549                     error!(
550                         "Invalid to define 'hotplugged_size' with hotplug \
551                         method 'acpi'"
552                     );
553                     return Err(Error::InvalidMemoryParameters);
554                 }
555             }
556 
557             // Create a single zone from the global memory config. This lets
558             // us reuse the codepath for user defined memory zones.
559             let zones = vec![MemoryZoneConfig {
560                 id: String::from(DEFAULT_MEMORY_ZONE),
561                 size: config.size,
562                 file: None,
563                 shared: config.shared,
564                 hugepages: config.hugepages,
565                 hugepage_size: config.hugepage_size,
566                 host_numa_node: None,
567                 hotplug_size: config.hotplug_size,
568                 hotplugged_size: config.hotplugged_size,
569             }];
570 
571             (config.size, zones)
572         } else {
573             if config.zones.is_none() {
574                 error!(
575                     "User defined memory regions must be provided if the \
576                     memory size is 0"
577                 );
578                 return Err(Error::MissingMemoryZones);
579             }
580 
581             // Safe to unwrap as we checked right above that some zones
582             // were provided.
583             let zones = config.zones.clone().unwrap();
584             if zones.is_empty() {
585                 return Err(Error::MissingMemoryZones);
586             }
587 
588             let mut total_ram_size: u64 = 0;
589             for zone in zones.iter() {
590                 total_ram_size += zone.size;
591 
592                 if zone.shared && zone.file.is_some() && zone.host_numa_node.is_some() {
593                     error!(
594                         "Invalid to set host NUMA policy for a memory zone \
595                         backed by a regular file and mapped as 'shared'"
596                     );
597                     return Err(Error::InvalidSharedMemoryZoneWithHostNuma);
598                 }
599 
600                 if zone.hotplug_size.is_some() && config.hotplug_method == HotplugMethod::Acpi {
601                     error!("Invalid to set ACPI hotplug method for memory zones");
602                     return Err(Error::InvalidHotplugMethodWithMemoryZones);
603                 }
604 
605                 if let Some(hotplugged_size) = zone.hotplugged_size {
606                     if let Some(hotplug_size) = zone.hotplug_size {
607                         if hotplugged_size > hotplug_size {
608                             error!(
609                                 "'hotplugged_size' {} can't be bigger than \
610                                 'hotplug_size' {}",
611                                 hotplugged_size, hotplug_size,
612                             );
613                             return Err(Error::InvalidMemoryParameters);
614                         }
615                     } else {
616                         error!(
617                             "Invalid to define 'hotplugged_size' when there is \
618                             no 'hotplug_size' for a memory zone"
619                         );
620                         return Err(Error::InvalidMemoryParameters);
621                     }
622                     if config.hotplug_method == HotplugMethod::Acpi {
623                         error!(
624                             "Invalid to define 'hotplugged_size' with hotplug \
625                             method 'acpi'"
626                         );
627                         return Err(Error::InvalidMemoryParameters);
628                     }
629                 }
630             }
631 
632             (total_ram_size, zones)
633         };
634 
635         // Init guest memory
636         let arch_mem_regions = arch::arch_memory_regions(ram_size);
637 
638         let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
639             .iter()
640             .filter(|r| r.2 == RegionType::Ram)
641             .map(|r| (r.0, r.1))
642             .collect();
643 
644         let (mem_regions, mut memory_zones) =
645             Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault)?;
646 
647         let guest_memory =
648             GuestMemoryMmap::from_arc_regions(mem_regions).map_err(Error::GuestMemory)?;
649 
650         let boot_guest_memory = guest_memory.clone();
651 
652         let mmio_address_space_size = mmio_address_space_size(phys_bits);
653         debug_assert_eq!(
654             (((mmio_address_space_size) >> 16) << 16),
655             mmio_address_space_size
656         );
657         let end_of_device_area = GuestAddress(mmio_address_space_size - 1);
658 
659         let mut start_of_device_area =
660             MemoryManager::start_addr(guest_memory.last_addr(), allow_mem_hotplug)?;
661         let mut virtio_mem_regions: Vec<Arc<GuestRegionMmap>> = Vec::new();
662 
663         // Update list of memory zones for resize.
664         for zone in zones {
665             if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
666                 if let Some(hotplug_size) = zone.hotplug_size {
667                     if hotplug_size == 0 {
668                         error!("'hotplug_size' can't be 0");
669                         return Err(Error::InvalidHotplugSize);
670                     }
671 
672                     if !user_provided_zones && config.hotplug_method == HotplugMethod::Acpi {
673                         start_of_device_area = start_of_device_area
674                             .checked_add(hotplug_size)
675                             .ok_or(Error::GuestAddressOverFlow)?;
676                     } else {
677                         // Alignment must be "natural" i.e. same as size of block
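                        // The expression below is the usual integer round-up:
                        // the candidate start address is rounded up to the next
                        // multiple of VIRTIO_MEM_ALIGN_SIZE (and left unchanged
                        // if it is already aligned).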
678                         let start_addr = GuestAddress(
679                             (start_of_device_area.0 + virtio_devices::VIRTIO_MEM_ALIGN_SIZE - 1)
680                                 / virtio_devices::VIRTIO_MEM_ALIGN_SIZE
681                                 * virtio_devices::VIRTIO_MEM_ALIGN_SIZE,
682                         );
683 
684                         let region = MemoryManager::create_ram_region(
685                             &None,
686                             0,
687                             start_addr,
688                             hotplug_size as usize,
689                             false,
690                             zone.shared,
691                             zone.hugepages,
692                             zone.hugepage_size,
693                             zone.host_numa_node,
694                         )?;
695 
696                         virtio_mem_regions.push(region.clone());
697 
698                         memory_zone.virtio_mem_zone = Some(VirtioMemZone {
699                             region,
700                             resize_handler: virtio_devices::Resize::new()
701                                 .map_err(Error::EventFdFail)?,
702                             hotplugged_size: zone.hotplugged_size.unwrap_or(0),
703                             hugepages: zone.hugepages,
704                         });
705 
706                         start_of_device_area = start_addr
707                             .checked_add(hotplug_size)
708                             .ok_or(Error::GuestAddressOverFlow)?;
709                     }
710                 }
711             } else {
712                 return Err(Error::MissingZoneIdentifier);
713             }
714         }
715 
716         let guest_memory = GuestMemoryAtomic::new(guest_memory);
717 
718         let mut hotplug_slots = Vec::with_capacity(HOTPLUG_COUNT);
719         hotplug_slots.resize_with(HOTPLUG_COUNT, HotPlugState::default);
720 
721         // Both MMIO and PIO address spaces start at address 0.
722         let allocator = Arc::new(Mutex::new(
723             SystemAllocator::new(
724                 #[cfg(target_arch = "x86_64")]
725                 {
726                     GuestAddress(0)
727                 },
728                 #[cfg(target_arch = "x86_64")]
729                 {
730                     1 << 16
731                 },
732                 GuestAddress(0),
733                 mmio_address_space_size,
734                 layout::MEM_32BIT_DEVICES_START,
735                 layout::MEM_32BIT_DEVICES_SIZE,
736                 #[cfg(target_arch = "x86_64")]
737                 vec![GsiApic::new(
738                     X86_64_IRQ_BASE,
739                     ioapic::NUM_IOAPIC_PINS as u32 - X86_64_IRQ_BASE,
740                 )],
741             )
742             .ok_or(Error::CreateSystemAllocator)?,
743         ));
744 
745         #[cfg(feature = "acpi")]
746         let acpi_address = allocator
747             .lock()
748             .unwrap()
749             .allocate_mmio_addresses(None, MEMORY_MANAGER_ACPI_SIZE as u64, None)
750             .ok_or(Error::AllocateMmioAddress)?;
751 
752         #[cfg(not(feature = "tdx"))]
753         let log_dirty = true;
754         #[cfg(feature = "tdx")]
755         let log_dirty = !tdx_enabled; // Cannot log dirty pages on a TD
756 
757         let memory_manager = Arc::new(Mutex::new(MemoryManager {
758             boot_guest_memory,
759             guest_memory: guest_memory.clone(),
760             next_memory_slot: 0,
761             start_of_device_area,
762             end_of_device_area,
763             vm,
764             hotplug_slots,
765             selected_slot: 0,
766             mergeable: config.mergeable,
767             allocator: allocator.clone(),
768             hotplug_method: config.hotplug_method.clone(),
769             boot_ram: ram_size,
770             current_ram: ram_size,
771             next_hotplug_slot: 0,
772             snapshot: Mutex::new(None),
773             shared: config.shared,
774             hugepages: config.hugepages,
775             hugepage_size: config.hugepage_size,
776             #[cfg(target_arch = "x86_64")]
777             sgx_epc_region: None,
778             user_provided_zones,
779             snapshot_memory_regions: Vec::new(),
780             memory_zones,
781             guest_ram_mappings: Vec::new(),
782             #[cfg(feature = "acpi")]
783             acpi_address,
784             log_dirty,
785         }));
786 
787         for region in guest_memory.memory().iter() {
788             let mut mm = memory_manager.lock().unwrap();
789             let slot = mm.create_userspace_mapping(
790                 region.start_addr().raw_value(),
791                 region.len() as u64,
792                 region.as_ptr() as u64,
793                 config.mergeable,
794                 false,
795                 log_dirty,
796             )?;
797             mm.guest_ram_mappings.push(GuestRamMapping {
798                 gpa: region.start_addr().raw_value(),
799                 size: region.len(),
800                 slot,
801             });
802         }
803 
804         for region in virtio_mem_regions.drain(..) {
805             let mut mm = memory_manager.lock().unwrap();
806             let slot = mm.create_userspace_mapping(
807                 region.start_addr().raw_value(),
808                 region.len() as u64,
809                 region.as_ptr() as u64,
810                 config.mergeable,
811                 false,
812                 log_dirty,
813             )?;
814 
815             mm.guest_ram_mappings.push(GuestRamMapping {
816                 gpa: region.start_addr().raw_value(),
817                 size: region.len(),
818                 slot,
819             });
820             allocator
821                 .lock()
822                 .unwrap()
823                 .allocate_mmio_addresses(Some(region.start_addr()), region.len(), None)
824                 .ok_or(Error::MemoryRangeAllocation)?;
825             mm.add_region(region)?;
826         }
827 
828         // Allocate RAM and Reserved address ranges.
829         for region in arch_mem_regions.iter() {
830             allocator
831                 .lock()
832                 .unwrap()
833                 .allocate_mmio_addresses(Some(region.0), region.1 as GuestUsize, None)
834                 .ok_or(Error::MemoryRangeAllocation)?;
835         }
836 
837         Ok(memory_manager)
838     }
839 
840     pub fn new_from_snapshot(
841         snapshot: &Snapshot,
842         vm: Arc<dyn hypervisor::Vm>,
843         config: &MemoryConfig,
844         source_url: Option<&str>,
845         prefault: bool,
846         phys_bits: u8,
847     ) -> Result<Arc<Mutex<MemoryManager>>, Error> {
848         let mm = MemoryManager::new(
849             vm,
850             config,
851             prefault,
852             phys_bits,
853             #[cfg(feature = "tdx")]
854             false,
855         )?;
856 
857         if let Some(source_url) = source_url {
858             let vm_snapshot_path = url_to_path(source_url).map_err(Error::Restore)?;
859 
860             let mem_snapshot: MemoryManagerSnapshotData = snapshot
861                 .to_versioned_state(MEMORY_MANAGER_SNAPSHOT_ID)
862                 .map_err(Error::Restore)?;
863 
864             // Here we turn the content file name into a content file path as
865             // this will be needed to copy the content of the saved memory
866             // region into the newly created memory region.
867             // We simply ignore the content files that are None, as they
868             // represent regions that have been directly saved by the user, with
869             // no need for saving into a dedicated external file. For these
870             // files, the VmConfig already contains the information on where to
871             // find them.
872             let mut saved_regions = mem_snapshot.memory_regions;
873             for region in saved_regions.iter_mut() {
874                 if let Some(content) = &mut region.content {
875                     let mut memory_region_path = vm_snapshot_path.clone();
876                     memory_region_path.push(content.clone());
877                     *content = memory_region_path.to_str().unwrap().to_owned();
878                 }
879             }
880 
881             mm.lock().unwrap().fill_saved_regions(saved_regions)?;
882 
883             Ok(mm)
884         } else {
885             Ok(mm)
886         }
887     }
888 
889     fn memfd_create(name: &ffi::CStr, flags: u32) -> Result<RawFd, io::Error> {
890         let res = unsafe { libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags) };
891 
892         if res < 0 {
893             Err(io::Error::last_os_error())
894         } else {
895             Ok(res as RawFd)
896         }
897     }
898 
899     fn mbind(
900         addr: *mut u8,
901         len: u64,
902         mode: u32,
903         nodemask: Vec<u64>,
904         maxnode: u64,
905         flags: u32,
906     ) -> Result<(), io::Error> {
907         let res = unsafe {
908             libc::syscall(
909                 libc::SYS_mbind,
910                 addr as *mut libc::c_void,
911                 len,
912                 mode,
913                 nodemask.as_ptr(),
914                 maxnode,
915                 flags,
916             )
917         };
918 
919         if res < 0 {
920             Err(io::Error::last_os_error())
921         } else {
922             Ok(())
923         }
924     }
925 
926     #[allow(clippy::too_many_arguments)]
927     fn create_ram_region(
928         backing_file: &Option<PathBuf>,
929         file_offset: u64,
930         start_addr: GuestAddress,
931         size: usize,
932         prefault: bool,
933         shared: bool,
934         hugepages: bool,
935         hugepage_size: Option<u64>,
936         host_numa_node: Option<u32>,
937     ) -> Result<Arc<GuestRegionMmap>, Error> {
938         let (f, f_off) = match backing_file {
939             Some(ref file) => {
940                 if file.is_dir() {
941                     // Override file offset as it does not apply in this case.
942                     info!(
943                         "Ignoring file offset since the backing file is a \
944                         temporary file created from the specified directory."
945                     );
946                     let fs_str = format!("{}{}", file.display(), "/tmpfile_XXXXXX");
947                     let fs = ffi::CString::new(fs_str).unwrap();
948                     let mut path = fs.as_bytes_with_nul().to_owned();
949                     let path_ptr = path.as_mut_ptr() as *mut _;
950                     let fd = unsafe { libc::mkstemp(path_ptr) };
951                     unsafe { libc::unlink(path_ptr) };
952                     let f = unsafe { File::from_raw_fd(fd) };
953                     f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
954 
955                     (f, 0)
956                 } else {
957                     let f = OpenOptions::new()
958                         .read(true)
959                         .write(true)
960                         .open(file)
961                         .map_err(Error::SharedFileCreate)?;
962 
963                     (f, file_offset)
964                 }
965             }
966             None => {
967                 let fd = Self::memfd_create(
968                     &ffi::CString::new("ch_ram").unwrap(),
969                     if hugepages {
970                         libc::MFD_HUGETLB
971                             | if let Some(hugepage_size) = hugepage_size {
972                                 /*
973                                  * From the Linux kernel:
974                                  * Several system calls take a flag to request "hugetlb" huge pages.
975                                  * Without further specification, these system calls will use the
976                                  * system's default huge page size.  If a system supports multiple
977                                  * huge page sizes, the desired huge page size can be specified in
978                                  * bits [26:31] of the flag arguments.  The value in these 6 bits
979                                  * will encode the log2 of the huge page size.
980                                  */
981 
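                                // For example, a 2MiB huge page has
                                // log2(2MiB) = 21, so the encoded value is
                                // 21 << 26 (Linux's MFD_HUGE_2MB).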
982                                 hugepage_size.trailing_zeros() << 26
983                             } else {
984                                 // Use the system default huge page size
985                                 0
986                             }
987                     } else {
988                         0
989                     },
990                 )
991                 .map_err(Error::SharedFileCreate)?;
992 
993                 let f = unsafe { File::from_raw_fd(fd) };
994                 f.set_len(size as u64).map_err(Error::SharedFileSetLen)?;
995 
996                 (f, 0)
997             }
998         };
999 
1000         let mut mmap_flags = libc::MAP_NORESERVE
1001             | if shared {
1002                 libc::MAP_SHARED
1003             } else {
1004                 libc::MAP_PRIVATE
1005             };
1006         if prefault {
1007             mmap_flags |= libc::MAP_POPULATE;
1008         }
1009 
1010         let region = GuestRegionMmap::new(
1011             MmapRegion::build(
1012                 Some(FileOffset::new(f, f_off)),
1013                 size,
1014                 libc::PROT_READ | libc::PROT_WRITE,
1015                 mmap_flags,
1016             )
1017             .map_err(Error::GuestMemoryRegion)?,
1018             start_addr,
1019         )
1020         .map_err(Error::GuestMemory)?;
1021 
1022         // Apply NUMA policy if needed.
1023         if let Some(node) = host_numa_node {
1024             let addr = region.deref().as_ptr();
1025             let len = region.deref().size() as u64;
1026             let mode = MPOL_BIND;
1027             let mut nodemask: Vec<u64> = Vec::new();
1028             let flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
1029 
1030             // Linux is kind of buggy in the way it interprets maxnode as it
1031             // will cut off the last node. That's why we have to add 1 to what
1032             // we would consider as the proper maxnode value.
1033             let maxnode = node as u64 + 1 + 1;
1034 
1035             // Allocate the right size for the vector.
1036             nodemask.resize((node as usize / 64) + 1, 0);
1037 
1038             // Fill the global bitmask through the nodemask vector.
1039             let idx = (node / 64) as usize;
1040             let shift = node % 64;
1041             nodemask[idx] |= 1u64 << shift;
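            // As an example, binding to host node 3 gives idx = 0 and shift = 3,
            // so nodemask = [0b1000], and maxnode = 5 (node index 3, plus 1 for
            // the count, plus 1 for the kernel quirk described above).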
1042 
1043             // Policies are enforced by using MPOL_MF_MOVE flag as it will
1044             // force the kernel to move all pages that might have been already
1045             // allocated to the proper set of NUMA nodes. MPOL_MF_STRICT is
1046             // used to throw an error if MPOL_MF_MOVE didn't succeed.
1047             // MPOL_BIND is the selected mode as it specifies a strict policy
1048             // that restricts memory allocation to the nodes specified in the
1049             // nodemask.
1050             Self::mbind(addr, len, mode, nodemask, maxnode, flags)
1051                 .map_err(Error::ApplyNumaPolicy)?;
1052         }
1053 
1054         Ok(Arc::new(region))
1055     }
1056 
1057     // Update the GuestMemoryMmap with the new range
1058     fn add_region(&mut self, region: Arc<GuestRegionMmap>) -> Result<(), Error> {
1059         let guest_memory = self
1060             .guest_memory
1061             .memory()
1062             .insert_region(region)
1063             .map_err(Error::GuestMemory)?;
1064         self.guest_memory.lock().unwrap().replace(guest_memory);
1065 
1066         Ok(())
1067     }
1068 
1069     //
1070     // Calculate the start address of an area next to RAM.
1071     //
1072     // If memory hotplug is allowed, the start address needs to be aligned
1073     // (rounded up) to a 128MiB boundary.
1074     // If memory hotplug is not allowed, there is no alignment required.
1075     // On x86_64, if RAM ends below the 32-bit reserved area, it starts at RAM_64BIT_START instead.
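    // For example, when hotplug is allowed and RAM ends exactly at the 4GiB
    // mark, the last address is 0xFFFF_FFFF; OR-ing with (128 << 20) - 1
    // leaves it unchanged and the +1 below yields 0x1_0000_0000, the next
    // 128MiB boundary.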
1076     #[allow(clippy::let_and_return)]
1077     fn start_addr(mem_end: GuestAddress, allow_mem_hotplug: bool) -> Result<GuestAddress, Error> {
1078         let mut start_addr = if allow_mem_hotplug {
1079             GuestAddress(mem_end.0 | ((128 << 20) - 1))
1080         } else {
1081             mem_end
1082         };
1083 
1084         start_addr = start_addr
1085             .checked_add(1)
1086             .ok_or(Error::GuestAddressOverFlow)?;
1087 
1088         #[cfg(target_arch = "x86_64")]
1089         if mem_end < arch::layout::MEM_32BIT_RESERVED_START {
1090             return Ok(arch::layout::RAM_64BIT_START);
1091         }
1092 
1093         Ok(start_addr)
1094     }
1095 
1096     pub fn add_ram_region(
1097         &mut self,
1098         start_addr: GuestAddress,
1099         size: usize,
1100     ) -> Result<Arc<GuestRegionMmap>, Error> {
1101         // Allocate memory for the region
1102         let region = MemoryManager::create_ram_region(
1103             &None,
1104             0,
1105             start_addr,
1106             size,
1107             false,
1108             self.shared,
1109             self.hugepages,
1110             self.hugepage_size,
1111             None,
1112         )?;
1113 
1114         // Map it into the guest
1115         let slot = self.create_userspace_mapping(
1116             region.start_addr().0,
1117             region.len() as u64,
1118             region.as_ptr() as u64,
1119             self.mergeable,
1120             false,
1121             self.log_dirty,
1122         )?;
1123         self.guest_ram_mappings.push(GuestRamMapping {
1124             gpa: region.start_addr().raw_value(),
1125             size: region.len(),
1126             slot,
1127         });
1128 
1129         self.add_region(Arc::clone(&region))?;
1130 
1131         Ok(region)
1132     }
1133 
1134     fn hotplug_ram_region(&mut self, size: usize) -> Result<Arc<GuestRegionMmap>, Error> {
1135         info!("Hotplugging new RAM: {}", size);
1136 
1137         // Check that there is a free slot
1138         if self.next_hotplug_slot >= HOTPLUG_COUNT {
1139             return Err(Error::NoSlotAvailable);
1140         }
1141 
1142         // "Inserted" DIMM must have a size that is a multiple of 128MiB
1143         if size % (128 << 20) != 0 {
1144             return Err(Error::InvalidSize);
1145         }
1146 
1147         let start_addr = MemoryManager::start_addr(self.guest_memory.memory().last_addr(), true)?;
1148 
1149         if start_addr.checked_add(size.try_into().unwrap()).unwrap() > self.start_of_device_area() {
1150             return Err(Error::InsufficientHotplugRam);
1151         }
1152 
1153         let region = self.add_ram_region(start_addr, size)?;
1154 
1155         // Add region to the list of regions associated with the default
1156         // memory zone.
1157         if let Some(memory_zone) = self.memory_zones.get_mut(DEFAULT_MEMORY_ZONE) {
1158             memory_zone.regions.push(Arc::clone(&region));
1159         }
1160 
1161         // Tell the allocator
1162         self.allocator
1163             .lock()
1164             .unwrap()
1165             .allocate_mmio_addresses(Some(start_addr), size as GuestUsize, None)
1166             .ok_or(Error::MemoryRangeAllocation)?;
1167 
1168         // Update the slot so that it can be queried via the I/O port
1169         let mut slot = &mut self.hotplug_slots[self.next_hotplug_slot];
1170         slot.active = true;
1171         slot.inserting = true;
1172         slot.base = region.start_addr().0;
1173         slot.length = region.len() as u64;
1174 
1175         self.next_hotplug_slot += 1;
1176 
1177         Ok(region)
1178     }
1179 
1180     pub fn guest_memory(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
1181         self.guest_memory.clone()
1182     }
1183 
1184     pub fn boot_guest_memory(&self) -> GuestMemoryMmap {
1185         self.boot_guest_memory.clone()
1186     }
1187 
1188     pub fn allocator(&self) -> Arc<Mutex<SystemAllocator>> {
1189         self.allocator.clone()
1190     }
1191 
1192     pub fn start_of_device_area(&self) -> GuestAddress {
1193         self.start_of_device_area
1194     }
1195 
1196     pub fn end_of_device_area(&self) -> GuestAddress {
1197         self.end_of_device_area
1198     }
1199 
1200     pub fn allocate_memory_slot(&mut self) -> u32 {
1201         let slot_id = self.next_memory_slot;
1202         self.next_memory_slot += 1;
1203         slot_id
1204     }
1205 
1206     pub fn create_userspace_mapping(
1207         &mut self,
1208         guest_phys_addr: u64,
1209         memory_size: u64,
1210         userspace_addr: u64,
1211         mergeable: bool,
1212         readonly: bool,
1213         log_dirty: bool,
1214     ) -> Result<u32, Error> {
1215         let slot = self.allocate_memory_slot();
1216         let mem_region = self.vm.make_user_memory_region(
1217             slot,
1218             guest_phys_addr,
1219             memory_size,
1220             userspace_addr,
1221             readonly,
1222             log_dirty,
1223         );
1224 
1225         self.vm
1226             .create_user_memory_region(mem_region)
1227             .map_err(Error::CreateUserMemoryRegion)?;
1228 
1229         // Mark the pages as mergeable if explicitly asked for.
1230         if mergeable {
1231             // Safe because the address and size are valid since the
1232             // mmap succeeded.
1233             let ret = unsafe {
1234                 libc::madvise(
1235                     userspace_addr as *mut libc::c_void,
1236                     memory_size as libc::size_t,
1237                     libc::MADV_MERGEABLE,
1238                 )
1239             };
1240             if ret != 0 {
1241                 let err = io::Error::last_os_error();
1242                 // Safe to unwrap because the error is constructed with
1243                 // last_os_error(), which ensures the output will be Some().
1244                 let errno = err.raw_os_error().unwrap();
1245                 if errno == libc::EINVAL {
1246                     warn!("kernel not configured with CONFIG_KSM");
1247                 } else {
1248                     warn!("madvise error: {}", err);
1249                 }
1250                 warn!("failed to mark pages as mergeable");
1251             }
1252         }
1253 
1254         info!(
1255             "Created userspace mapping: {:x} -> {:x} {:x}",
1256             guest_phys_addr, userspace_addr, memory_size
1257         );
1258 
1259         Ok(slot)
1260     }
1261 
1262     pub fn remove_userspace_mapping(
1263         &mut self,
1264         guest_phys_addr: u64,
1265         memory_size: u64,
1266         userspace_addr: u64,
1267         mergeable: bool,
1268         slot: u32,
1269     ) -> Result<(), Error> {
1270         let mem_region = self.vm.make_user_memory_region(
1271             slot,
1272             guest_phys_addr,
1273             memory_size,
1274             userspace_addr,
1275             false, /* readonly -- don't care */
1276             false, /* log dirty */
1277         );
1278 
1279         self.vm
1280             .remove_user_memory_region(mem_region)
1281             .map_err(Error::RemoveUserMemoryRegion)?;
1282 
1283         // Mark the pages as unmergeable if they were previously marked as
1284         // mergeable.
1285         if mergeable {
1286             // Safe because the address and size are valid as the region was
1287             // previously advised.
1288             let ret = unsafe {
1289                 libc::madvise(
1290                     userspace_addr as *mut libc::c_void,
1291                     memory_size as libc::size_t,
1292                     libc::MADV_UNMERGEABLE,
1293                 )
1294             };
1295             if ret != 0 {
1296                 let err = io::Error::last_os_error();
1297                 // Safe to unwrap because the error is constructed with
1298                 // last_os_error(), which ensures the output will be Some().
1299                 let errno = err.raw_os_error().unwrap();
1300                 if errno == libc::EINVAL {
1301                     warn!("kernel not configured with CONFIG_KSM");
1302                 } else {
1303                     warn!("madvise error: {}", err);
1304                 }
1305                 warn!("failed to mark pages as unmergeable");
1306             }
1307         }
1308 
1309         info!(
1310             "Removed userspace mapping: {:x} -> {:x} {:x}",
1311             guest_phys_addr, userspace_addr, memory_size
1312         );
1313 
1314         Ok(())
1315     }
1316 
1317     pub fn virtio_mem_resize(&mut self, id: &str, size: u64) -> Result<(), Error> {
1318         if let Some(memory_zone) = self.memory_zones.get_mut(id) {
1319             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
1320                 virtio_mem_zone
1321                     .resize_handler()
1322                     .work(size)
1323                     .map_err(Error::VirtioMemResizeFail)?;
1324             } else {
1325                 error!("Failed resizing virtio-mem region: No virtio-mem handler");
1326                 return Err(Error::MissingVirtioMemHandler);
1327             }
1328 
1329             return Ok(());
1330         }
1331 
1332         error!("Failed resizing virtio-mem region: Unknown memory zone");
1333         Err(Error::UnknownMemoryZone)
1334     }
1335 
1336     /// If this function results in adding a new memory region to the guest
1337     /// memory, the new region is returned to the caller. The virtio-mem use
1338     /// case never adds a new region, as the whole hotpluggable memory has
1339     /// already been allocated at boot time.
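    /// For example, with 4GiB of boot RAM, a resize to 6GiB either asks the
    /// default virtio-mem zone to expose 2GiB (desired_ram - boot_ram), or,
    /// with the ACPI hotplug method, plugs a new 2GiB region
    /// (desired_ram - current_ram) after the existing RAM.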
1340     pub fn resize(&mut self, desired_ram: u64) -> Result<Option<Arc<GuestRegionMmap>>, Error> {
1341         if self.user_provided_zones {
1342             error!(
1343                 "Not allowed to resize guest memory when backed with user \
1344                 defined memory zones."
1345             );
1346             return Err(Error::InvalidResizeWithMemoryZones);
1347         }
1348 
1349         let mut region: Option<Arc<GuestRegionMmap>> = None;
1350         match self.hotplug_method {
1351             HotplugMethod::VirtioMem => {
1352                 if desired_ram >= self.boot_ram {
1353                     self.virtio_mem_resize(DEFAULT_MEMORY_ZONE, desired_ram - self.boot_ram)?;
1354                     self.current_ram = desired_ram;
1355                 }
1356             }
1357             HotplugMethod::Acpi => {
1358                 if desired_ram > self.current_ram {
1359                     region =
1360                         Some(self.hotplug_ram_region((desired_ram - self.current_ram) as usize)?);
1361                     self.current_ram = desired_ram;
1362                 }
1363             }
1364         }
1365         Ok(region)
1366     }
1367 
1368     pub fn resize_zone(&mut self, id: &str, virtio_mem_size: u64) -> Result<(), Error> {
1369         if !self.user_provided_zones {
1370             error!(
1371                 "Not allowed to resize guest memory zone when no zone is \
1372                 defined."
1373             );
1374             return Err(Error::ResizeZone);
1375         }
1376 
1377         self.virtio_mem_resize(id, virtio_mem_size)
1378     }
1379 
1380     #[cfg(target_arch = "x86_64")]
1381     pub fn setup_sgx(
1382         &mut self,
1383         sgx_epc_config: Vec<SgxEpcConfig>,
1384         vm: &Arc<dyn hypervisor::Vm>,
1385     ) -> Result<(), Error> {
1386         let file = OpenOptions::new()
1387             .read(true)
1388             .open("/dev/sgx_provision")
1389             .map_err(Error::SgxProvisionOpen)?;
1390         vm.enable_sgx_attribute(file)
1391             .map_err(Error::SgxEnableProvisioning)?;
1392 
1393         // Go over each EPC section and verify its size is a 4k multiple. At
1394         // the same time, calculate the total size needed for the contiguous
1395         // EPC region.
1396         let mut epc_region_size = 0;
1397         for epc_section in sgx_epc_config.iter() {
1398             if epc_section.size == 0 {
1399                 return Err(Error::EpcSectionSizeInvalid);
1400             }
1401             if epc_section.size & 0x0fff != 0 {
1402                 return Err(Error::EpcSectionSizeInvalid);
1403             }
1404 
1405             epc_region_size += epc_section.size;
1406         }
1407 
1408         // Now that we know the total size of the EPC region, we can proceed
1409         // with the allocation of the entire range. The EPC region must be
1410         // 4KiB aligned.
1411         let epc_region_start = self
1412             .allocator
1413             .lock()
1414             .unwrap()
1415             .allocate_mmio_addresses(None, epc_region_size as GuestUsize, Some(0x1000))
1416             .ok_or(Error::SgxEpcRangeAllocation)?;
1417 
1418         let mut sgx_epc_region = SgxEpcRegion::new(epc_region_start, epc_region_size as GuestUsize);
1419 
1420         // Each section can be memory mapped into the allocated region.
1421         let mut epc_section_start = epc_region_start.raw_value();
1422         for epc_section in sgx_epc_config.iter() {
1423             let file = OpenOptions::new()
1424                 .read(true)
1425                 .write(true)
1426                 .open("/dev/sgx_vepc")
1427                 .map_err(Error::SgxVirtEpcOpen)?;
1428 
1429             let prot = PROT_READ | PROT_WRITE;
1430             let mut flags = MAP_NORESERVE | MAP_SHARED;
1431             if epc_section.prefault {
1432                 flags |= MAP_POPULATE;
1433             }
1434 
1435             // We can't use the vm-memory crate to perform the memory mapping
1436             // here as it would try to ensure the size of the backing file is
1437             // matching the size of the expected mapping. The /dev/sgx_vepc
1438             // device does not work that way: the file descriptor it provides
1439             // does not match the mapping size, as it's just a way to
1440             // let KVM know that an EPC section is being created for the guest.
1441             let host_addr = unsafe {
1442                 libc::mmap(
1443                     std::ptr::null_mut(),
1444                     epc_section.size as usize,
1445                     prot,
1446                     flags,
1447                     file.as_raw_fd(),
1448                     0,
1449                 )
1450             } as u64;
1451 
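            // Register the freshly mmap'ed host address as guest memory so the
            // guest sees this EPC section at GPA epc_section_start.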
1452             let _mem_slot = self.create_userspace_mapping(
1453                 epc_section_start,
1454                 epc_section.size,
1455                 host_addr,
1456                 false,
1457                 false,
1458                 false,
1459             )?;
1460 
1461             sgx_epc_region.insert(
1462                 epc_section.id.clone(),
1463                 SgxEpcSection::new(
1464                     GuestAddress(epc_section_start),
1465                     epc_section.size as GuestUsize,
1466                 ),
1467             );
1468 
1469             epc_section_start += epc_section.size;
1470         }
1471 
1472         self.sgx_epc_region = Some(sgx_epc_region);
1473 
1474         Ok(())
1475     }
1476 
1477     #[cfg(target_arch = "x86_64")]
1478     pub fn sgx_epc_region(&self) -> &Option<SgxEpcRegion> {
1479         &self.sgx_epc_region
1480     }
1481 
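    // Returns true when the file has at least one link in the host filesystem
    // (st_nlink > 0). Anonymous files, such as those created with memfd_create,
    // report st_nlink == 0, which lets the snapshot code distinguish regions
    // backed by a real, user-accessible file from anonymous mappings.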
1482     pub fn is_hardlink(f: &File) -> bool {
1483         let mut stat = std::mem::MaybeUninit::<libc::stat>::uninit();
1484         let ret = unsafe { libc::fstat(f.as_raw_fd(), stat.as_mut_ptr()) };
1485         if ret != 0 {
1486             error!("Couldn't fstat the backing file");
1487             return false;
1488         }
1489 
1490         unsafe { (*stat.as_ptr()).st_nlink as usize > 0 }
1491     }
1492 
1493     pub fn memory_zones(&self) -> &MemoryZones {
1494         &self.memory_zones
1495     }
1496 
1497     // Generate a table of the dirty pages. Contiguous dirty pages are collapsed
1498     // into a single entry in the table.
1499     pub fn dirty_memory_range_table(
1500         &self,
1501     ) -> std::result::Result<MemoryRangeTable, MigratableError> {
1502         let page_size = 4096; // TODO: Does this need to vary?
1503         let mut table = MemoryRangeTable::default();
1504         for r in &self.guest_ram_mappings {
1505             let vm_dirty_bitmap = self.vm.get_dirty_log(r.slot, r.size).map_err(|e| {
1506                 MigratableError::MigrateSend(anyhow!("Error getting VM dirty log {}", e))
1507             })?;
1508             let vmm_dirty_bitmap = match self.guest_memory.memory().find_region(GuestAddress(r.gpa))
1509             {
1510                 Some(region) => {
1511                     assert!(region.start_addr().raw_value() == r.gpa);
1512                     assert!(region.len() == r.size);
1513                     region.bitmap().get_and_reset()
1514                 }
1515                 None => {
1516                     return Err(MigratableError::MigrateSend(anyhow!(
1517                         "Error finding 'guest memory region' with address {:x}",
1518                         r.gpa
1519                     )))
1520                 }
1521             };
1522 
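            // A page is dirty if either the hypervisor's dirty log or the VMM's own
            // dirty bitmap flagged it, so OR the two bitmaps together.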
1523             let dirty_bitmap: Vec<u64> = vm_dirty_bitmap
1524                 .iter()
1525                 .zip(vmm_dirty_bitmap.iter())
1526                 .map(|(x, y)| x | y)
1527                 .collect();
1528 
1529             let mut entry: Option<MemoryRange> = None;
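            // Each u64 block covers 64 pages: bit j of block i corresponds to page
            // index i * 64 + j, e.g. bit 3 of block 2 maps to GPA r.gpa + 131 * 4096.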
1530             for (i, block) in dirty_bitmap.iter().enumerate() {
1531                 for j in 0..64 {
1532                     let is_page_dirty = ((block >> j) & 1u64) != 0u64;
1533                     let page_offset = ((i * 64) + j) as u64 * page_size;
1534                     if is_page_dirty {
1535                         if let Some(entry) = &mut entry {
1536                             entry.length += page_size;
1537                         } else {
1538                             entry = Some(MemoryRange {
1539                                 gpa: r.gpa + page_offset,
1540                                 length: page_size,
1541                             });
1542                         }
1543                     } else if let Some(entry) = entry.take() {
1544                         table.push(entry);
1545                     }
1546                 }
1547             }
1548             if let Some(entry) = entry.take() {
1549                 table.push(entry);
1550             }
1551 
1552             if table.regions().is_empty() {
1553                 info!("Dirty Memory Range Table is empty");
1554             } else {
1555                 info!("Dirty Memory Range Table:");
1556                 for range in table.regions() {
1557                     info!("GPA: {:x} size: {} (KiB)", range.gpa, range.length / 1024);
1558                 }
1559             }
1560         }
1561         Ok(table)
1562     }
1563 
1564     // The kernel clears the dirty log as a side effect of the KVM_GET_DIRTY_LOG ioctl.
1565     // Just before we do a bulk copy we want to clear the dirty log so that pages
1566     // touched during the bulk copy are tracked from a clean baseline.
1567     pub fn start_memory_dirty_log(&self) -> std::result::Result<(), MigratableError> {
1568         for r in &self.guest_ram_mappings {
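            // The returned bitmap is deliberately discarded; issuing the ioctl is
            // what clears the kernel-side dirty log for this slot.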
1569             self.vm.get_dirty_log(r.slot, r.size).map_err(|e| {
1570                 MigratableError::MigrateSend(anyhow!("Error getting VM dirty log {}", e))
1571             })?;
1572         }
1573 
1574         for r in self.guest_memory.memory().iter() {
1575             r.bitmap().reset();
1576         }
1577 
1578         Ok(())
1579     }
1580 }
1581 
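// For a single hotplug slot, MemoryNotify emits AML roughly equivalent to:
//     If (Arg0 == <slot_id>) { Notify (M<slot_id>, Arg1) }
// which lets the MTFY method forward a notification to the right memory device.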
1582 #[cfg(feature = "acpi")]
1583 struct MemoryNotify {
1584     slot_id: usize,
1585 }
1586 
1587 #[cfg(feature = "acpi")]
1588 impl Aml for MemoryNotify {
1589     fn to_aml_bytes(&self) -> Vec<u8> {
1590         let object = aml::Path::new(&format!("M{:03}", self.slot_id));
1591         aml::If::new(
1592             &aml::Equal::new(&aml::Arg(0), &self.slot_id),
1593             vec![&aml::Notify::new(&object, &aml::Arg(1))],
1594         )
1595         .to_aml_bytes()
1596     }
1597 }
1598 
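// MemorySlot emits one ACPI memory device (M000, M001, ...) per hotplug slot.
// PNP0C80 is the standard _HID for a memory device; its _STA and _CRS methods
// defer to the MSTA and MCRS methods defined in MemoryMethods.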
1599 #[cfg(feature = "acpi")]
1600 struct MemorySlot {
1601     slot_id: usize,
1602 }
1603 
1604 #[cfg(feature = "acpi")]
1605 impl Aml for MemorySlot {
1606     fn to_aml_bytes(&self) -> Vec<u8> {
1607         aml::Device::new(
1608             format!("M{:03}", self.slot_id).as_str().into(),
1609             vec![
1610                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C80")),
1611                 &aml::Name::new("_UID".into(), &self.slot_id),
1612                 /*
1613                 _STA return value:
1614                 Bit [0] – Set if the device is present.
1615                 Bit [1] – Set if the device is enabled and decoding its resources.
1616                 Bit [2] – Set if the device should be shown in the UI.
1617                 Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics).
1618                 Bit [4] – Set if the battery is present.
1619                 Bits [31:5] – Reserved (must be cleared).
1620                 */
1621                 &aml::Method::new(
1622                     "_STA".into(),
1623                     0,
1624                     false,
1625                     // Call into MSTA method which will interrogate device
1626                     vec![&aml::Return::new(&aml::MethodCall::new(
1627                         "MSTA".into(),
1628                         vec![&self.slot_id],
1629                     ))],
1630                 ),
1631                 // Get details of memory
1632                 &aml::Method::new(
1633                     "_CRS".into(),
1634                     0,
1635                     false,
1636                     // Call into MCRS which provides actual memory details
1637                     vec![&aml::Return::new(&aml::MethodCall::new(
1638                         "MCRS".into(),
1639                         vec![&self.slot_id],
1640                     ))],
1641                 ),
1642             ],
1643         )
1644         .to_aml_bytes()
1645     }
1646 }
1647 
1648 #[cfg(feature = "acpi")]
1649 struct MemorySlots {
1650     slots: usize,
1651 }
1652 
1653 #[cfg(feature = "acpi")]
1654 impl Aml for MemorySlots {
1655     fn to_aml_bytes(&self) -> Vec<u8> {
1656         let mut bytes = Vec::new();
1657 
1658         for slot_id in 0..self.slots {
1659             bytes.extend_from_slice(&MemorySlot { slot_id }.to_aml_bytes());
1660         }
1661 
1662         bytes
1663     }
1664 }
1665 
1666 #[cfg(feature = "acpi")]
1667 struct MemoryMethods {
1668     slots: usize,
1669 }
1670 
1671 #[cfg(feature = "acpi")]
1672 impl Aml for MemoryMethods {
1673     fn to_aml_bytes(&self) -> Vec<u8> {
1674         let mut bytes = Vec::new();
1675         // Add "MTFY" notification method
1676         let mut memory_notifies = Vec::new();
1677         for slot_id in 0..self.slots {
1678             memory_notifies.push(MemoryNotify { slot_id });
1679         }
1680 
1681         let mut memory_notifies_refs: Vec<&dyn aml::Aml> = Vec::new();
1682         for memory_notifier in memory_notifies.iter() {
1683             memory_notifies_refs.push(memory_notifier);
1684         }
1685 
1686         bytes.extend_from_slice(
1687             &aml::Method::new("MTFY".into(), 2, true, memory_notifies_refs).to_aml_bytes(),
1688         );
1689 
1690         // MSCN method: scan every hotplug slot and notify the devices being inserted or removed
1691         bytes.extend_from_slice(
1692             &aml::Method::new(
1693                 "MSCN".into(),
1694                 0,
1695                 true,
1696                 vec![
1697                     // Take lock defined above
1698                     &aml::Acquire::new("MLCK".into(), 0xffff),
1699                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1700                     &aml::While::new(
1701                         &aml::LessThan::new(&aml::Local(0), &self.slots),
1702                         vec![
1703                             // Write the current slot number (loop index in Local0) to the selector field
1704                             &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Local(0)),
1705                             // Check if MINS bit is set (inserting)
1706                             &aml::If::new(
1707                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MINS"), &aml::ONE),
1708                                 // Notify device if it is
1709                                 vec![
1710                                     &aml::MethodCall::new(
1711                                         "MTFY".into(),
1712                                         vec![&aml::Local(0), &aml::ONE],
1713                                     ),
1714                                     // Reset MINS bit
1715                                     &aml::Store::new(
1716                                         &aml::Path::new("\\_SB_.MHPC.MINS"),
1717                                         &aml::ONE,
1718                                     ),
1719                                 ],
1720                             ),
1721                             // Check if MRMV bit is set
1722                             &aml::If::new(
1723                                 &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MRMV"), &aml::ONE),
1724                                 // Notify device if it is (with the eject constant 0x3)
1725                                 vec![
1726                                     &aml::MethodCall::new(
1727                                         "MTFY".into(),
1728                                         vec![&aml::Local(0), &3u8],
1729                                     ),
1730                                     // Reset MRMV bit
1731                                     &aml::Store::new(
1732                                         &aml::Path::new("\\_SB_.MHPC.MRMV"),
1733                                         &aml::ONE,
1734                                     ),
1735                                 ],
1736                             ),
1737                             &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE),
1738                         ],
1739                     ),
1740                     // Release lock
1741                     &aml::Release::new("MLCK".into()),
1742                 ],
1743             )
1744             .to_aml_bytes(),
1745         );
1746 
1747         bytes.extend_from_slice(
1748             // Memory status method
1749             &aml::Method::new(
1750                 "MSTA".into(),
1751                 1,
1752                 true,
1753                 vec![
1754                     // Take lock defined above
1755                     &aml::Acquire::new("MLCK".into(), 0xffff),
1756                     // Write slot number (in first argument) to I/O port via field
1757                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1758                     &aml::Store::new(&aml::Local(0), &aml::ZERO),
1759                     // Check if MEN_ bit is set, if so make the local variable 0xf (see _STA for details of meaning)
1760                     &aml::If::new(
1761                         &aml::Equal::new(&aml::Path::new("\\_SB_.MHPC.MEN_"), &aml::ONE),
1762                         vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
1763                     ),
1764                     // Release lock
1765                     &aml::Release::new("MLCK".into()),
1766                     // Return 0 or 0xf
1767                     &aml::Return::new(&aml::Local(0)),
1768                 ],
1769             )
1770             .to_aml_bytes(),
1771         );
1772 
1773         bytes.extend_from_slice(
1774             // Memory range method
1775             &aml::Method::new(
1776                 "MCRS".into(),
1777                 1,
1778                 true,
1779                 vec![
1780                     // Take lock defined above
1781                     &aml::Acquire::new("MLCK".into(), 0xffff),
1782                     // Write slot number (in first argument) to I/O port via field
1783                     &aml::Store::new(&aml::Path::new("\\_SB_.MHPC.MSEL"), &aml::Arg(0)),
1784                     &aml::Name::new(
1785                         "MR64".into(),
1786                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1787                             aml::AddressSpaceCachable::Cacheable,
1788                             true,
1789                             0x0000_0000_0000_0000u64,
1790                             0xFFFF_FFFF_FFFF_FFFEu64,
1791                         )]),
1792                     ),
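                    // MR64 is a QWORD Address Space Descriptor buffer: the Range Minimum
                    // starts at byte 14, the Range Maximum at byte 22 and the Address
                    // Length at byte 38. The fields below expose the low and high halves
                    // of each so they can be filled in from the MHPC registers.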
1793                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &14usize, "MINL".into()),
1794                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &18usize, "MINH".into()),
1795                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &22usize, "MAXL".into()),
1796                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &26usize, "MAXH".into()),
1797                     &aml::CreateField::<u64>::new(&aml::Path::new("MR64"), &38usize, "LENL".into()),
1798                     &aml::CreateField::<u32>::new(&aml::Path::new("MR64"), &42usize, "LENH".into()),
1799                     &aml::Store::new(&aml::Path::new("MINL"), &aml::Path::new("\\_SB_.MHPC.MHBL")),
1800                     &aml::Store::new(&aml::Path::new("MINH"), &aml::Path::new("\\_SB_.MHPC.MHBH")),
1801                     &aml::Store::new(&aml::Path::new("LENL"), &aml::Path::new("\\_SB_.MHPC.MHLL")),
1802                     &aml::Store::new(&aml::Path::new("LENH"), &aml::Path::new("\\_SB_.MHPC.MHLH")),
1803                     &aml::Add::new(
1804                         &aml::Path::new("MAXL"),
1805                         &aml::Path::new("MINL"),
1806                         &aml::Path::new("LENL"),
1807                     ),
1808                     &aml::Add::new(
1809                         &aml::Path::new("MAXH"),
1810                         &aml::Path::new("MINH"),
1811                         &aml::Path::new("LENH"),
1812                     ),
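                    // Propagate the carry from the low 32-bit addition, then subtract one
                    // so that MAX holds the inclusive end address (base + length - 1).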
1813                     &aml::If::new(
1814                         &aml::LessThan::new(&aml::Path::new("MAXL"), &aml::Path::new("MINL")),
1815                         vec![&aml::Add::new(
1816                             &aml::Path::new("MAXH"),
1817                             &aml::ONE,
1818                             &aml::Path::new("MAXH"),
1819                         )],
1820                     ),
1821                     &aml::Subtract::new(
1822                         &aml::Path::new("MAXL"),
1823                         &aml::Path::new("MAXL"),
1824                         &aml::ONE,
1825                     ),
1826                     // Release lock
1827                     &aml::Release::new("MLCK".into()),
1828                     &aml::Return::new(&aml::Path::new("MR64")),
1829                 ],
1830             )
1831             .to_aml_bytes(),
1832         );
1833         bytes
1834     }
1835 }
1836 
1837 #[cfg(feature = "acpi")]
1838 impl Aml for MemoryManager {
1839     fn to_aml_bytes(&self) -> Vec<u8> {
1840         let mut bytes = Vec::new();
1841 
1842         // Memory Hotplug Controller
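        // The controller maps the hotplug MMIO window (acpi_address, spanning
        // MEMORY_MANAGER_ACPI_SIZE bytes) as an operation region whose named fields
        // are used by the MSCN, MSTA and MCRS methods provided by MemoryMethods.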
1843         bytes.extend_from_slice(
1844             &aml::Device::new(
1845                 "_SB_.MHPC".into(),
1846                 vec![
1847                     &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
1848                     &aml::Name::new("_UID".into(), &"Memory Hotplug Controller"),
1849                     // Mutex to protect concurrent access, since we write to select a slot and then read back its status
1850                     &aml::Mutex::new("MLCK".into(), 0),
1851                     &aml::Name::new(
1852                         "_CRS".into(),
1853                         &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1854                             aml::AddressSpaceCachable::NotCacheable,
1855                             true,
1856                             self.acpi_address.0 as u64,
1857                             self.acpi_address.0 + MEMORY_MANAGER_ACPI_SIZE as u64 - 1,
1858                         )]),
1859                     ),
1860                     // OpRegion and Fields map MMIO range into individual field values
1861                     &aml::OpRegion::new(
1862                         "MHPR".into(),
1863                         aml::OpRegionSpace::SystemMemory,
1864                         self.acpi_address.0 as usize,
1865                         MEMORY_MANAGER_ACPI_SIZE,
1866                     ),
1867                     &aml::Field::new(
1868                         "MHPR".into(),
1869                         aml::FieldAccessType::DWord,
1870                         aml::FieldUpdateRule::Preserve,
1871                         vec![
1872                             aml::FieldEntry::Named(*b"MHBL", 32), // Base (low 4 bytes)
1873                             aml::FieldEntry::Named(*b"MHBH", 32), // Base (high 4 bytes)
1874                             aml::FieldEntry::Named(*b"MHLL", 32), // Length (low 4 bytes)
1875                             aml::FieldEntry::Named(*b"MHLH", 32), // Length (high 4 bytes)
1876                         ],
1877                     ),
1878                     &aml::Field::new(
1879                         "MHPR".into(),
1880                         aml::FieldAccessType::DWord,
1881                         aml::FieldUpdateRule::Preserve,
1882                         vec![
1883                             aml::FieldEntry::Reserved(128),
1884                             aml::FieldEntry::Named(*b"MHPX", 32), // PXM
1885                         ],
1886                     ),
1887                     &aml::Field::new(
1888                         "MHPR".into(),
1889                         aml::FieldAccessType::Byte,
1890                         aml::FieldUpdateRule::WriteAsZeroes,
1891                         vec![
1892                             aml::FieldEntry::Reserved(160),
1893                             aml::FieldEntry::Named(*b"MEN_", 1), // Enabled
1894                             aml::FieldEntry::Named(*b"MINS", 1), // Inserting
1895                             aml::FieldEntry::Named(*b"MRMV", 1), // Removing
1896                             aml::FieldEntry::Named(*b"MEJ0", 1), // Ejecting
1897                         ],
1898                     ),
1899                     &aml::Field::new(
1900                         "MHPR".into(),
1901                         aml::FieldAccessType::DWord,
1902                         aml::FieldUpdateRule::Preserve,
1903                         vec![
1904                             aml::FieldEntry::Named(*b"MSEL", 32), // Selector
1905                             aml::FieldEntry::Named(*b"MOEV", 32), // Event
1906                             aml::FieldEntry::Named(*b"MOSC", 32), // OSC
1907                         ],
1908                     ),
1909                     &MemoryMethods {
1910                         slots: self.hotplug_slots.len(),
1911                     },
1912                     &MemorySlots {
1913                         slots: self.hotplug_slots.len(),
1914                     },
1915                 ],
1916             )
1917             .to_aml_bytes(),
1918         );
1919 
1920         #[cfg(target_arch = "x86_64")]
1921         {
1922             if let Some(sgx_epc_region) = &self.sgx_epc_region {
1923                 let min = sgx_epc_region.start().raw_value() as u64;
1924                 let max = min + sgx_epc_region.size() as u64 - 1;
1925                 // SGX EPC region
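                // INT0E0C is the _HID that the guest SGX driver is expected to
                // match for the device describing the virtual EPC region.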
1926                 bytes.extend_from_slice(
1927                     &aml::Device::new(
1928                         "_SB_.EPC_".into(),
1929                         vec![
1930                             &aml::Name::new("_HID".into(), &aml::EisaName::new("INT0E0C")),
1931                             // QWORD describing the EPC region start and size
1932                             &aml::Name::new(
1933                                 "_CRS".into(),
1934                                 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
1935                                     aml::AddressSpaceCachable::NotCacheable,
1936                                     true,
1937                                     min,
1938                                     max,
1939                                 )]),
1940                             ),
1941                             &aml::Method::new(
1942                                 "_STA".into(),
1943                                 0,
1944                                 false,
1945                                 vec![&aml::Return::new(&0xfu8)],
1946                             ),
1947                         ],
1948                     )
1949                     .to_aml_bytes(),
1950                 );
1951             }
1952         }
1953 
1954         bytes
1955     }
1956 }
1957 
1958 impl Pausable for MemoryManager {}
1959 
1960 #[derive(Clone, Versionize)]
1961 pub struct MemoryRegion {
1962     content: Option<String>,
1963     start_addr: u64,
1964     size: u64,
1965 }
1966 
1967 #[derive(Versionize)]
1968 pub struct MemoryManagerSnapshotData {
1969     memory_regions: Vec<MemoryRegion>,
1970 }
1971 
1972 impl VersionMapped for MemoryManagerSnapshotData {}
1973 
1974 impl Snapshottable for MemoryManager {
1975     fn id(&self) -> String {
1976         MEMORY_MANAGER_SNAPSHOT_ID.to_string()
1977     }
1978 
1979     fn snapshot(&mut self) -> result::Result<Snapshot, MigratableError> {
1980         let mut memory_manager_snapshot = Snapshot::new(MEMORY_MANAGER_SNAPSHOT_ID);
1981         let guest_memory = self.guest_memory.memory();
1982 
1983         let mut memory_regions: Vec<MemoryRegion> = Vec::new();
1984 
1985         for (index, region) in guest_memory.iter().enumerate() {
1986             if region.len() == 0 {
1987                 return Err(MigratableError::Snapshot(anyhow!("Zero length region")));
1988             }
1989 
1990             let mut content = Some(PathBuf::from(format!("memory-region-{}", index)));
1991             if let Some(file_offset) = region.file_offset() {
1992                 if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED)
1993                     && Self::is_hardlink(file_offset.file())
1994                 {
1995                     // In this very specific case, we know the memory region
1996                     // is backed by a file on the host filesystem that the
1997                     // user can access, and additionally the mapping is
1998                     // shared, which means that modifications to the content
1999                     // are written to the actual file.
2000                     // When both of these conditions are met, we can skip
2001                     // copying the memory content for this specific region,
2002                     // as we can assume the user already has it saved through
2003                     // the backing file.
2004                     content = None;
2005                 }
2006             }
2007 
2008             memory_regions.push(MemoryRegion {
2009                 content: content.map(|p| p.to_str().unwrap().to_owned()),
2010                 start_addr: region.start_addr().0,
2011                 size: region.len(),
2012             });
2013         }
2014 
2015         // Store this list of regions locally, as it will be used by the
2016         // Transportable::send() implementation. The point is to avoid
2017         // duplicating the code that builds the path for each region. The
2018         // 'snapshot' step creates the list of memory regions, including the
2019         // information on whether a region's content needs to be copied.
2020         // This saves the 'send' step from going through the same process
2021         // again; it can directly store the content of the regions that
2022         // require it.
2023         self.snapshot_memory_regions = memory_regions.clone();
2024 
2025         memory_manager_snapshot.add_data_section(SnapshotDataSection::new_from_versioned_state(
2026             MEMORY_MANAGER_SNAPSHOT_ID,
2027             &MemoryManagerSnapshotData { memory_regions },
2028         )?);
2029 
2030         let mut memory_snapshot = self.snapshot.lock().unwrap();
2031         *memory_snapshot = Some(guest_memory);
2032 
2033         Ok(memory_manager_snapshot)
2034     }
2035 }
2036 
2037 impl Transportable for MemoryManager {
2038     fn send(
2039         &self,
2040         _snapshot: &Snapshot,
2041         destination_url: &str,
2042     ) -> result::Result<(), MigratableError> {
2043         let vm_memory_snapshot_path = url_to_path(destination_url)?;
2044 
2045         if let Some(guest_memory) = &*self.snapshot.lock().unwrap() {
2046             for region in self.snapshot_memory_regions.iter() {
2047                 if let Some(content) = &region.content {
2048                     let mut memory_region_path = vm_memory_snapshot_path.clone();
2049                     memory_region_path.push(content);
2050 
2051                     // Create the snapshot file for the region
2052                     let mut memory_region_file = OpenOptions::new()
2053                         .read(true)
2054                         .write(true)
2055                         .create_new(true)
2056                         .open(memory_region_path)
2057                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2058 
2059                     guest_memory
2060                         .write_all_to(
2061                             GuestAddress(region.start_addr),
2062                             &mut memory_region_file,
2063                             region.size as usize,
2064                         )
2065                         .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2066                 }
2067             }
2068         }
2069         Ok(())
2070     }
2071 }
2072 impl Migratable for MemoryManager {}
2073