// xref: cloud-hypervisor/virtio-devices/src/mem.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};

use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned to this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use a 2 MiB block size so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

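// Illustrative sketch, not part of the upstream file: the relationship between
// the two constants above can be pinned down with compile-time assertions
// (const panics in assert! require Rust 1.57+).
const _: () = assert!(VIRTIO_MEM_ALIGN_SIZE % VIRTIO_MEM_DEFAULT_BLOCK_SIZE == 0);
const _: () = assert!(VIRTIO_MEM_DEFAULT_BLOCK_SIZE.is_power_of_two());
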
// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that the protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that the protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Resize error: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
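
// Illustrative sketch, not part of the upstream file: ByteValued lets these
// wire structs be serialized to and parsed from raw bytes, which is how they
// travel through the virtqueue. The values below are arbitrary.
#[cfg(test)]
mod wire_format_tests {
    use super::*;

    #[test]
    fn request_round_trips_through_bytes() {
        let req = VirtioMemReq {
            req_type: VIRTIO_MEM_REQ_PLUG,
            addr: 0x1_0000_0000,
            nb_blocks: 4,
            ..Default::default()
        };

        // as_slice()/from_slice() are provided by the ByteValued trait.
        let bytes = req.as_slice().to_vec();
        assert_eq!(bytes.len(), size_of::<VirtioMemReq>());

        let decoded = *VirtioMemReq::from_slice(&bytes).unwrap();
        assert_eq!(decoded.req_type, VIRTIO_MEM_REQ_PLUG);
        assert_eq!(decoded.addr, 0x1_0000_0000);
        assert_eq!(decoded.nb_blocks, 4);
    }
}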

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned to block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned to block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned to block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned to block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned to block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned to block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size', whose values are
        // both controlled by the guest driver.
        if addr.checked_add(size).is_none() {
            return false;
        }

        // The start address must be aligned to block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
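
// Illustrative sketch, not part of the upstream file: exercises the
// validate()/resize()/is_valid_range() invariants documented above. The
// addresses and sizes are arbitrary but block-aligned.
#[cfg(test)]
mod config_tests {
    use super::*;

    fn sample_config() -> VirtioMemConfig {
        VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: 0x1_0000_0000,
            region_size: VIRTIO_MEM_ALIGN_SIZE,
            usable_region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        }
    }

    #[test]
    fn resize_rejects_unaligned_and_oversized_requests() {
        let mut config = sample_config();
        config.validate().unwrap();

        // Not a multiple of block_size.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE + 1).is_err());
        // Larger than the region itself.
        assert!(config.resize(2 * VIRTIO_MEM_ALIGN_SIZE).is_err());
        // A valid request updates requested_size.
        config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE).unwrap();
        assert_eq!(config.requested_size, VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
    }

    #[test]
    fn is_valid_range_checks_alignment_and_bounds() {
        let config = sample_config();

        assert!(config.is_valid_range(0x1_0000_0000, VIRTIO_MEM_DEFAULT_BLOCK_SIZE));
        // Zero-sized, unaligned and overflowing ranges are all rejected.
        assert!(!config.is_valid_range(0x1_0000_0000, 0));
        assert!(!config.is_valid_range(0x1_0000_0001, VIRTIO_MEM_DEFAULT_BLOCK_SIZE));
        assert!(!config.is_valid_range(u64::MAX, 2));
    }
}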

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}
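
// Illustrative sketch, not part of the upstream file: send_response() writes
// a VirtioMemResp at the guest address taken from the status descriptor. The
// guest memory layout below (one 4 KiB region at address 0) is arbitrary.
#[cfg(test)]
mod request_tests {
    use super::*;

    #[test]
    fn response_is_written_to_guest_memory() {
        let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
        let request = Request {
            req: VirtioMemReq::default(),
            status_addr: GuestAddress(0x100),
        };

        let len = request
            .send_response(&mem, VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_UNPLUGGED)
            .unwrap();
        assert_eq!(len as usize, size_of::<VirtioMemResp>());

        let resp: VirtioMemResp = mem.read_obj(GuestAddress(0x100)).unwrap();
        assert_eq!(resp.resp_type, VIRTIO_MEM_RESP_ACK);
        assert_eq!(resp.state, VIRTIO_MEM_STATE_UNPLUGGED);
    }
}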

#[derive(Clone, Serialize, Deserialize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}
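
// Illustrative sketch, not part of the upstream file: shows the BlocksState
// range queries and how memory_ranges() packs the per-block booleans into a
// u64 bitmap for MemoryRangeTable. A 128 MiB region yields 64 blocks of
// 2 MiB each; the second test assumes from_bitmap() merges contiguous blocks
// into a single range.
#[cfg(test)]
mod blocks_state_tests {
    use super::*;

    #[test]
    fn set_and_query_ranges() {
        let mut state = BlocksState::new(VIRTIO_MEM_ALIGN_SIZE);
        assert!(state.is_range_state(0, 64, false));

        state.set_range(2, 3, true);
        assert!(state.is_range_state(2, 3, true));
        // A range straddling plugged and unplugged blocks matches neither state.
        assert!(!state.is_range_state(0, 4, true));
        assert!(!state.is_range_state(0, 4, false));
    }

    #[test]
    fn memory_ranges_merges_contiguous_plugged_blocks() {
        let mut state = BlocksState::new(VIRTIO_MEM_ALIGN_SIZE);
        state.set_range(0, 2, true);

        let table = state.memory_ranges(0x1_0000_0000, true);
        let regions = table.regions();
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].gpa, 0x1_0000_0000);
        assert_eq!(regions[0].length, 2 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
    }
}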

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        // Bail out early on an invalid range: the offset computation below
        // would underflow if addr were below the region start.
        if !config.is_valid_range(addr, size) {
            return (VIRTIO_MEM_RESP_ERROR, 0);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state = if self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, true)
        {
            VIRTIO_MEM_STATE_PLUGGED
        } else if self.blocks_state.lock().unwrap().is_range_state(
            first_block_index,
            nb_blocks,
            false,
        ) {
            VIRTIO_MEM_STATE_UNPLUGGED
        } else {
            VIRTIO_MEM_STATE_MIXED
        };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Serialize, Deserialize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}
impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("Virtio-mem size is not aligned to {VIRTIO_MEM_ALIGN_SIZE}"),
            ));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {e:?}"),
                )
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}