xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision f6cd3bd86ded632da437b6dd6077f4237d2f71fe)
// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region
// must be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
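// (128 << 20 is 134_217_728 bytes, i.e. 128 MiB; 2 << 20 is 2_097_152 bytes,
// i.e. 2 MiB, so one 128 MiB alignment unit covers 64 blocks.)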

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

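// Guest request layout, mirroring the spec's struct virtio_mem_req:
// 2 + 6 (padding) + 8 + 2 + 6 (padding) = 24 bytes.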
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size' whose values are
        // both controlled by the guest driver
        if addr.checked_add(size).is_none() {
            return false;
        }

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region, i.e. the request may end exactly at the
        // usable region boundary but must not cross it.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
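    // A request chain is expected to hold two descriptors: a readable one
    // carrying a VirtioMemReq, followed by a writable one where the
    // VirtioMemResp status is written back.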
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type, which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}

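// Plugged/unplugged state of the whole region, tracked as one bool per
// VIRTIO_MEM_DEFAULT_BLOCK_SIZE (2 MiB) block; index 0 is the block at the
// start of the region.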
#[derive(Clone, Serialize, Deserialize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

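    // Pack the per-block bools into u64 words, LSB first (block j maps to
    // bit j % 64 of word j / 64), and let MemoryRangeTable::from_bitmap
    // convert the set bits into {gpa, length} ranges. For example, with only
    // blocks 0..=2 matching `plugged`, bitmap[0] ends up as 0b111.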
    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about page range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

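    // Handle a PLUG or UNPLUG request: validate the range, require every
    // covered block to currently be in the opposite state, discard the host
    // pages on unplug, update any external DMA mappings, and adjust
    // plugged_size accordingly.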
    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

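    // Handle an UNPLUG_ALL request: discard the whole region on the host,
    // drop external DMA mappings for any still-plugged blocks, then mark
    // every block unplugged and reset plugged_size to 0.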
    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

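    // Handle a STATE request: report whether the queried range is fully
    // plugged, fully unplugged, or mixed.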
    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

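    // Drain the request queue: parse each descriptor chain, dispatch on the
    // request type, write the response back, and report whether any used
    // descriptors require a queue interrupt.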
    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Serialize, Deserialize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"),
            ));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {e:?}"),
                )
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

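    // Update requested_size and notify the guest through a config-change
    // interrupt so the driver picks up the new target size.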
    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

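    // Register a new DMA mapping handler, first replaying mappings for the
    // blocks that are already plugged so the handler's view matches the
    // guest's.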
    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

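        // Discard any ranges that are currently unplugged (e.g. when
        // activating after a restore) so the host does not keep backing
        // pages for them.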
        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}