xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision eeae63b4595fbf0cc69f62b6e9d9a79c543c4ac7)
// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::atomic::AtomicBool;
use std::sync::{mpsc, Arc, Barrier, Mutex};
use std::{io, result};

use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
    Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
    VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VirtioInterruptType};

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
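
// A small compile-time sanity check added here as an illustrative sketch (not
// part of the original file): the region alignment must be a whole number of
// device blocks, otherwise an aligned region could not be covered exactly by
// the per-block bitmap used below.
const _: () = assert!(VIRTIO_MEM_ALIGN_SIZE % VIRTIO_MEM_DEFAULT_BLOCK_SIZE == 0);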

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;
#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor where the protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor where the protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to try_clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Resize invalid argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
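
// An illustrative sanity-check sketch, not part of the original file: with
// `#[repr(C)]` and the explicit padding fields, the structs above should
// match the virtio-mem wire layout, i.e. a 24-byte request and a 10-byte
// response.
#[cfg(test)]
mod layout_tests {
    use super::*;

    #[test]
    fn request_and_response_sizes_match_wire_layout() {
        assert_eq!(size_of::<VirtioMemReq>(), 24);
        assert_eq!(size_of::<VirtioMemResp>(), 10);
    }
}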

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size', whose values are
        // both controlled by the guest driver.
        if addr.checked_add(size).is_none() {
            return false;
        }

        // The start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
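
// An illustrative test sketch, not part of the original file, exercising the
// VirtioMemConfig invariants above; the start address and region sizes are
// made up for the example.
#[cfg(test)]
mod config_tests {
    use super::*;

    fn sample_config() -> VirtioMemConfig {
        VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: 0x1_0000_0000,
            region_size: VIRTIO_MEM_ALIGN_SIZE,
            usable_region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        }
    }

    #[test]
    fn resize_enforces_alignment_and_bounds() {
        let mut config = sample_config();
        // Growing to one block is valid and updates requested_size.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_ok());
        // Resizing to the current requested_size is rejected.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_err());
        // Unaligned or oversized requests are rejected.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE + 1).is_err());
        assert!(config.resize(config.region_size + VIRTIO_MEM_ALIGN_SIZE).is_err());
    }

    #[test]
    fn is_valid_range_checks_alignment_size_and_bounds() {
        let config = sample_config();
        assert!(config.is_valid_range(config.addr, config.block_size));
        // Zero-sized and unaligned requests are invalid.
        assert!(!config.is_valid_range(config.addr, 0));
        assert!(!config.is_valid_range(config.addr + 1, config.block_size));
        // A range past the usable region is invalid.
        assert!(!config.is_valid_range(config.addr + config.usable_region_size, config.block_size));
    }
}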

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}
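
// An illustrative test sketch, not part of the original file: write a
// response through send_response() into an anonymous guest memory mapping and
// read it back. The guest address is made up for the example.
#[cfg(test)]
mod request_tests {
    use super::*;

    #[test]
    fn send_response_writes_status_to_guest_memory() {
        let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
        let request = Request {
            req: VirtioMemReq::default(),
            status_addr: GuestAddress(0x100),
        };

        let len = request
            .send_response(&mem, VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_PLUGGED)
            .unwrap();
        assert_eq!(len as usize, size_of::<VirtioMemResp>());

        let resp: VirtioMemResp = mem.read_obj(GuestAddress(0x100)).unwrap();
        assert_eq!(resp.resp_type, VIRTIO_MEM_RESP_ACK);
        assert_eq!(resp.state, VIRTIO_MEM_STATE_PLUGGED);
    }
}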

#[derive(Clone, Serialize, Deserialize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}
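
// An illustrative test sketch, not part of the original file: plug a few
// blocks in the bitmap and check the range queries, assuming
// MemoryRangeTable::from_bitmap() coalesces adjacent set bits into a single
// range. The region size is made up for the example.
#[cfg(test)]
mod blocks_state_tests {
    use super::*;

    #[test]
    fn set_range_and_memory_ranges() {
        // A region of 8 blocks, all initially unplugged.
        let mut blocks = BlocksState::new(8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
        assert!(blocks.is_range_state(0, 8, false));

        // Plug blocks 2..6: the whole range is now neither fully plugged
        // nor fully unplugged.
        blocks.set_range(2, 4, true);
        assert!(blocks.is_range_state(2, 4, true));
        assert!(!blocks.is_range_state(0, 8, true));
        assert!(!blocks.is_range_state(0, 8, false));

        // The plugged blocks show up as one contiguous memory range.
        let ranges = blocks.memory_ranges(0, true);
        assert_eq!(ranges.regions().len(), 1);
        assert_eq!(ranges.regions()[0].gpa, 2 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
        assert_eq!(ranges.regions()[0].length, 4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
    }
}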

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about page range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Serialize, Deserialize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("Virtio-mem size is not aligned to {VIRTIO_MEM_ALIGN_SIZE}"),
            ));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {e:?}"),
                )
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}