// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use a 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that the protocol says to read from.")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that the protocol says to write to.")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain.")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use.")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request.")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler.")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest.")]
    NotActivatedByGuest,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

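// A minimal sketch (not part of the device) of what the two ByteValued impls
// above enable: both structs can be copied to and from guest memory as raw
// bytes through the vm_memory::Bytes trait. The guest addresses below are
// made up for illustration.
//
//   fn demo(mem: &GuestMemoryMmap) -> Result<(), GuestMemoryError> {
//       // Read a request the guest placed at a hypothetical address.
//       let req: VirtioMemReq = mem.read_obj(GuestAddress(0x1000))?;
//       let _ = req.req_type;
//       // Write an ACK response back to a hypothetical status address.
//       let resp = VirtioMemResp {
//           resp_type: VIRTIO_MEM_RESP_ACK,
//           ..Default::default()
//       };
//       mem.write_obj(resp, GuestAddress(0x2000))
//   }
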
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure the addition of addr and size, both controlled by the
        // guest driver, cannot overflow.
        let end_addr = match addr.checked_add(size) {
            Some(end_addr) => end_addr,
            None => return false,
        };

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region. A range ending exactly at the end of the
        // usable region is valid, hence the strict comparison.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || end_addr > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

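// A minimal sketch (illustrative values only) of the resize() contract above,
// assuming the default 2 MiB block size and a 1 GiB region:
//
//   let mut config = VirtioMemConfig {
//       block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
//       region_size: 1 << 30,
//       usable_region_size: 1 << 30,
//       ..Default::default()
//   };
//   assert!(config.resize(256 << 20).is_ok()); // aligned and fits
//   assert!(config.resize(3 << 20).is_err());  // not aligned on block_size
//   assert!(config.resize(2 << 30).is_err());  // exceeds region_size
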
struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}

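// The descriptor chain layout expected by Request::parse() above, per the
// virtio-mem specification: one device-readable descriptor carrying the
// request, chained to one device-writable descriptor receiving the response.
//
//   desc[0] (device-readable) -> VirtioMemReq  { req_type, addr, nb_blocks, .. }
//   desc[1] (device-writable) <- VirtioMemResp { resp_type, state, .. }
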
#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

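// A minimal sketch (illustrative only) of how BlocksState::memory_ranges()
// packs the per-block bitmap into 64-bit words, assuming the default 2 MiB
// block size:
//
//   let mut state = BlocksState::new(4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
//   state.set_range(0, 2, true); // plug blocks 0 and 1
//   state.set_range(3, 1, true); // plug block 3
//   let table = state.memory_ranges(0, true);
//   // `table` now describes two ranges relative to start_addr 0:
//   // [0x0, 0x40_0000) and [0x60_0000, 0x80_0000).
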
struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages, as MADV_DONTNEED is not supported on hugetlbfs
        // mappings on older kernels.
        if !self.hugepages {
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }
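
    // Outcome summary for state_change_request() above (the state field is
    // always 0 in plug/unplug responses):
    //   plug request exceeding requested_size     -> VIRTIO_MEM_RESP_NACK
    //   invalid range, wrong current block state,
    //   or a failed discard / DMA (un)mapping     -> VIRTIO_MEM_RESP_ERROR
    //   otherwise                                 -> VIRTIO_MEM_RESP_ACK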

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        // Check the range validity before computing the offset, as an
        // address below config.addr would make the subtraction below
        // underflow. The state value is ignored by the guest on error.
        if !config.is_valid_range(addr, size) {
            return (VIRTIO_MEM_RESP_ERROR, VIRTIO_MEM_STATE_MIXED);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> bool {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let len = match Request::parse(&mut desc_chain) {
                Err(e) => {
                    error!("failed to parse VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(desc_chain.memory(), resp_type, resp_state)
                    }
                    _ => {
                        error!("unknown VirtioMemReq request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .unwrap();
            used_descs = true;
        }

        used_descs
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                if self.process_queue() {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned to {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

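    // A minimal usage sketch (illustrative only; in practice the VMM drives
    // this from its resize request path, and `mem_device` here is a
    // hypothetical Arc<Mutex<Mem>>): growing to 512 MiB updates
    // requested_size and sends a config change interrupt, after which the
    // guest issues plug requests on the virtqueue.
    //
    //   mem_device.lock().unwrap().resize(512 << 20)?;
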
    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    fn set_state(&mut self, state: &MemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        *(self.config.lock().unwrap()) = state.config;
        *(self.blocks_state.lock().unwrap()) = state.blocks_state.clone();
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}