xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
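//
// For illustration (figures not taken from this code): a 1 GiB virtio-mem
// region satisfies the 128 MiB alignment rule (1 GiB = 8 * 128 MiB) and is
// tracked as 1 GiB / 2 MiB = 512 blocks in the BlocksState bitmap defined
// below.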

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;
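// This is a bit position, not a mask: Mem::new() below advertises the
// feature as (1u64 << VIRTIO_MEM_F_ACPI_PXM) when a NUMA node is
// configured, in which case VirtioMemConfig::node_id is valid.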

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write only descriptor that protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read only descriptor that protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on MPSC channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

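// How the sizes in VirtioMemConfig relate: plugged_size counts the blocks
// currently plugged, plug requests are refused (NACK) once plugged_size
// would exceed requested_size, and both are bounded by region_size. Every
// size must stay a multiple of block_size, which validate() enforces.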
impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

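    // Worked example (illustrative figures): with addr = 0x1_0000_0000,
    // usable_region_size = 0x1000_0000 (256 MiB) and block_size = 0x20_0000
    // (2 MiB), a request for one block at 0x1_0FE0_0000 covers exactly the
    // last block of the usable region and is accepted, while the same
    // request at 0x1_0FF0_0000 fails the alignment check.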
    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size' whose values are
        // both controlled by the guest driver
        if addr.checked_add(size).is_none() {
            return false;
        }

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region. Note the end of the request may reach the
        // end of the usable region exactly, hence the strict comparison.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}

#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

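    // Packs the per-block booleans into 64-bit words (bit j % 64 of word
    // j / 64 corresponds to block j) and lets MemoryRangeTable coalesce the
    // set bits into contiguous ranges. For instance (illustrative), blocks
    // 0..=2 plugged out of 8 produce the single word 0b0000_0111, i.e. one
    // range of 3 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE bytes at start_addr.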
    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
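    // Returns unplugged pages to the host: punch a hole in the backing file
    // (if any) so the filesystem releases the blocks, and tell the kernel
    // that the anonymous pages are no longer needed so they can be
    // reclaimed.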
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

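    // Handles VIRTIO_MEM_REQ_PLUG and VIRTIO_MEM_REQ_UNPLUG: validate the
    // range, check every block is currently in the opposite state, discard
    // the pages on unplug, flip the bitmap, replay the change through every
    // registered DMA mapping handler, then adjust plugged_size.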
    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if !config.is_valid_range(addr, size) {
            // Report the error before computing an offset: 'addr' may lie
            // below the region start, which would underflow below.
            return (VIRTIO_MEM_RESP_ERROR, 0);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

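    // Drains the request queue: each descriptor chain carries one
    // VirtioMemReq followed by a writable status buffer receiving the
    // VirtioMemResp. Returns true if any used descriptor was added, i.e.
    // the guest needs to be notified.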
    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "virtio-mem region size is not aligned on {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let (avail_features, acked_features, config) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (state.avail_features, state.acked_features, state.config)
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {}: {:?}",
                            initial_size, e
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {:?}", e),
                )
            })?;

            (avail_features, 0, config)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

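    // Asks the guest to grow or shrink its plugged memory by updating
    // requested_size and raising a config-change interrupt. For example
    // (illustrative), resize(2 << 30) makes the guest send PLUG requests
    // until plugged_size reaches 2 GiB; the actual plugging happens in
    // MemEpollHandler::state_change_request() above.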
    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

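    // A handler registered after blocks were already plugged must catch up:
    // replay a map() for every currently plugged block before inserting the
    // handler, so its view of the guest mappings is complete.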
    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

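    // Activation discards every range the bitmap reports as unplugged. For
    // a fresh device this releases nothing of substance, but after a
    // restore it ensures unplugged ranges do not keep stale backing pages.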
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}
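
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch (added for illustration, with made-up figures)
    // exercising the block-tracking helpers above.
    #[test]
    fn test_blocks_state_ranges() {
        // A 16 MiB region is tracked as 8 blocks of 2 MiB.
        let mut blocks = BlocksState::new(16 << 20);
        assert!(blocks.is_range_state(0, 8, false));

        // Plug the first three blocks and check both views of the state.
        blocks.set_range(0, 3, true);
        assert!(blocks.is_range_state(0, 3, true));
        assert!(!blocks.is_range_state(0, 4, true));

        // The three plugged blocks coalesce into one 6 MiB range at the
        // requested base address.
        let ranges = blocks.memory_ranges(0x1000_0000, true);
        assert_eq!(ranges.regions().len(), 1);
        assert_eq!(ranges.regions()[0].gpa, 0x1000_0000);
        assert_eq!(
            ranges.regions()[0].length,
            3 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE
        );
    }
}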