// xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError,
    GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
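
// Example of the resulting geometry: a 1 GiB hotplug region satisfies the
// 128 MiB alignment rule (8 Linux memory blocks) and is managed here as
// 1 GiB / 2 MiB = 512 virtio-mem blocks, which is also the granularity of
// the BlocksState bitmap below.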

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// The resize event has been triggered from the VMM side.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write-only descriptor where the protocol required a readable one.
    UnexpectedWriteOnlyDescriptor,
    // Guest gave us a read-only descriptor where the protocol required a writable one.
    UnexpectedReadOnlyDescriptor,
    // Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    // Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    // Guest sent us an invalid request.
    InvalidRequest,
    // Failed to write to the EventFd.
    EventFdWriteFail(std::io::Error),
    // Failed to clone the EventFd.
    EventFdTryCloneFail(std::io::Error),
    // Failed to receive on the mpsc channel.
    MpscRecvFail(mpsc::RecvError),
    // Invalid resize argument.
    ResizeError(anyhow::Error),
    // Failed to trigger the config change notification after a resize.
    ResizeTriggerFail(DeviceError),
    // Invalid configuration.
    ValidateError(anyhow::Error),
    // Failed discarding a memory range.
    DiscardMemoryRange(std::io::Error),
    // Failed DMA mapping.
    DmaMap(std::io::Error),
    // Failed DMA unmapping.
    DmaUnmap(std::io::Error),
    // Invalid DMA mapping handler.
    InvalidDmaMappingHandler,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}
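// With #[repr(C)] the request is 2 + 6 + 8 + 2 + 6 = 24 bytes: the explicit
// padding keeps the u64 `addr` field naturally aligned at offset 8, which is
// why there is no implicit padding and the ByteValued impl above is sound.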

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
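// The response is 2 + 6 + 2 = 10 bytes; Request::parse() below only accepts a
// status descriptor that offers at least this much writable space.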

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region, i.e. within the half-open range
        // [self.addr, self.addr + self.usable_region_size). Note the
        // strict '>' below: a request ending exactly at the upper bound
        // still fits, so the last usable block can be plugged.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
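
// A resize only moves the target: VirtioMemConfig::resize() updates
// requested_size and the device raises a config-change interrupt, after which
// the guest driver converges by sending VIRTIO_MEM_REQ_PLUG/UNPLUG requests in
// multiples of block_size until plugged_size matches requested_size. For
// instance (illustrative numbers), growing requested_size from 0 to 512 MiB
// lets the guest plug up to 256 blocks of 2 MiB anywhere in the usable range.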

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}
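
// Each request is therefore a two-descriptor chain:
//   desc[0]: device-readable, exactly size_of::<VirtioMemReq>() (24) bytes
//   desc[1]: device-writable, at least size_of::<VirtioMemResp>() (10) bytes
// send_response() returns the number of bytes written into desc[1] (0 on
// error), which process_queue() passes to the queue as the used length.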

pub struct ResizeSender {
    hotplugged_size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    evt: EventFd,
}

impl ResizeSender {
    fn size(&self) -> u64 {
        self.hotplugged_size.load(Ordering::Acquire)
    }

    fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
        self.tx.send(r)
    }
}

impl Clone for ResizeSender {
    fn clone(&self) -> Self {
        ResizeSender {
            hotplugged_size: self.hotplugged_size.clone(),
            tx: self.tx.clone(),
            evt: self
                .evt
                .try_clone()
                .expect("Failed cloning EventFd from ResizeSender"),
        }
    }
}

pub struct Resize {
    hotplugged_size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    rx: mpsc::Receiver<Result<(), Error>>,
    evt: EventFd,
}

impl Resize {
    pub fn new(hotplugged_size: u64) -> io::Result<Self> {
        let (tx, rx) = mpsc::channel();

        Ok(Resize {
            hotplugged_size: Arc::new(AtomicU64::new(hotplugged_size)),
            tx,
            rx,
            evt: EventFd::new(EFD_NONBLOCK)?,
        })
    }

    pub fn new_resize_sender(&self) -> Result<ResizeSender, Error> {
        Ok(ResizeSender {
            hotplugged_size: self.hotplugged_size.clone(),
            tx: self.tx.clone(),
            evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
        })
    }

    pub fn work(&self, desired_size: u64) -> Result<(), Error> {
        self.hotplugged_size.store(desired_size, Ordering::Release);
        self.evt.write(1).map_err(Error::EventFdWriteFail)?;
        self.rx.recv().map_err(Error::MpscRecvFail)?
    }
}
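
// A minimal sketch of the resize round trip, assuming the caller owns the
// device (the real call sites live outside this file, in the VMM's memory
// manager):
//
//     let resize = Resize::new(0)?;
//     let sender = resize.new_resize_sender()?; // passed to Mem::new()
//     // ... device activated ...
//     resize.work(512 << 20)?; // blocks until the device thread replies
//
// work() publishes the desired size, kicks the EventFd, and waits on the mpsc
// channel; the device thread picks this up as RESIZE_EVENT in
// MemEpollHandler::handle_event() below.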

#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}
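
// memory_ranges() packs block j into bit (j % 64) of word j / 64. For example
// (illustrative state), with blocks 0, 1 and 64 plugged, memory_ranges(start,
// true) yields bitmap[0] = 0b11 and bitmap[1] = 0b1, which from_bitmap() turns
// into the ranges [start, start + 4 MiB) and [start + 128 MiB, start + 130 MiB).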

struct MemEpollHandler {
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    resize: ResizeSender,
    queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> bool {
        let mut request_list = Vec::new();
        let mut used_count = 0;

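        // Collect all pending descriptor chains first: iterating the queue
        // borrows self.queue, while handling a request needs &mut self (e.g.
        // state_change_request()), so the two phases cannot be interleaved.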
        for mut desc_chain in self.queue.iter().unwrap() {
            request_list.push((
                desc_chain.head_index(),
                Request::parse(&mut desc_chain),
                desc_chain.memory().clone(),
            ));
        }

        for (head_index, request, memory) in request_list {
            let len = match request {
                Err(e) => {
                    error!("failed to parse VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(&memory, resp_type, resp_state)
                    }
                    _ => {
                        error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue.add_used(head_index, len).unwrap();
            used_count += 1;
        }

        used_count > 0
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.resize.evt.as_raw_fd(), RESIZE_EVENT)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            RESIZE_EVENT => {
                if let Err(e) = self.resize.evt.read() {
                    error!("Failed to get resize event: {:?}", e);
                    return true;
                } else {
                    let size = self.resize.size();
                    let mut config = self.config.lock().unwrap();
                    let mut signal_error = false;
                    let mut r = config.resize(size);
                    r = match r {
                        Err(e) => Err(e),
                        _ => match self.signal(VirtioInterruptType::Config) {
                            Err(e) => {
                                signal_error = true;
                                Err(Error::ResizeTriggerFail(e))
                            }
                            _ => Ok(()),
                        },
                    };
                    if let Err(e) = self.resize.send(r) {
                        error!("Failed to send \"resize\" response: {:?}", e);
                        return true;
                    }
                    if signal_error {
                        return true;
                    }
                }
            }
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal(VirtioInterruptType::Queue(0)) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}
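
// handle_event() follows the EpollHelper contract used across these devices:
// every error path returns true, which tells the helper loop to stop the
// worker thread, while false means the event was handled and polling
// continues.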

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    resize: ResizeSender,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        resize: ResizeSender,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned on {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            resize,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
        })
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

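    // The two handler functions above keep external IOMMU mappings in sync
    // with hotplug state: registering a handler (e.g. when a VFIO device is
    // added) immediately maps every currently plugged block, removing it
    // unmaps them, and state_change_request() keeps all registered handlers
    // updated as the guest plugs or unplugs further blocks.
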
    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    fn set_state(&mut self, state: &MemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        *(self.config.lock().unwrap()) = state.config;
        *(self.blocks_state.lock().unwrap()) = state.blocks_state.clone();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        _mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let mut handler = MemEpollHandler {
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            resize: self.resize.clone(),
            queue: queues.remove(0),
            interrupt_cb,
            queue_evt: queue_evts.remove(0),
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

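        // Discard every range the blocks_state bitmap records as unplugged.
        // Notably, after a snapshot restore blocks_state comes from the saved
        // state, so previously unplugged ranges are punched out of the backing
        // memory again before the guest resumes using the device.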
        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || {
                if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            },
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}