xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, EpollHelper, EpollHelperError,
    EpollHelperHandler, Queue, VirtioCommon, VirtioDevice, VirtioDeviceType,
    EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccomp::{SeccompAction, SeccompFilter};
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use std::thread;
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryRegion,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
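
// Illustrative sketch (not part of the original module): with these
// constants, a region is carved into 2 MiB blocks, and guest requests
// address whole blocks. A hypothetical helper showing the arithmetic:
//
//     const BLOCK: u64 = 2 << 20; // VIRTIO_MEM_DEFAULT_BLOCK_SIZE
//     fn nb_blocks(region_size: u64) -> u64 {
//         debug_assert_eq!(region_size % BLOCK, 0);
//         region_size / BLOCK
//     }
//     // A 1 GiB region aligned on 128 MiB holds 512 plug/unplug blocks:
//     assert_eq!(nb_blocks(1 << 30), 512);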

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// A resize request has been received.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write-only descriptor that the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    // Guest gave us a read-only descriptor that the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    // Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    // Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    // Guest sent us an invalid request.
    InvalidRequest,
    // Failed to write to the EventFd.
    EventFdWriteFail(std::io::Error),
    // Failed to clone the EventFd.
    EventFdTryCloneFail(std::io::Error),
    // Failed to receive on the mpsc channel.
    MpscRecvFail(mpsc::RecvError),
    // Invalid resize argument.
    ResizeError(anyhow::Error),
    // Failed to signal the config change for a resize.
    ResizeTriggerFail(DeviceError),
    // Invalid configuration.
    ValidateError(anyhow::Error),
    // Failed discarding memory range.
    DiscardMemoryRange(std::io::Error),
    // Failed DMA mapping.
    DmaMap(std::io::Error),
    // Failed DMA unmapping.
    DmaUnmap(std::io::Error),
    // Invalid DMA mapping handler.
    InvalidDmaMappingHandler,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}
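
// Illustrative sketch (not part of the original module): ByteValued is what
// lets vm-memory copy these structs to and from guest RAM as plain bytes.
// Decoding a request from a raw byte buffer via vm_memory::ByteValued:
//
//     let bytes = [0u8; size_of::<VirtioMemReq>()];
//     let mut req = VirtioMemReq::default();
//     req.as_mut_slice().copy_from_slice(&bytes);
//     assert_eq!(req.req_type, 0); // 0 == VIRTIO_MEM_REQ_PLUG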

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
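
// Illustrative sketch (not part of the original module): a worked example of
// the range check above, assuming a region at 0x1_0000_0000 with a 256 MiB
// usable size and 2 MiB blocks:
//
//     let cfg = VirtioMemConfig {
//         block_size: 2 << 20,
//         addr: 0x1_0000_0000,
//         region_size: 256 << 20,
//         usable_region_size: 256 << 20,
//         ..Default::default()
//     };
//     assert!(cfg.is_valid_range(0x1_0000_0000, 256 << 20)); // fills region
//     assert!(!cfg.is_valid_range(0x1_0000_0000 + (1 << 20), 2 << 20)); // unaligned
//     assert!(!cfg.is_valid_range(0x1_0000_0000, 258 << 20)); // past the end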

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
    ) -> result::Result<Request, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if avail_desc.len as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;

        let status_desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr,
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}
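
// Illustrative sketch (not part of the original module): the guest driver
// submits each request as a two-descriptor chain, which parse() above walks:
//
//     desc[0] (device-readable) -> VirtioMemReq  { req_type, addr, nb_blocks }
//     desc[1] (device-writable) -> VirtioMemResp { resp_type, state }
//
// parse() rejects chains where the head is writable, the head length is not
// exactly size_of::<VirtioMemReq>(), or the status buffer is too small.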

pub struct ResizeSender {
    size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    evt: EventFd,
}

impl ResizeSender {
    fn size(&self) -> u64 {
        self.size.load(Ordering::Acquire)
    }

    fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
        self.tx.send(r)
    }
}

impl Clone for ResizeSender {
    fn clone(&self) -> Self {
        ResizeSender {
            size: self.size.clone(),
            tx: self.tx.clone(),
            evt: self
                .evt
                .try_clone()
                .expect("Failed cloning EventFd from ResizeSender"),
        }
    }
}

pub struct Resize {
    size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    rx: mpsc::Receiver<Result<(), Error>>,
    evt: EventFd,
}

impl Resize {
    pub fn new() -> io::Result<Self> {
        let (tx, rx) = mpsc::channel();

        Ok(Resize {
            size: Arc::new(AtomicU64::new(0)),
            tx,
            rx,
            evt: EventFd::new(EFD_NONBLOCK)?,
        })
    }

    pub fn new_resize_sender(&self) -> Result<ResizeSender, Error> {
        Ok(ResizeSender {
            size: self.size.clone(),
            tx: self.tx.clone(),
            evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
        })
    }

    pub fn work(&self, size: u64) -> Result<(), Error> {
        self.size.store(size, Ordering::Release);
        self.evt.write(1).map_err(Error::EventFdWriteFail)?;
        self.rx.recv().map_err(Error::MpscRecvFail)?
    }
}
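
// Illustrative sketch (not part of the original module): how a VMM thread
// drives a resize through the pair of handles above. work() publishes the
// new size, kicks the eventfd watched by the device's epoll thread, then
// blocks until that thread answers on the mpsc channel:
//
//     let resize = Resize::new()?;
//     let sender = resize.new_resize_sender()?; // handed to Mem::new()
//     // ... device is created and activated with `sender` ...
//     resize.work(512 << 20)?; // request 512 MiB; Err(_) if rejected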

struct BlocksState(Vec<bool>);

impl BlocksState {
    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .0
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .0
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.0
    }
}
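
// Illustrative sketch (not part of the original module): BlocksState is a
// plain plugged/unplugged bitmap, one bool per block_size-sized block:
//
//     let mut state = BlocksState(vec![false; 4]); // 4 blocks, all unplugged
//     state.set_range(1, 2, true);                 // plug blocks 1 and 2
//     assert!(state.is_range_state(1, 2, true));   // fully plugged
//     assert!(!state.is_range_state(0, 4, true));  // mixed, not all plugged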

struct MemEpollHandler {
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    resize: ResizeSender,
    queue: Queue,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }
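
    // Illustrative sketch (not part of the original module): a worked plug
    // request against the handler above, with assumed numbers. With 2 MiB
    // blocks, a region at 0x1_0000_0000 and requested_size = 8 MiB, a guest
    // request { req_type: PLUG, addr: 0x1_0040_0000, nb_blocks: 2 } covers
    // blocks 2 and 3 (offset 4 MiB / 2 MiB = index 2), so the handler:
    //   1. checks plugged_size + 4 MiB <= requested_size,
    //   2. checks the range is usable and currently all-unplugged,
    //   3. flips blocks 2..4 to plugged and DMA-maps each 2 MiB block,
    //   4. bumps plugged_size by 4 MiB and returns VIRTIO_MEM_RESP_ACK.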

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: &VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(int_type, Some(&self.queue))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn process_queue(&mut self) -> bool {
        let mut request_list = Vec::new();
        let mut used_count = 0;
        let mem = self.mem.memory();
        for avail_desc in self.queue.iter(&mem) {
            request_list.push((avail_desc.index, Request::parse(&avail_desc, &mem)));
        }

        for (desc_index, request) in request_list.iter() {
            let len = match request {
                Err(e) => {
                    error!("failed to parse VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(&mem, resp_type, resp_state)
                    }
                    _ => {
                        error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue.add_used(&mem, *desc_index, len);

            used_count += 1;
        }

        used_count > 0
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.resize.evt.as_raw_fd(), RESIZE_EVENT)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            RESIZE_EVENT => {
                if let Err(e) = self.resize.evt.read() {
                    error!("Failed to get resize event: {:?}", e);
                    return true;
                } else {
                    let size = self.resize.size();
                    let mut config = self.config.lock().unwrap();
                    let mut signal_error = false;
                    let mut r = config.resize(size);
                    r = match r {
                        Err(e) => Err(e),
                        _ => match self.signal(&VirtioInterruptType::Config) {
                            Err(e) => {
                                signal_error = true;
                                Err(Error::ResizeTriggerFail(e))
                            }
                            _ => Ok(()),
                        },
                    };
                    if let Err(e) = self.resize.send(r) {
                        error!("Failed to send \"resize\" response: {:?}", e);
                        return true;
                    }
                    if signal_error {
                        return true;
                    }
                }
            }
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal(&VirtioInterruptType::Queue) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

// Virtio device for exposing memory hot(un)plug to the guest OS through virtio.
pub struct Mem {
    common: VirtioCommon,
    id: String,
    resize: ResizeSender,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
}

impl Mem {
    // Create a new virtio-mem device.
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        resize: ResizeSender,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned with {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            resize,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state: Arc::new(Mutex::new(BlocksState(vec![
                false;
                (config.region_size / config.block_size)
                    as usize
            ]))),
        })
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        device_id: u32,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(device_id, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(&mut self, device_id: u32) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&device_id)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }
}
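
// Illustrative sketch (not part of the original module): wiring a virtio-mem
// device to a VFIO-style DMA mapper, with hypothetical `device_id` and
// `get_vfio_mapper` values. Already-plugged blocks are mapped eagerly on
// registration, and unmapped on removal:
//
//     let mapper: Arc<dyn ExternalDmaMapping> = get_vfio_mapper(); // assumed
//     mem_device.add_dma_mapping_handler(1 /* device_id */, mapper.clone())?;
//     // ... guest plugs/unplugs memory; the handler tracks every change ...
//     mem_device.remove_dma_mapping_handler(1)?;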

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let config = self.config.lock().unwrap();
        let mut handler = MemEpollHandler {
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            resize: self.resize.clone(),
            queue: queues.remove(0),
            mem,
            interrupt_cb,
            queue_evt: queue_evts.remove(0),
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        handler
            .discard_memory_range(0, config.region_size)
            .map_err(|e| {
                error!("failed discarding memory range: {:?}", e);
                ActivateError::BadActivate
            })?;

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();
        // Retrieve the seccomp filter for the virtio_mem thread.
        let virtio_mem_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::VirtioMem)
            .map_err(ActivateError::CreateSeccompFilter)?;
        thread::Builder::new()
            .name(self.id.clone())
            .spawn(move || {
                if let Err(e) = SeccompFilter::apply(virtio_mem_seccomp_filter) {
                    error!("Error applying seccomp filter: {:?}", e);
                } else if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            })
            .map(|thread| epoll_threads.push(thread))
            .map_err(|e| {
                error!("failed to spawn virtio-mem epoll thread: {}", e);
                ActivateError::BadActivate
            })?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}
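
// Illustrative test sketch (not part of the original file): a minimal check
// of the BlocksState bitmap semantics the request handlers rely on.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn blocks_state_tracks_ranges() {
        let mut bs = BlocksState(vec![false; 8]);
        // Everything starts unplugged.
        assert!(bs.is_range_state(0, 8, false));
        // Plug blocks 2..5 and verify the plugged/unplugged/mixed views.
        bs.set_range(2, 3, true);
        assert!(bs.is_range_state(2, 3, true));
        assert!(!bs.is_range_state(0, 8, true));
        assert!(!bs.is_range_state(0, 8, false));
    }
}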
1005