xref: /cloud-hypervisor/virtio-devices/src/mem.rs (revision 8e2973fe7cc5a0e2c212fc327014ba6efb77b8c8)
151d102c7SHui Zhu // Copyright (c) 2020 Ant Financial
251d102c7SHui Zhu //
35e9886bbSRuslan Mstoi // SPDX-License-Identifier: Apache-2.0
45e9886bbSRuslan Mstoi //
551d102c7SHui Zhu // Licensed under the Apache License, Version 2.0 (the "License");
651d102c7SHui Zhu // you may not use this file except in compliance with the License.
751d102c7SHui Zhu // You may obtain a copy of the License at
851d102c7SHui Zhu //
951d102c7SHui Zhu //     http://www.apache.org/licenses/LICENSE-2.0
1051d102c7SHui Zhu //
1151d102c7SHui Zhu // Unless required by applicable law or agreed to in writing, software
1251d102c7SHui Zhu // distributed under the License is distributed on an "AS IS" BASIS,
1351d102c7SHui Zhu // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1451d102c7SHui Zhu // See the License for the specific language governing permissions and
1551d102c7SHui Zhu // limitations under the License.
1651d102c7SHui Zhu 
17fd4f32faSRob Bradford use std::collections::BTreeMap;
1851d102c7SHui Zhu use std::mem::size_of;
1966efe3cfSRob Bradford use std::os::unix::io::{AsRawFd, RawFd};
20f38056fcSSebastien Boeuf use std::sync::atomic::AtomicBool;
2161e57e1cSRuoqing He use std::sync::{mpsc, Arc, Barrier, Mutex};
2261e57e1cSRuoqing He use std::{io, result};
2388a9f799SRob Bradford 
2488a9f799SRob Bradford use anyhow::anyhow;
2588a9f799SRob Bradford use seccompiler::SeccompAction;
2688a9f799SRob Bradford use serde::{Deserialize, Serialize};
2754c48439SBo Chen use thiserror::Error;
2887f57f7cSSebastien Boeuf use virtio_queue::{DescriptorChain, Queue, QueueT};
2961f9a4ecSSebastien Boeuf use vm_device::dma_mapping::ExternalDmaMapping;
3051d102c7SHui Zhu use vm_memory::{
31a423bf13SSebastien Boeuf     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
32a423bf13SSebastien Boeuf     GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
3351d102c7SHui Zhu };
344450c44fSSebastien Boeuf use vm_migration::protocol::MemoryRangeTable;
3510ab87d6SRob Bradford use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
3651d102c7SHui Zhu use vmm_sys_util::eventfd::EventFd;
3751d102c7SHui Zhu 
3888a9f799SRob Bradford use super::{
3961e57e1cSRuoqing He     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
4061e57e1cSRuoqing He     Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
4161e57e1cSRuoqing He     VIRTIO_F_VERSION_1,
4288a9f799SRob Bradford };
4388a9f799SRob Bradford use crate::seccomp_filters::Thread;
4488a9f799SRob Bradford use crate::thread_helper::spawn_virtio_thread;
4561e57e1cSRuoqing He use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VirtioInterruptType};
4688a9f799SRob Bradford 
/// Number of descriptors in the device's virtqueue.
const QUEUE_SIZE: u16 = 128;
/// Sizes of the virtqueues exposed by the device (a single entry).
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

/// 128 MiB, the standard memory block size in Linux. A virtio-mem region
/// must be aligned on this size, and the region size must be a multiple of
/// it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 * 1024 * 1024;
/// 2 MiB block size, chosen so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 * 1024 * 1024;

/// Response: request processed successfully. Applicable for
/// - VIRTIO_MEM_REQ_PLUG
/// - VIRTIO_MEM_REQ_UNPLUG
/// - VIRTIO_MEM_REQ_UNPLUG_ALL
/// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

/// Response: request denied (e.g. trying to plug more than requested).
/// Applicable for
/// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

/// Response: request cannot be processed right now, try again later.
/// Applicable for
/// - VIRTIO_MEM_REQ_PLUG
/// - VIRTIO_MEM_REQ_UNPLUG
/// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

/// Response: error in the request (e.g. addresses/alignment). Applicable for
/// - VIRTIO_MEM_REQ_PLUG
/// - VIRTIO_MEM_REQ_UNPLUG
/// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

/// State of the queried memory blocks is "plugged".
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
/// State of the queried memory blocks is "unplugged".
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
/// State of the queried memory blocks is "mixed".
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

/// Request to plug memory blocks.
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
/// Request to unplug memory blocks.
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
/// Request to unplug all blocks and shrink the usable size.
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
/// Request information about the plugged state of memory blocks.
const VIRTIO_MEM_REQ_STATE: u16 = 3;
9551d102c7SHui Zhu 
9651d102c7SHui Zhu // New descriptors are pending on the virtio queue.
97f38056fcSSebastien Boeuf const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
9851d102c7SHui Zhu 
99dcedd4cdSSebastien Boeuf // Virtio features
100dcedd4cdSSebastien Boeuf const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;
101dcedd4cdSSebastien Boeuf 
10254c48439SBo Chen #[derive(Error, Debug)]
10351d102c7SHui Zhu pub enum Error {
104*8e2973feSPhilipp Schuster     #[error("Guest gave us bad memory addresses")]
105a2123439SPhilipp Schuster     GuestMemory(#[source] GuestMemoryError),
1062af2cc53SBo Chen     #[error("Guest gave us a write only descriptor that protocol says to read from")]
10751d102c7SHui Zhu     UnexpectedWriteOnlyDescriptor,
1082af2cc53SBo Chen     #[error("Guest gave us a read only descriptor that protocol says to write to")]
10951d102c7SHui Zhu     UnexpectedReadOnlyDescriptor,
1102af2cc53SBo Chen     #[error("Guest gave us too few descriptors in a descriptor chain")]
11151d102c7SHui Zhu     DescriptorChainTooShort,
1122af2cc53SBo Chen     #[error("Guest gave us a buffer that was too short to use")]
11351d102c7SHui Zhu     BufferLengthTooSmall,
1142af2cc53SBo Chen     #[error("Guest sent us invalid request")]
11551d102c7SHui Zhu     InvalidRequest,
116*8e2973feSPhilipp Schuster     #[error("Failed to EventFd write")]
117a2123439SPhilipp Schuster     EventFdWriteFail(#[source] std::io::Error),
118*8e2973feSPhilipp Schuster     #[error("Failed to EventFd try_clone")]
119a2123439SPhilipp Schuster     EventFdTryCloneFail(#[source] std::io::Error),
120*8e2973feSPhilipp Schuster     #[error("Failed to MpscRecv")]
121a2123439SPhilipp Schuster     MpscRecvFail(#[source] mpsc::RecvError),
122*8e2973feSPhilipp Schuster     #[error("Resize invalid argument")]
123a2123439SPhilipp Schuster     ResizeError(#[source] anyhow::Error),
124*8e2973feSPhilipp Schuster     #[error("Fail to resize trigger")]
125a2123439SPhilipp Schuster     ResizeTriggerFail(#[source] DeviceError),
126*8e2973feSPhilipp Schuster     #[error("Invalid configuration")]
127a2123439SPhilipp Schuster     ValidateError(#[source] anyhow::Error),
128*8e2973feSPhilipp Schuster     #[error("Failed discarding memory range")]
129a2123439SPhilipp Schuster     DiscardMemoryRange(#[source] std::io::Error),
130*8e2973feSPhilipp Schuster     #[error("Failed DMA mapping")]
131a2123439SPhilipp Schuster     DmaMap(#[source] std::io::Error),
132*8e2973feSPhilipp Schuster     #[error("Failed DMA unmapping")]
133a2123439SPhilipp Schuster     DmaUnmap(#[source] std::io::Error),
1342af2cc53SBo Chen     #[error("Invalid DMA mapping handler")]
13561f9a4ecSSebastien Boeuf     InvalidDmaMappingHandler,
1362af2cc53SBo Chen     #[error("Not activated by the guest")]
137c274ce4dSRob Bradford     NotActivatedByGuest,
138756aebafSBo Chen     #[error("Unknown request type: {0}")]
1396725771dSPhilipp Schuster     UnknownRequestType(u16),
140*8e2973feSPhilipp Schuster     #[error("Failed adding used index")]
141a2123439SPhilipp Schuster     QueueAddUsed(#[source] virtio_queue::Error),
14251d102c7SHui Zhu }
14351d102c7SHui Zhu 
14451d102c7SHui Zhu #[repr(C)]
14551d102c7SHui Zhu #[derive(Copy, Clone, Debug, Default)]
14651d102c7SHui Zhu struct VirtioMemReq {
14751d102c7SHui Zhu     req_type: u16,
14851d102c7SHui Zhu     padding: [u16; 3],
14951d102c7SHui Zhu     addr: u64,
15051d102c7SHui Zhu     nb_blocks: u16,
151ad8adcb9SSebastien Boeuf     padding_1: [u16; 3],
15251d102c7SHui Zhu }
15351d102c7SHui Zhu 
15431b3871eSWei Liu // SAFETY: it only has data and has no implicit padding.
15551d102c7SHui Zhu unsafe impl ByteValued for VirtioMemReq {}
15651d102c7SHui Zhu 
15751d102c7SHui Zhu #[repr(C)]
15851d102c7SHui Zhu #[derive(Copy, Clone, Debug, Default)]
15951d102c7SHui Zhu struct VirtioMemResp {
16051d102c7SHui Zhu     resp_type: u16,
16151d102c7SHui Zhu     padding: [u16; 3],
162ad8adcb9SSebastien Boeuf     state: u16,
16351d102c7SHui Zhu }
16451d102c7SHui Zhu 
16531b3871eSWei Liu // SAFETY: it only has data and has no implicit padding.
16651d102c7SHui Zhu unsafe impl ByteValued for VirtioMemResp {}
16751d102c7SHui Zhu 
16830e1162dSSebastien Boeuf #[repr(C)]
16910ab87d6SRob Bradford #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
170d7115ec6SSebastien Boeuf pub struct VirtioMemConfig {
17151d102c7SHui Zhu     // Block size and alignment. Cannot change.
172165568e9SHui Zhu     block_size: u64,
17351d102c7SHui Zhu     // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
17451d102c7SHui Zhu     node_id: u16,
175165568e9SHui Zhu     padding: [u8; 6],
17651d102c7SHui Zhu     // Start address of the memory region. Cannot change.
17751d102c7SHui Zhu     addr: u64,
17851d102c7SHui Zhu     // Region size (maximum). Cannot change.
17951d102c7SHui Zhu     region_size: u64,
18051d102c7SHui Zhu     // Currently usable region size. Can grow up to region_size. Can
18151d102c7SHui Zhu     // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
18251d102c7SHui Zhu     // update will be sent).
18351d102c7SHui Zhu     usable_region_size: u64,
18451d102c7SHui Zhu     // Currently used size. Changes due to plug/unplug requests, but no
18551d102c7SHui Zhu     // config updates will be sent.
18651d102c7SHui Zhu     plugged_size: u64,
18751d102c7SHui Zhu     // Requested size. New plug requests cannot exceed it. Can change.
18851d102c7SHui Zhu     requested_size: u64,
18951d102c7SHui Zhu }
19051d102c7SHui Zhu 
19131b3871eSWei Liu // SAFETY: it only has data and has no implicit padding.
19251d102c7SHui Zhu unsafe impl ByteValued for VirtioMemConfig {}
19351d102c7SHui Zhu 
194ad8adcb9SSebastien Boeuf impl VirtioMemConfig {
validate(&self) -> result::Result<(), Error>195ad8adcb9SSebastien Boeuf     fn validate(&self) -> result::Result<(), Error> {
196ad8adcb9SSebastien Boeuf         if self.addr % self.block_size != 0 {
197ad8adcb9SSebastien Boeuf             return Err(Error::ValidateError(anyhow!(
198ad8adcb9SSebastien Boeuf                 "addr 0x{:x} is not aligned on block_size 0x{:x}",
199ad8adcb9SSebastien Boeuf                 self.addr,
200ad8adcb9SSebastien Boeuf                 self.block_size
20133a1e37cSHui Zhu             )));
202ad8adcb9SSebastien Boeuf         }
203ad8adcb9SSebastien Boeuf         if self.region_size % self.block_size != 0 {
204ad8adcb9SSebastien Boeuf             return Err(Error::ValidateError(anyhow!(
205ad8adcb9SSebastien Boeuf                 "region_size 0x{:x} is not aligned on block_size 0x{:x}",
206ad8adcb9SSebastien Boeuf                 self.region_size,
207ad8adcb9SSebastien Boeuf                 self.block_size
20833a1e37cSHui Zhu             )));
209ad8adcb9SSebastien Boeuf         }
210ad8adcb9SSebastien Boeuf         if self.usable_region_size % self.block_size != 0 {
211ad8adcb9SSebastien Boeuf             return Err(Error::ValidateError(anyhow!(
212ad8adcb9SSebastien Boeuf                 "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
213ad8adcb9SSebastien Boeuf                 self.usable_region_size,
214ad8adcb9SSebastien Boeuf                 self.block_size
215ad8adcb9SSebastien Boeuf             )));
216ad8adcb9SSebastien Boeuf         }
217ad8adcb9SSebastien Boeuf         if self.plugged_size % self.block_size != 0 {
218ad8adcb9SSebastien Boeuf             return Err(Error::ValidateError(anyhow!(
219ad8adcb9SSebastien Boeuf                 "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
220ad8adcb9SSebastien Boeuf                 self.plugged_size,
221ad8adcb9SSebastien Boeuf                 self.block_size
222ad8adcb9SSebastien Boeuf             )));
223ad8adcb9SSebastien Boeuf         }
224ad8adcb9SSebastien Boeuf         if self.requested_size % self.block_size != 0 {
225ad8adcb9SSebastien Boeuf             return Err(Error::ValidateError(anyhow!(
226ad8adcb9SSebastien Boeuf                 "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
227ad8adcb9SSebastien Boeuf                 self.requested_size,
228ad8adcb9SSebastien Boeuf                 self.block_size
22933a1e37cSHui Zhu             )));
23033a1e37cSHui Zhu         }
23133a1e37cSHui Zhu 
232ad8adcb9SSebastien Boeuf         Ok(())
233ad8adcb9SSebastien Boeuf     }
234ad8adcb9SSebastien Boeuf 
resize(&mut self, size: u64) -> result::Result<(), Error>235ad8adcb9SSebastien Boeuf     fn resize(&mut self, size: u64) -> result::Result<(), Error> {
236ad8adcb9SSebastien Boeuf         if self.requested_size == size {
237ad8adcb9SSebastien Boeuf             return Err(Error::ResizeError(anyhow!(
238ad8adcb9SSebastien Boeuf                 "new size 0x{:x} and requested_size are identical",
239ad8adcb9SSebastien Boeuf                 size
240ad8adcb9SSebastien Boeuf             )));
241ad8adcb9SSebastien Boeuf         } else if size > self.region_size {
242ad8adcb9SSebastien Boeuf             return Err(Error::ResizeError(anyhow!(
243ad8adcb9SSebastien Boeuf                 "new size 0x{:x} is bigger than region_size 0x{:x}",
244ad8adcb9SSebastien Boeuf                 size,
245ad8adcb9SSebastien Boeuf                 self.region_size
246ad8adcb9SSebastien Boeuf             )));
247a9ec0f33SBo Chen         } else if size % self.block_size != 0 {
248ad8adcb9SSebastien Boeuf             return Err(Error::ResizeError(anyhow!(
249ad8adcb9SSebastien Boeuf                 "new size 0x{:x} is not aligned on block_size 0x{:x}",
250ad8adcb9SSebastien Boeuf                 size,
251ad8adcb9SSebastien Boeuf                 self.block_size
252ad8adcb9SSebastien Boeuf             )));
253ad8adcb9SSebastien Boeuf         }
254ad8adcb9SSebastien Boeuf 
255ad8adcb9SSebastien Boeuf         self.requested_size = size;
25633a1e37cSHui Zhu 
25733a1e37cSHui Zhu         Ok(())
25833a1e37cSHui Zhu     }
25954f814f3SSebastien Boeuf 
is_valid_range(&self, addr: u64, size: u64) -> bool26054f814f3SSebastien Boeuf     fn is_valid_range(&self, addr: u64, size: u64) -> bool {
261b37e2ed3SBo Chen         // Ensure no overflow from adding 'addr' and 'size' whose value are both
262b37e2ed3SBo Chen         // controlled by the guest driver
263b37e2ed3SBo Chen         if addr.checked_add(size).is_none() {
264b37e2ed3SBo Chen             return false;
265b37e2ed3SBo Chen         }
266b37e2ed3SBo Chen 
26754f814f3SSebastien Boeuf         // Start address must be aligned on block_size, the size must be
26854f814f3SSebastien Boeuf         // greater than 0, and all blocks covered by the request must be
26954f814f3SSebastien Boeuf         // in the usable region.
27054f814f3SSebastien Boeuf         if addr % self.block_size != 0
27154f814f3SSebastien Boeuf             || size == 0
2722ad8fac6SYuhong Zhong             || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
27354f814f3SSebastien Boeuf         {
27454f814f3SSebastien Boeuf             return false;
27554f814f3SSebastien Boeuf         }
27654f814f3SSebastien Boeuf 
27754f814f3SSebastien Boeuf         true
27854f814f3SSebastien Boeuf     }
279ad8adcb9SSebastien Boeuf }
28033a1e37cSHui Zhu 
28151d102c7SHui Zhu struct Request {
28251d102c7SHui Zhu     req: VirtioMemReq,
28351d102c7SHui Zhu     status_addr: GuestAddress,
28451d102c7SHui Zhu }
28551d102c7SHui Zhu 
28651d102c7SHui Zhu impl Request {
parse( desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, ) -> result::Result<Request, Error>28751d102c7SHui Zhu     fn parse(
2880162d73eSSebastien Boeuf         desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
28951d102c7SHui Zhu     ) -> result::Result<Request, Error> {
2900249e864SSebastien Boeuf         let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
2910249e864SSebastien Boeuf         // The descriptor contains the request type which MUST be readable.
2920249e864SSebastien Boeuf         if desc.is_write_only() {
29351d102c7SHui Zhu             return Err(Error::UnexpectedWriteOnlyDescriptor);
29451d102c7SHui Zhu         }
2950249e864SSebastien Boeuf         if desc.len() as usize != size_of::<VirtioMemReq>() {
29651d102c7SHui Zhu             return Err(Error::InvalidRequest);
29751d102c7SHui Zhu         }
2980249e864SSebastien Boeuf         let req: VirtioMemReq = desc_chain
2990249e864SSebastien Boeuf             .memory()
3000249e864SSebastien Boeuf             .read_obj(desc.addr())
3010249e864SSebastien Boeuf             .map_err(Error::GuestMemory)?;
30251d102c7SHui Zhu 
3030249e864SSebastien Boeuf         let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
30451d102c7SHui Zhu 
30551d102c7SHui Zhu         // The status MUST always be writable
30651d102c7SHui Zhu         if !status_desc.is_write_only() {
30751d102c7SHui Zhu             return Err(Error::UnexpectedReadOnlyDescriptor);
30851d102c7SHui Zhu         }
30951d102c7SHui Zhu 
3100249e864SSebastien Boeuf         if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
31151d102c7SHui Zhu             return Err(Error::BufferLengthTooSmall);
31251d102c7SHui Zhu         }
31351d102c7SHui Zhu 
31451d102c7SHui Zhu         Ok(Request {
31551d102c7SHui Zhu             req,
3160249e864SSebastien Boeuf             status_addr: status_desc.addr(),
31751d102c7SHui Zhu         })
31851d102c7SHui Zhu     }
31954f814f3SSebastien Boeuf 
send_response( &self, mem: &GuestMemoryMmap, resp_type: u16, state: u16, ) -> Result<u32, Error>320756aebafSBo Chen     fn send_response(
321756aebafSBo Chen         &self,
322756aebafSBo Chen         mem: &GuestMemoryMmap,
323756aebafSBo Chen         resp_type: u16,
324756aebafSBo Chen         state: u16,
325756aebafSBo Chen     ) -> Result<u32, Error> {
32654f814f3SSebastien Boeuf         let resp = VirtioMemResp {
32754f814f3SSebastien Boeuf             resp_type,
32854f814f3SSebastien Boeuf             state,
32954f814f3SSebastien Boeuf             ..Default::default()
33054f814f3SSebastien Boeuf         };
331756aebafSBo Chen         mem.write_obj(resp, self.status_addr)
332756aebafSBo Chen             .map_err(Error::GuestMemory)?;
333756aebafSBo Chen         Ok(size_of::<VirtioMemResp>() as u32)
33454f814f3SSebastien Boeuf     }
33551d102c7SHui Zhu }
33651d102c7SHui Zhu 
33710ab87d6SRob Bradford #[derive(Clone, Serialize, Deserialize)]
338d7115ec6SSebastien Boeuf pub struct BlocksState {
339d7115ec6SSebastien Boeuf     bitmap: Vec<bool>,
340d7115ec6SSebastien Boeuf }
34154f814f3SSebastien Boeuf 
34254f814f3SSebastien Boeuf impl BlocksState {
new(region_size: u64) -> Self343e390775bSSebastien Boeuf     pub fn new(region_size: u64) -> Self {
344af3a59aaSSebastien Boeuf         BlocksState {
345af3a59aaSSebastien Boeuf             bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
346af3a59aaSSebastien Boeuf         }
347af3a59aaSSebastien Boeuf     }
348af3a59aaSSebastien Boeuf 
is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool34954f814f3SSebastien Boeuf     fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
35054f814f3SSebastien Boeuf         for state in self
351d7115ec6SSebastien Boeuf             .bitmap
35254f814f3SSebastien Boeuf             .iter()
35354f814f3SSebastien Boeuf             .skip(first_block_index)
35454f814f3SSebastien Boeuf             .take(nb_blocks as usize)
35554f814f3SSebastien Boeuf         {
35654f814f3SSebastien Boeuf             if *state != plug {
35754f814f3SSebastien Boeuf                 return false;
35854f814f3SSebastien Boeuf             }
35954f814f3SSebastien Boeuf         }
36054f814f3SSebastien Boeuf         true
36154f814f3SSebastien Boeuf     }
36254f814f3SSebastien Boeuf 
set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool)36354f814f3SSebastien Boeuf     fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
36454f814f3SSebastien Boeuf         for state in self
365d7115ec6SSebastien Boeuf             .bitmap
36654f814f3SSebastien Boeuf             .iter_mut()
36754f814f3SSebastien Boeuf             .skip(first_block_index)
36854f814f3SSebastien Boeuf             .take(nb_blocks as usize)
36954f814f3SSebastien Boeuf         {
37054f814f3SSebastien Boeuf             *state = plug;
37154f814f3SSebastien Boeuf         }
37254f814f3SSebastien Boeuf     }
37361f9a4ecSSebastien Boeuf 
inner(&self) -> &Vec<bool>37461f9a4ecSSebastien Boeuf     fn inner(&self) -> &Vec<bool> {
375d7115ec6SSebastien Boeuf         &self.bitmap
37661f9a4ecSSebastien Boeuf     }
3774450c44fSSebastien Boeuf 
memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable3784450c44fSSebastien Boeuf     pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
3794450c44fSSebastien Boeuf         let mut bitmap: Vec<u64> = Vec::new();
3804450c44fSSebastien Boeuf         let mut i = 0;
3814450c44fSSebastien Boeuf         for (j, bit) in self.bitmap.iter().enumerate() {
3824450c44fSSebastien Boeuf             if j % 64 == 0 {
3834450c44fSSebastien Boeuf                 bitmap.push(0);
3844450c44fSSebastien Boeuf 
3854450c44fSSebastien Boeuf                 if j != 0 {
3864450c44fSSebastien Boeuf                     i += 1;
3874450c44fSSebastien Boeuf                 }
3884450c44fSSebastien Boeuf             }
3894450c44fSSebastien Boeuf 
3904450c44fSSebastien Boeuf             if *bit == plugged {
3914450c44fSSebastien Boeuf                 bitmap[i] |= 1 << (j % 64);
3924450c44fSSebastien Boeuf             }
3934450c44fSSebastien Boeuf         }
3944450c44fSSebastien Boeuf 
3954450c44fSSebastien Boeuf         MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
3964450c44fSSebastien Boeuf     }
39754f814f3SSebastien Boeuf }
39854f814f3SSebastien Boeuf 
39951d102c7SHui Zhu struct MemEpollHandler {
400a423bf13SSebastien Boeuf     mem: GuestMemoryAtomic<GuestMemoryMmap>,
40151d102c7SHui Zhu     host_addr: u64,
40251d102c7SHui Zhu     host_fd: Option<RawFd>,
40361f9a4ecSSebastien Boeuf     blocks_state: Arc<Mutex<BlocksState>>,
40451d102c7SHui Zhu     config: Arc<Mutex<VirtioMemConfig>>,
405a423bf13SSebastien Boeuf     queue: Queue,
40651d102c7SHui Zhu     interrupt_cb: Arc<dyn VirtioInterrupt>,
40751d102c7SHui Zhu     queue_evt: EventFd,
40851d102c7SHui Zhu     kill_evt: EventFd,
40951d102c7SHui Zhu     pause_evt: EventFd,
410c397c9c9SSebastien Boeuf     hugepages: bool,
411fd4f32faSRob Bradford     dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
41251d102c7SHui Zhu }
41351d102c7SHui Zhu 
41451d102c7SHui Zhu impl MemEpollHandler {
discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error>41554f814f3SSebastien Boeuf     fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
416c397c9c9SSebastien Boeuf         // Use fallocate if the memory region is backed by a file.
41754f814f3SSebastien Boeuf         if let Some(fd) = self.host_fd {
418c45d24dfSWei Liu             // SAFETY: FFI call with valid arguments
41951d102c7SHui Zhu             let res = unsafe {
42051d102c7SHui Zhu                 libc::fallocate64(
42151d102c7SHui Zhu                     fd,
42251d102c7SHui Zhu                     libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
42351d102c7SHui Zhu                     offset as libc::off64_t,
42454f814f3SSebastien Boeuf                     size as libc::off64_t,
42551d102c7SHui Zhu                 )
42651d102c7SHui Zhu             };
42751d102c7SHui Zhu             if res != 0 {
42854f814f3SSebastien Boeuf                 let err = io::Error::last_os_error();
42954f814f3SSebastien Boeuf                 error!("Deallocating file space failed: {}", err);
43054f814f3SSebastien Boeuf                 return Err(Error::DiscardMemoryRange(err));
43151d102c7SHui Zhu             }
43251d102c7SHui Zhu         }
433c397c9c9SSebastien Boeuf 
434c397c9c9SSebastien Boeuf         // Only use madvise if the memory region is not allocated with
435c397c9c9SSebastien Boeuf         // hugepages.
436c397c9c9SSebastien Boeuf         if !self.hugepages {
437c45d24dfSWei Liu             // SAFETY: FFI call with valid arguments
43851d102c7SHui Zhu             let res = unsafe {
43951d102c7SHui Zhu                 libc::madvise(
44054f814f3SSebastien Boeuf                     (self.host_addr + offset) as *mut libc::c_void,
44154f814f3SSebastien Boeuf                     size as libc::size_t,
44251d102c7SHui Zhu                     libc::MADV_DONTNEED,
44351d102c7SHui Zhu                 )
44451d102c7SHui Zhu             };
44551d102c7SHui Zhu             if res != 0 {
44654f814f3SSebastien Boeuf                 let err = io::Error::last_os_error();
44754f814f3SSebastien Boeuf                 error!("Advising kernel about pages range failed: {}", err);
44854f814f3SSebastien Boeuf                 return Err(Error::DiscardMemoryRange(err));
44954f814f3SSebastien Boeuf             }
450c397c9c9SSebastien Boeuf         }
45154f814f3SSebastien Boeuf 
45254f814f3SSebastien Boeuf         Ok(())
45354f814f3SSebastien Boeuf     }
45454f814f3SSebastien Boeuf 
state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u1645554f814f3SSebastien Boeuf     fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
45654f814f3SSebastien Boeuf         let mut config = self.config.lock().unwrap();
45754f814f3SSebastien Boeuf         let size: u64 = nb_blocks as u64 * config.block_size;
45854f814f3SSebastien Boeuf 
45954f814f3SSebastien Boeuf         if plug && (config.plugged_size + size > config.requested_size) {
46054f814f3SSebastien Boeuf             return VIRTIO_MEM_RESP_NACK;
46154f814f3SSebastien Boeuf         }
46254f814f3SSebastien Boeuf         if !config.is_valid_range(addr, size) {
46354f814f3SSebastien Boeuf             return VIRTIO_MEM_RESP_ERROR;
46454f814f3SSebastien Boeuf         }
46554f814f3SSebastien Boeuf 
46654f814f3SSebastien Boeuf         let offset = addr - config.addr;
46754f814f3SSebastien Boeuf 
46854f814f3SSebastien Boeuf         let first_block_index = (offset / config.block_size) as usize;
46954f814f3SSebastien Boeuf         if !self
47054f814f3SSebastien Boeuf             .blocks_state
47161f9a4ecSSebastien Boeuf             .lock()
47261f9a4ecSSebastien Boeuf             .unwrap()
47354f814f3SSebastien Boeuf             .is_range_state(first_block_index, nb_blocks, !plug)
47454f814f3SSebastien Boeuf         {
47554f814f3SSebastien Boeuf             return VIRTIO_MEM_RESP_ERROR;
47654f814f3SSebastien Boeuf         }
47754f814f3SSebastien Boeuf 
47854f814f3SSebastien Boeuf         if !plug {
47954f814f3SSebastien Boeuf             if let Err(e) = self.discard_memory_range(offset, size) {
48054f814f3SSebastien Boeuf                 error!("failed discarding memory range: {:?}", e);
48151d102c7SHui Zhu                 return VIRTIO_MEM_RESP_ERROR;
48251d102c7SHui Zhu             }
48351d102c7SHui Zhu         }
48451d102c7SHui Zhu 
48554f814f3SSebastien Boeuf         self.blocks_state
48661f9a4ecSSebastien Boeuf             .lock()
48761f9a4ecSSebastien Boeuf             .unwrap()
48854f814f3SSebastien Boeuf             .set_range(first_block_index, nb_blocks, plug);
48951d102c7SHui Zhu 
490fd4f32faSRob Bradford         let handlers = self.dma_mapping_handlers.lock().unwrap();
4910d3c5c96SRob Bradford         if plug {
49261f9a4ecSSebastien Boeuf             let mut gpa = addr;
49361f9a4ecSSebastien Boeuf             for _ in 0..nb_blocks {
494fd4f32faSRob Bradford                 for (_, handler) in handlers.iter() {
49561f9a4ecSSebastien Boeuf                     if let Err(e) = handler.map(gpa, gpa, config.block_size) {
49661f9a4ecSSebastien Boeuf                         error!(
49761f9a4ecSSebastien Boeuf                             "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
49861f9a4ecSSebastien Boeuf                             gpa, config.block_size, e
49961f9a4ecSSebastien Boeuf                         );
50061f9a4ecSSebastien Boeuf                         return VIRTIO_MEM_RESP_ERROR;
50161f9a4ecSSebastien Boeuf                     }
50261f9a4ecSSebastien Boeuf                 }
50361f9a4ecSSebastien Boeuf 
50461f9a4ecSSebastien Boeuf                 gpa += config.block_size;
50561f9a4ecSSebastien Boeuf             }
50661f9a4ecSSebastien Boeuf 
50754f814f3SSebastien Boeuf             config.plugged_size += size;
50854f814f3SSebastien Boeuf         } else {
509fd4f32faSRob Bradford             for (_, handler) in handlers.iter() {
51061f9a4ecSSebastien Boeuf                 if let Err(e) = handler.unmap(addr, size) {
51161f9a4ecSSebastien Boeuf                     error!(
51261f9a4ecSSebastien Boeuf                         "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
51361f9a4ecSSebastien Boeuf                         addr, size, e
51461f9a4ecSSebastien Boeuf                     );
51561f9a4ecSSebastien Boeuf                     return VIRTIO_MEM_RESP_ERROR;
51661f9a4ecSSebastien Boeuf                 }
51761f9a4ecSSebastien Boeuf             }
51861f9a4ecSSebastien Boeuf 
51954f814f3SSebastien Boeuf             config.plugged_size -= size;
52051d102c7SHui Zhu         }
52151d102c7SHui Zhu 
52251d102c7SHui Zhu         VIRTIO_MEM_RESP_ACK
52351d102c7SHui Zhu     }
52451d102c7SHui Zhu 
unplug_all(&mut self) -> u1652554f814f3SSebastien Boeuf     fn unplug_all(&mut self) -> u16 {
52654f814f3SSebastien Boeuf         let mut config = self.config.lock().unwrap();
52754f814f3SSebastien Boeuf         if let Err(e) = self.discard_memory_range(0, config.region_size) {
52854f814f3SSebastien Boeuf             error!("failed discarding memory range: {:?}", e);
52954f814f3SSebastien Boeuf             return VIRTIO_MEM_RESP_ERROR;
53054f814f3SSebastien Boeuf         }
53154f814f3SSebastien Boeuf 
53261f9a4ecSSebastien Boeuf         // Remaining plugged blocks are unmapped.
53361f9a4ecSSebastien Boeuf         if config.plugged_size > 0 {
534fd4f32faSRob Bradford             let handlers = self.dma_mapping_handlers.lock().unwrap();
53561f9a4ecSSebastien Boeuf             for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
53661f9a4ecSSebastien Boeuf                 if *plugged {
53761f9a4ecSSebastien Boeuf                     let gpa = config.addr + (idx as u64 * config.block_size);
538fd4f32faSRob Bradford                     for (_, handler) in handlers.iter() {
53961f9a4ecSSebastien Boeuf                         if let Err(e) = handler.unmap(gpa, config.block_size) {
54061f9a4ecSSebastien Boeuf                             error!(
54161f9a4ecSSebastien Boeuf                                 "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
54261f9a4ecSSebastien Boeuf                                 gpa, config.block_size, e
54361f9a4ecSSebastien Boeuf                             );
54461f9a4ecSSebastien Boeuf                             return VIRTIO_MEM_RESP_ERROR;
54561f9a4ecSSebastien Boeuf                         }
54661f9a4ecSSebastien Boeuf                     }
54761f9a4ecSSebastien Boeuf                 }
54861f9a4ecSSebastien Boeuf             }
54961f9a4ecSSebastien Boeuf         }
55061f9a4ecSSebastien Boeuf 
55161f9a4ecSSebastien Boeuf         self.blocks_state.lock().unwrap().set_range(
55261f9a4ecSSebastien Boeuf             0,
55361f9a4ecSSebastien Boeuf             (config.region_size / config.block_size) as u16,
55461f9a4ecSSebastien Boeuf             false,
55561f9a4ecSSebastien Boeuf         );
55654f814f3SSebastien Boeuf 
55754f814f3SSebastien Boeuf         config.plugged_size = 0;
55854f814f3SSebastien Boeuf 
55954f814f3SSebastien Boeuf         VIRTIO_MEM_RESP_ACK
56054f814f3SSebastien Boeuf     }
56154f814f3SSebastien Boeuf 
state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16)56254f814f3SSebastien Boeuf     fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
56354f814f3SSebastien Boeuf         let config = self.config.lock().unwrap();
56454f814f3SSebastien Boeuf         let size: u64 = nb_blocks as u64 * config.block_size;
56554f814f3SSebastien Boeuf 
56654f814f3SSebastien Boeuf         let resp_type = if config.is_valid_range(addr, size) {
56751d102c7SHui Zhu             VIRTIO_MEM_RESP_ACK
56851d102c7SHui Zhu         } else {
56951d102c7SHui Zhu             VIRTIO_MEM_RESP_ERROR
57051d102c7SHui Zhu         };
57151d102c7SHui Zhu 
57251d102c7SHui Zhu         let offset = addr - config.addr;
57354f814f3SSebastien Boeuf         let first_block_index = (offset / config.block_size) as usize;
57461f9a4ecSSebastien Boeuf         let resp_state =
57561f9a4ecSSebastien Boeuf             if self
57654f814f3SSebastien Boeuf                 .blocks_state
57761f9a4ecSSebastien Boeuf                 .lock()
57861f9a4ecSSebastien Boeuf                 .unwrap()
57954f814f3SSebastien Boeuf                 .is_range_state(first_block_index, nb_blocks, true)
58054f814f3SSebastien Boeuf             {
58151d102c7SHui Zhu                 VIRTIO_MEM_STATE_PLUGGED
58261f9a4ecSSebastien Boeuf             } else if self.blocks_state.lock().unwrap().is_range_state(
58361f9a4ecSSebastien Boeuf                 first_block_index,
58461f9a4ecSSebastien Boeuf                 nb_blocks,
58561f9a4ecSSebastien Boeuf                 false,
58661f9a4ecSSebastien Boeuf             ) {
58751d102c7SHui Zhu                 VIRTIO_MEM_STATE_UNPLUGGED
58851d102c7SHui Zhu             } else {
58951d102c7SHui Zhu                 VIRTIO_MEM_STATE_MIXED
59051d102c7SHui Zhu             };
59151d102c7SHui Zhu 
59251d102c7SHui Zhu         (resp_type, resp_state)
59351d102c7SHui Zhu     }
59451d102c7SHui Zhu 
signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError>595de3e003eSSebastien Boeuf     fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
596de3e003eSSebastien Boeuf         self.interrupt_cb.trigger(int_type).map_err(|e| {
59751d102c7SHui Zhu             error!("Failed to signal used queue: {:?}", e);
59851d102c7SHui Zhu             DeviceError::FailedSignalingUsedQueue(e)
59951d102c7SHui Zhu         })
60051d102c7SHui Zhu     }
60151d102c7SHui Zhu 
process_queue(&mut self) -> Result<bool, Error>602756aebafSBo Chen     fn process_queue(&mut self) -> Result<bool, Error> {
603a4859ffeSSebastien Boeuf         let mut used_descs = false;
6040249e864SSebastien Boeuf 
60587f57f7cSSebastien Boeuf         while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
606756aebafSBo Chen             let r = Request::parse(&mut desc_chain)?;
607fd9fa2a6SBo Chen             let (resp_type, resp_state) = match r.req.req_type {
608fd9fa2a6SBo Chen                 VIRTIO_MEM_REQ_PLUG => (
609fd9fa2a6SBo Chen                     self.state_change_request(r.req.addr, r.req.nb_blocks, true),
610fd9fa2a6SBo Chen                     0u16,
611fd9fa2a6SBo Chen                 ),
612fd9fa2a6SBo Chen                 VIRTIO_MEM_REQ_UNPLUG => (
613fd9fa2a6SBo Chen                     self.state_change_request(r.req.addr, r.req.nb_blocks, false),
614fd9fa2a6SBo Chen                     0u16,
615fd9fa2a6SBo Chen                 ),
616fd9fa2a6SBo Chen                 VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
617fd9fa2a6SBo Chen                 VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
61851d102c7SHui Zhu                 _ => {
6196725771dSPhilipp Schuster                     return Err(Error::UnknownRequestType(r.req.req_type));
62051d102c7SHui Zhu                 }
62151d102c7SHui Zhu             };
622fd9fa2a6SBo Chen             let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
623a4859ffeSSebastien Boeuf             self.queue
624a4859ffeSSebastien Boeuf                 .add_used(desc_chain.memory(), desc_chain.head_index(), len)
6250235ed33SBo Chen                 .map_err(Error::QueueAddUsed)?;
626a4859ffeSSebastien Boeuf             used_descs = true;
62751d102c7SHui Zhu         }
62851d102c7SHui Zhu 
629756aebafSBo Chen         Ok(used_descs)
63051d102c7SHui Zhu     }
63151d102c7SHui Zhu 
run( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, ) -> result::Result<(), EpollHelperError>632aa57762cSSebastien Boeuf     fn run(
633aa57762cSSebastien Boeuf         &mut self,
634aa57762cSSebastien Boeuf         paused: Arc<AtomicBool>,
635aa57762cSSebastien Boeuf         paused_sync: Arc<Barrier>,
636aa57762cSSebastien Boeuf     ) -> result::Result<(), EpollHelperError> {
63766efe3cfSRob Bradford         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
63866efe3cfSRob Bradford         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
639aa57762cSSebastien Boeuf         helper.run(paused, paused_sync, self)?;
64051d102c7SHui Zhu 
64166efe3cfSRob Bradford         Ok(())
64266efe3cfSRob Bradford     }
643e382dc66SSebastien Boeuf }
644e382dc66SSebastien Boeuf 
64566efe3cfSRob Bradford impl EpollHelperHandler for MemEpollHandler {
handle_event( &mut self, _helper: &mut EpollHelper, event: &epoll::Event, ) -> result::Result<(), EpollHelperError>646b1752994SBo Chen     fn handle_event(
647b1752994SBo Chen         &mut self,
648b1752994SBo Chen         _helper: &mut EpollHelper,
649b1752994SBo Chen         event: &epoll::Event,
650b1752994SBo Chen     ) -> result::Result<(), EpollHelperError> {
65101e7bd72SSebastien Boeuf         let ev_type = event.data as u16;
65201e7bd72SSebastien Boeuf         match ev_type {
65351d102c7SHui Zhu             QUEUE_AVAIL_EVENT => {
654b1752994SBo Chen                 self.queue_evt.read().map_err(|e| {
655b1752994SBo Chen                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
656b1752994SBo Chen                 })?;
657b1752994SBo Chen 
658756aebafSBo Chen                 let needs_notification = self.process_queue().map_err(|e| {
659756aebafSBo Chen                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
660756aebafSBo Chen                 })?;
661756aebafSBo Chen                 if needs_notification {
662b1752994SBo Chen                     self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
663b1752994SBo Chen                         EpollHelperError::HandleEvent(anyhow!(
664b1752994SBo Chen                             "Failed to signal used queue: {:?}",
665b1752994SBo Chen                             e
666b1752994SBo Chen                         ))
667b1752994SBo Chen                     })?;
66851d102c7SHui Zhu                 }
66951d102c7SHui Zhu             }
67051d102c7SHui Zhu             _ => {
671b1752994SBo Chen                 return Err(EpollHelperError::HandleEvent(anyhow!(
672b1752994SBo Chen                     "Unexpected event: {}",
673b1752994SBo Chen                     ev_type
674b1752994SBo Chen                 )));
67551d102c7SHui Zhu             }
67651d102c7SHui Zhu         }
677b1752994SBo Chen         Ok(())
67851d102c7SHui Zhu     }
67951d102c7SHui Zhu }
68051d102c7SHui Zhu 
/// Key identifying the owner of a DMA mapping handler registered with a
/// virtio-mem device.
///
/// The variant order is significant because `Ord` is derived and the value is
/// used as a `BTreeMap` key.
#[derive(Eq, PartialEq, Ord, PartialOrd)]
pub enum VirtioMemMappingSource {
    /// Handler registered on behalf of a container.
    Container,
    /// Handler registered on behalf of a single device; the `u32` is the
    /// caller-provided identifier of that device.
    Device(u32),
}
686fd4f32faSRob Bradford 
68710ab87d6SRob Bradford #[derive(Serialize, Deserialize)]
688d7115ec6SSebastien Boeuf pub struct MemState {
689d7115ec6SSebastien Boeuf     pub avail_features: u64,
690d7115ec6SSebastien Boeuf     pub acked_features: u64,
691d7115ec6SSebastien Boeuf     pub config: VirtioMemConfig,
692d7115ec6SSebastien Boeuf     pub blocks_state: BlocksState,
693d7115ec6SSebastien Boeuf }
694d7115ec6SSebastien Boeuf 
69551d102c7SHui Zhu pub struct Mem {
696d63dcae2SRob Bradford     common: VirtioCommon,
6979b53044aSSebastien Boeuf     id: String,
69851d102c7SHui Zhu     host_addr: u64,
69951d102c7SHui Zhu     host_fd: Option<RawFd>,
70051d102c7SHui Zhu     config: Arc<Mutex<VirtioMemConfig>>,
701c4601787SBo Chen     seccomp_action: SeccompAction,
702c397c9c9SSebastien Boeuf     hugepages: bool,
703fd4f32faSRob Bradford     dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
70461f9a4ecSSebastien Boeuf     blocks_state: Arc<Mutex<BlocksState>>,
705687d646cSRob Bradford     exit_evt: EventFd,
706f38056fcSSebastien Boeuf     interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
70751d102c7SHui Zhu }
70851d102c7SHui Zhu 
70951d102c7SHui Zhu impl Mem {
71051d102c7SHui Zhu     // Create a new virtio-mem device.
711687d646cSRob Bradford     #[allow(clippy::too_many_arguments)]
new( id: String, region: &Arc<GuestRegionMmap>, seccomp_action: SeccompAction, numa_node_id: Option<u16>, initial_size: u64, hugepages: bool, exit_evt: EventFd, blocks_state: Arc<Mutex<BlocksState>>, state: Option<MemState>, ) -> io::Result<Mem>712c4601787SBo Chen     pub fn new(
713c4601787SBo Chen         id: String,
714c4601787SBo Chen         region: &Arc<GuestRegionMmap>,
715c4601787SBo Chen         seccomp_action: SeccompAction,
716dcedd4cdSSebastien Boeuf         numa_node_id: Option<u16>,
71733a1e37cSHui Zhu         initial_size: u64,
718c397c9c9SSebastien Boeuf         hugepages: bool,
719687d646cSRob Bradford         exit_evt: EventFd,
720e390775bSSebastien Boeuf         blocks_state: Arc<Mutex<BlocksState>>,
7211f0e5eb6SSebastien Boeuf         state: Option<MemState>,
722c4601787SBo Chen     ) -> io::Result<Mem> {
72351d102c7SHui Zhu         let region_len = region.len();
72451d102c7SHui Zhu 
7251798ed81SSebastien Boeuf         if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
726ea4693a0SJinank Jain             return Err(io::Error::other(format!(
727ea4693a0SJinank Jain                 "Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"
728ea4693a0SJinank Jain             )));
72951d102c7SHui Zhu         }
73051d102c7SHui Zhu 
731b62a40efSSebastien Boeuf         let (avail_features, acked_features, config, paused) = if let Some(state) = state {
7321f0e5eb6SSebastien Boeuf             info!("Restoring virtio-mem {}", id);
7331f0e5eb6SSebastien Boeuf             *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
734b62a40efSSebastien Boeuf             (
735b62a40efSSebastien Boeuf                 state.avail_features,
736b62a40efSSebastien Boeuf                 state.acked_features,
737b62a40efSSebastien Boeuf                 state.config,
738b62a40efSSebastien Boeuf                 true,
739b62a40efSSebastien Boeuf             )
7401f0e5eb6SSebastien Boeuf         } else {
741dcedd4cdSSebastien Boeuf             let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
74251d102c7SHui Zhu 
743ad8adcb9SSebastien Boeuf             let mut config = VirtioMemConfig {
744ad8adcb9SSebastien Boeuf                 block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
745ad8adcb9SSebastien Boeuf                 addr: region.start_addr().raw_value(),
746ad8adcb9SSebastien Boeuf                 region_size: region.len(),
747ad8adcb9SSebastien Boeuf                 usable_region_size: region.len(),
748ad8adcb9SSebastien Boeuf                 plugged_size: 0,
749ad8adcb9SSebastien Boeuf                 requested_size: 0,
750ad8adcb9SSebastien Boeuf                 ..Default::default()
751ad8adcb9SSebastien Boeuf             };
75251d102c7SHui Zhu 
75333a1e37cSHui Zhu             if initial_size != 0 {
754ad8adcb9SSebastien Boeuf                 config.resize(initial_size).map_err(|e| {
755ea4693a0SJinank Jain                     io::Error::other(format!(
7565e527294SRob Bradford                         "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
757ea4693a0SJinank Jain                     ))
75833a1e37cSHui Zhu                 })?;
75933a1e37cSHui Zhu             }
76033a1e37cSHui Zhu 
761dcedd4cdSSebastien Boeuf             if let Some(node_id) = numa_node_id {
762dcedd4cdSSebastien Boeuf                 avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
763dcedd4cdSSebastien Boeuf                 config.node_id = node_id;
764dcedd4cdSSebastien Boeuf             }
765dcedd4cdSSebastien Boeuf 
766ad8adcb9SSebastien Boeuf             // Make sure the virtio-mem configuration complies with the
767ad8adcb9SSebastien Boeuf             // specification.
768ad8adcb9SSebastien Boeuf             config.validate().map_err(|e| {
769ea4693a0SJinank Jain                 io::Error::other(format!("Invalid virtio-mem configuration: {e:?}"))
770ad8adcb9SSebastien Boeuf             })?;
771ad8adcb9SSebastien Boeuf 
772b62a40efSSebastien Boeuf             (avail_features, 0, config, false)
7731f0e5eb6SSebastien Boeuf         };
7741f0e5eb6SSebastien Boeuf 
775ad8adcb9SSebastien Boeuf         let host_fd = region
776ad8adcb9SSebastien Boeuf             .file_offset()
777ad8adcb9SSebastien Boeuf             .map(|f_offset| f_offset.file().as_raw_fd());
77851d102c7SHui Zhu 
77951d102c7SHui Zhu         Ok(Mem {
780d63dcae2SRob Bradford             common: VirtioCommon {
781aa34d545SRob Bradford                 device_type: VirtioDeviceType::Mem as u32,
782d63dcae2SRob Bradford                 avail_features,
7831f0e5eb6SSebastien Boeuf                 acked_features,
784df8a55abSRob Bradford                 paused_sync: Some(Arc::new(Barrier::new(2))),
785df8a55abSRob Bradford                 queue_sizes: QUEUE_SIZES.to_vec(),
786c90f77e3SRob Bradford                 min_queues: 1,
787b62a40efSSebastien Boeuf                 paused: Arc::new(AtomicBool::new(paused)),
788a9a13846SRob Bradford                 ..Default::default()
789d63dcae2SRob Bradford             },
7909b53044aSSebastien Boeuf             id,
79151d102c7SHui Zhu             host_addr: region.as_ptr() as u64,
79251d102c7SHui Zhu             host_fd,
79351d102c7SHui Zhu             config: Arc::new(Mutex::new(config)),
794c4601787SBo Chen             seccomp_action,
795c397c9c9SSebastien Boeuf             hugepages,
796fd4f32faSRob Bradford             dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
797e390775bSSebastien Boeuf             blocks_state,
798687d646cSRob Bradford             exit_evt,
799f38056fcSSebastien Boeuf             interrupt_cb: None,
80051d102c7SHui Zhu         })
80151d102c7SHui Zhu     }
80261f9a4ecSSebastien Boeuf 
resize(&mut self, size: u64) -> result::Result<(), Error>803f38056fcSSebastien Boeuf     pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
804f38056fcSSebastien Boeuf         let mut config = self.config.lock().unwrap();
805f38056fcSSebastien Boeuf         config.resize(size).map_err(|e| {
806f38056fcSSebastien Boeuf             Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
807f38056fcSSebastien Boeuf         })?;
808f38056fcSSebastien Boeuf 
809f38056fcSSebastien Boeuf         if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
810f38056fcSSebastien Boeuf             interrupt_cb
811f38056fcSSebastien Boeuf                 .trigger(VirtioInterruptType::Config)
812f38056fcSSebastien Boeuf                 .map_err(|e| {
813f38056fcSSebastien Boeuf                     Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
814f38056fcSSebastien Boeuf                 })
815f38056fcSSebastien Boeuf         } else {
816f38056fcSSebastien Boeuf             Ok(())
817f38056fcSSebastien Boeuf         }
818f38056fcSSebastien Boeuf     }
819f38056fcSSebastien Boeuf 
add_dma_mapping_handler( &mut self, source: VirtioMemMappingSource, handler: Arc<dyn ExternalDmaMapping>, ) -> result::Result<(), Error>82061f9a4ecSSebastien Boeuf     pub fn add_dma_mapping_handler(
82161f9a4ecSSebastien Boeuf         &mut self,
822fd4f32faSRob Bradford         source: VirtioMemMappingSource,
82361f9a4ecSSebastien Boeuf         handler: Arc<dyn ExternalDmaMapping>,
82461f9a4ecSSebastien Boeuf     ) -> result::Result<(), Error> {
82561f9a4ecSSebastien Boeuf         let config = self.config.lock().unwrap();
82661f9a4ecSSebastien Boeuf 
82761f9a4ecSSebastien Boeuf         if config.plugged_size > 0 {
82861f9a4ecSSebastien Boeuf             for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
82961f9a4ecSSebastien Boeuf                 if *plugged {
83061f9a4ecSSebastien Boeuf                     let gpa = config.addr + (idx as u64 * config.block_size);
83161f9a4ecSSebastien Boeuf                     handler
83261f9a4ecSSebastien Boeuf                         .map(gpa, gpa, config.block_size)
83361f9a4ecSSebastien Boeuf                         .map_err(Error::DmaMap)?;
83461f9a4ecSSebastien Boeuf                 }
83561f9a4ecSSebastien Boeuf             }
83661f9a4ecSSebastien Boeuf         }
83761f9a4ecSSebastien Boeuf 
838fd4f32faSRob Bradford         self.dma_mapping_handlers
839fd4f32faSRob Bradford             .lock()
840fd4f32faSRob Bradford             .unwrap()
841fd4f32faSRob Bradford             .insert(source, handler);
842fd4f32faSRob Bradford 
843fd4f32faSRob Bradford         Ok(())
844fd4f32faSRob Bradford     }
845fd4f32faSRob Bradford 
remove_dma_mapping_handler( &mut self, source: VirtioMemMappingSource, ) -> result::Result<(), Error>846fd4f32faSRob Bradford     pub fn remove_dma_mapping_handler(
847fd4f32faSRob Bradford         &mut self,
848fd4f32faSRob Bradford         source: VirtioMemMappingSource,
849fd4f32faSRob Bradford     ) -> result::Result<(), Error> {
850fd4f32faSRob Bradford         let handler = self
851fd4f32faSRob Bradford             .dma_mapping_handlers
852fd4f32faSRob Bradford             .lock()
853fd4f32faSRob Bradford             .unwrap()
854fd4f32faSRob Bradford             .remove(&source)
855fd4f32faSRob Bradford             .ok_or(Error::InvalidDmaMappingHandler)?;
856fd4f32faSRob Bradford 
857fd4f32faSRob Bradford         let config = self.config.lock().unwrap();
858fd4f32faSRob Bradford 
859fd4f32faSRob Bradford         if config.plugged_size > 0 {
860fd4f32faSRob Bradford             for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
861fd4f32faSRob Bradford                 if *plugged {
862fd4f32faSRob Bradford                     let gpa = config.addr + (idx as u64 * config.block_size);
863fd4f32faSRob Bradford                     handler
864fd4f32faSRob Bradford                         .unmap(gpa, config.block_size)
865fd4f32faSRob Bradford                         .map_err(Error::DmaUnmap)?;
866fd4f32faSRob Bradford                 }
867fd4f32faSRob Bradford             }
868fd4f32faSRob Bradford         }
86961f9a4ecSSebastien Boeuf 
87061f9a4ecSSebastien Boeuf         Ok(())
87161f9a4ecSSebastien Boeuf     }
872d7115ec6SSebastien Boeuf 
state(&self) -> MemState873d7115ec6SSebastien Boeuf     fn state(&self) -> MemState {
874d7115ec6SSebastien Boeuf         MemState {
875d7115ec6SSebastien Boeuf             avail_features: self.common.avail_features,
876d7115ec6SSebastien Boeuf             acked_features: self.common.acked_features,
877d7115ec6SSebastien Boeuf             config: *(self.config.lock().unwrap()),
878d7115ec6SSebastien Boeuf             blocks_state: self.blocks_state.lock().unwrap().clone(),
879d7115ec6SSebastien Boeuf         }
880d7115ec6SSebastien Boeuf     }
881d7115ec6SSebastien Boeuf 
882194b59f4SRob Bradford     #[cfg(fuzzing)]
wait_for_epoll_threads(&mut self)883194b59f4SRob Bradford     pub fn wait_for_epoll_threads(&mut self) {
884194b59f4SRob Bradford         self.common.wait_for_epoll_threads();
885194b59f4SRob Bradford     }
88651d102c7SHui Zhu }
88751d102c7SHui Zhu 
88851d102c7SHui Zhu impl Drop for Mem {
drop(&mut self)88951d102c7SHui Zhu     fn drop(&mut self) {
890df8a55abSRob Bradford         if let Some(kill_evt) = self.common.kill_evt.take() {
89151d102c7SHui Zhu             // Ignore the result because there is nothing we can do about it.
89251d102c7SHui Zhu             let _ = kill_evt.write(1);
89351d102c7SHui Zhu         }
894ad6c0ee5SPhilipp Schuster         self.common.wait_for_epoll_threads();
89551d102c7SHui Zhu     }
89651d102c7SHui Zhu }
89751d102c7SHui Zhu 
89851d102c7SHui Zhu impl VirtioDevice for Mem {
device_type(&self) -> u3289951d102c7SHui Zhu     fn device_type(&self) -> u32 {
900df8a55abSRob Bradford         self.common.device_type
90151d102c7SHui Zhu     }
90251d102c7SHui Zhu 
queue_max_sizes(&self) -> &[u16]90351d102c7SHui Zhu     fn queue_max_sizes(&self) -> &[u16] {
904df8a55abSRob Bradford         &self.common.queue_sizes
90551d102c7SHui Zhu     }
90651d102c7SHui Zhu 
features(&self) -> u6490751d102c7SHui Zhu     fn features(&self) -> u64 {
908d63dcae2SRob Bradford         self.common.avail_features
90951d102c7SHui Zhu     }
91051d102c7SHui Zhu 
ack_features(&mut self, value: u64)91151d102c7SHui Zhu     fn ack_features(&mut self, value: u64) {
912d63dcae2SRob Bradford         self.common.ack_features(value)
91351d102c7SHui Zhu     }
91451d102c7SHui Zhu 
read_config(&self, offset: u64, data: &mut [u8])915751a3020SRob Bradford     fn read_config(&self, offset: u64, data: &mut [u8]) {
916751a3020SRob Bradford         self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
91751d102c7SHui Zhu     }
91851d102c7SHui Zhu 
activate( &mut self, mem: GuestMemoryAtomic<GuestMemoryMmap>, interrupt_cb: Arc<dyn VirtioInterrupt>, mut queues: Vec<(usize, Queue, EventFd)>, ) -> ActivateResult91951d102c7SHui Zhu     fn activate(
92051d102c7SHui Zhu         &mut self,
921a423bf13SSebastien Boeuf         mem: GuestMemoryAtomic<GuestMemoryMmap>,
92251d102c7SHui Zhu         interrupt_cb: Arc<dyn VirtioInterrupt>,
923a423bf13SSebastien Boeuf         mut queues: Vec<(usize, Queue, EventFd)>,
92451d102c7SHui Zhu     ) -> ActivateResult {
9253f62a172SSebastien Boeuf         self.common.activate(&queues, &interrupt_cb)?;
926280bef83SRob Bradford         let (kill_evt, pause_evt) = self.common.dup_eventfds();
9273f62a172SSebastien Boeuf 
9283f62a172SSebastien Boeuf         let (_, queue, queue_evt) = queues.remove(0);
9293f62a172SSebastien Boeuf 
930f38056fcSSebastien Boeuf         self.interrupt_cb = Some(interrupt_cb.clone());
931f38056fcSSebastien Boeuf 
93251d102c7SHui Zhu         let mut handler = MemEpollHandler {
933a423bf13SSebastien Boeuf             mem,
93451d102c7SHui Zhu             host_addr: self.host_addr,
93551d102c7SHui Zhu             host_fd: self.host_fd,
93661f9a4ecSSebastien Boeuf             blocks_state: Arc::clone(&self.blocks_state),
93751d102c7SHui Zhu             config: self.config.clone(),
9383f62a172SSebastien Boeuf             queue,
93951d102c7SHui Zhu             interrupt_cb,
9403f62a172SSebastien Boeuf             queue_evt,
94151d102c7SHui Zhu             kill_evt,
94251d102c7SHui Zhu             pause_evt,
943c397c9c9SSebastien Boeuf             hugepages: self.hugepages,
944fd4f32faSRob Bradford             dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
94551d102c7SHui Zhu         };
94651d102c7SHui Zhu 
9470fb24ea3SSebastien Boeuf         let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
9480fb24ea3SSebastien Boeuf         for range in unplugged_memory_ranges.regions() {
94954f814f3SSebastien Boeuf             handler
9500fb24ea3SSebastien Boeuf                 .discard_memory_range(range.gpa, range.length)
95154f814f3SSebastien Boeuf                 .map_err(|e| {
9520fb24ea3SSebastien Boeuf                     error!(
9530fb24ea3SSebastien Boeuf                         "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
9540fb24ea3SSebastien Boeuf                         range.gpa,
9550fb24ea3SSebastien Boeuf                         range.gpa + range.length - 1,
9560fb24ea3SSebastien Boeuf                         e
9570fb24ea3SSebastien Boeuf                     );
95854f814f3SSebastien Boeuf                     ActivateError::BadActivate
95954f814f3SSebastien Boeuf                 })?;
9600fb24ea3SSebastien Boeuf         }
96154f814f3SSebastien Boeuf 
962df8a55abSRob Bradford         let paused = self.common.paused.clone();
963df8a55abSRob Bradford         let paused_sync = self.common.paused_sync.clone();
96451d102c7SHui Zhu         let mut epoll_threads = Vec::new();
96554e523c3SRob Bradford 
96654e523c3SRob Bradford         spawn_virtio_thread(
96754e523c3SRob Bradford             &self.id,
96854e523c3SRob Bradford             &self.seccomp_action,
96954e523c3SRob Bradford             Thread::VirtioMem,
97054e523c3SRob Bradford             &mut epoll_threads,
971687d646cSRob Bradford             &self.exit_evt,
972df5b803aSBo Chen             move || handler.run(paused, paused_sync.unwrap()),
97354e523c3SRob Bradford         )?;
974df8a55abSRob Bradford         self.common.epoll_threads = Some(epoll_threads);
97551d102c7SHui Zhu 
976c89095abSRob Bradford         event!("virtio-device", "activated", "id", &self.id);
97751d102c7SHui Zhu         Ok(())
97851d102c7SHui Zhu     }
97951d102c7SHui Zhu 
reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>>98023f9ec50SRob Bradford     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
981c89095abSRob Bradford         let result = self.common.reset();
982c89095abSRob Bradford         event!("virtio-device", "reset", "id", &self.id);
983c89095abSRob Bradford         result
98451d102c7SHui Zhu     }
98551d102c7SHui Zhu }
98651d102c7SHui Zhu 
987df8a55abSRob Bradford impl Pausable for Mem {
pause(&mut self) -> result::Result<(), MigratableError>988df8a55abSRob Bradford     fn pause(&mut self) -> result::Result<(), MigratableError> {
989df8a55abSRob Bradford         self.common.pause()
990df8a55abSRob Bradford     }
991df8a55abSRob Bradford 
resume(&mut self) -> result::Result<(), MigratableError>992df8a55abSRob Bradford     fn resume(&mut self) -> result::Result<(), MigratableError> {
993df8a55abSRob Bradford         self.common.resume()
994df8a55abSRob Bradford     }
995df8a55abSRob Bradford }
996df8a55abSRob Bradford 
9979b53044aSSebastien Boeuf impl Snapshottable for Mem {
id(&self) -> String9989b53044aSSebastien Boeuf     fn id(&self) -> String {
9999b53044aSSebastien Boeuf         self.id.clone()
10009b53044aSSebastien Boeuf     }
1001d7115ec6SSebastien Boeuf 
snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError>1002d7115ec6SSebastien Boeuf     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
100310ab87d6SRob Bradford         Snapshot::new_from_state(&self.state())
1004d7115ec6SSebastien Boeuf     }
10059b53044aSSebastien Boeuf }
10061b1a2175SSamuel Ortiz impl Transportable for Mem {}
100751d102c7SHui Zhu impl Migratable for Mem {}
1008