151d102c7SHui Zhu // Copyright (c) 2020 Ant Financial 251d102c7SHui Zhu // 35e9886bbSRuslan Mstoi // SPDX-License-Identifier: Apache-2.0 45e9886bbSRuslan Mstoi // 551d102c7SHui Zhu // Licensed under the Apache License, Version 2.0 (the "License"); 651d102c7SHui Zhu // you may not use this file except in compliance with the License. 751d102c7SHui Zhu // You may obtain a copy of the License at 851d102c7SHui Zhu // 951d102c7SHui Zhu // http://www.apache.org/licenses/LICENSE-2.0 1051d102c7SHui Zhu // 1151d102c7SHui Zhu // Unless required by applicable law or agreed to in writing, software 1251d102c7SHui Zhu // distributed under the License is distributed on an "AS IS" BASIS, 1351d102c7SHui Zhu // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1451d102c7SHui Zhu // See the License for the specific language governing permissions and 1551d102c7SHui Zhu // limitations under the License. 1651d102c7SHui Zhu 17fd4f32faSRob Bradford use std::collections::BTreeMap; 1851d102c7SHui Zhu use std::mem::size_of; 1966efe3cfSRob Bradford use std::os::unix::io::{AsRawFd, RawFd}; 20f38056fcSSebastien Boeuf use std::sync::atomic::AtomicBool; 2161e57e1cSRuoqing He use std::sync::{mpsc, Arc, Barrier, Mutex}; 2261e57e1cSRuoqing He use std::{io, result}; 2388a9f799SRob Bradford 2488a9f799SRob Bradford use anyhow::anyhow; 2588a9f799SRob Bradford use seccompiler::SeccompAction; 2688a9f799SRob Bradford use serde::{Deserialize, Serialize}; 2754c48439SBo Chen use thiserror::Error; 2887f57f7cSSebastien Boeuf use virtio_queue::{DescriptorChain, Queue, QueueT}; 2961f9a4ecSSebastien Boeuf use vm_device::dma_mapping::ExternalDmaMapping; 3051d102c7SHui Zhu use vm_memory::{ 31a423bf13SSebastien Boeuf Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, 32a423bf13SSebastien Boeuf GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion, 3351d102c7SHui Zhu }; 344450c44fSSebastien Boeuf use vm_migration::protocol::MemoryRangeTable; 3510ab87d6SRob 
Bradford use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 3651d102c7SHui Zhu use vmm_sys_util::eventfd::EventFd; 3751d102c7SHui Zhu 3888a9f799SRob Bradford use super::{ 3961e57e1cSRuoqing He ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 4061e57e1cSRuoqing He Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, 4161e57e1cSRuoqing He VIRTIO_F_VERSION_1, 4288a9f799SRob Bradford }; 4388a9f799SRob Bradford use crate::seccomp_filters::Thread; 4488a9f799SRob Bradford use crate::thread_helper::spawn_virtio_thread; 4561e57e1cSRuoqing He use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VirtioInterruptType}; 4688a9f799SRob Bradford 4751d102c7SHui Zhu const QUEUE_SIZE: u16 = 128; 4851d102c7SHui Zhu const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 4951d102c7SHui Zhu 501798ed81SSebastien Boeuf // 128MiB is the standard memory block size in Linux. A virtio-mem region must 511798ed81SSebastien Boeuf // be aligned on this size, and the region size must be a multiple of it. 52ad8adcb9SSebastien Boeuf pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20; 5351d102c7SHui Zhu // Use 2 MiB alignment so transparent hugepages can be used by KVM. 54ad8adcb9SSebastien Boeuf const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20; 5551d102c7SHui Zhu 5651d102c7SHui Zhu // Request processed successfully, applicable for 5751d102c7SHui Zhu // - VIRTIO_MEM_REQ_PLUG 5851d102c7SHui Zhu // - VIRTIO_MEM_REQ_UNPLUG 5951d102c7SHui Zhu // - VIRTIO_MEM_REQ_UNPLUG_ALL 6051d102c7SHui Zhu // - VIRTIO_MEM_REQ_STATE 6151d102c7SHui Zhu const VIRTIO_MEM_RESP_ACK: u16 = 0; 6251d102c7SHui Zhu 6351d102c7SHui Zhu // Request denied - e.g. 
trying to plug more than requested, applicable for 6451d102c7SHui Zhu // - VIRTIO_MEM_REQ_PLUG 6551d102c7SHui Zhu const VIRTIO_MEM_RESP_NACK: u16 = 1; 6651d102c7SHui Zhu 6751d102c7SHui Zhu // Request cannot be processed right now, try again later, applicable for 6851d102c7SHui Zhu // - VIRTIO_MEM_REQ_PLUG 6951d102c7SHui Zhu // - VIRTIO_MEM_REQ_UNPLUG 7051d102c7SHui Zhu // - VIRTIO_MEM_REQ_UNPLUG_ALL 71ad8adcb9SSebastien Boeuf #[allow(unused)] 72ad8adcb9SSebastien Boeuf const VIRTIO_MEM_RESP_BUSY: u16 = 2; 7351d102c7SHui Zhu 745c3f4dbeSJosh Soref // Error in request (e.g. addresses/alignment), applicable for 7551d102c7SHui Zhu // - VIRTIO_MEM_REQ_PLUG 7651d102c7SHui Zhu // - VIRTIO_MEM_REQ_UNPLUG 7751d102c7SHui Zhu // - VIRTIO_MEM_REQ_STATE 7851d102c7SHui Zhu const VIRTIO_MEM_RESP_ERROR: u16 = 3; 7951d102c7SHui Zhu 8051d102c7SHui Zhu // State of memory blocks is "plugged" 8151d102c7SHui Zhu const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; 8251d102c7SHui Zhu // State of memory blocks is "unplugged" 8351d102c7SHui Zhu const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; 8451d102c7SHui Zhu // State of memory blocks is "mixed" 8551d102c7SHui Zhu const VIRTIO_MEM_STATE_MIXED: u16 = 2; 8651d102c7SHui Zhu 8751d102c7SHui Zhu // request to plug memory blocks 8851d102c7SHui Zhu const VIRTIO_MEM_REQ_PLUG: u16 = 0; 8951d102c7SHui Zhu // request to unplug memory blocks 9051d102c7SHui Zhu const VIRTIO_MEM_REQ_UNPLUG: u16 = 1; 9151d102c7SHui Zhu // request to unplug all blocks and shrink the usable size 9251d102c7SHui Zhu const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2; 9351d102c7SHui Zhu // request information about the plugged state of memory blocks 9451d102c7SHui Zhu const VIRTIO_MEM_REQ_STATE: u16 = 3; 9551d102c7SHui Zhu 9651d102c7SHui Zhu // New descriptors are pending on the virtio queue. 
97f38056fcSSebastien Boeuf const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 9851d102c7SHui Zhu 99dcedd4cdSSebastien Boeuf // Virtio features 100dcedd4cdSSebastien Boeuf const VIRTIO_MEM_F_ACPI_PXM: u8 = 0; 101dcedd4cdSSebastien Boeuf 10254c48439SBo Chen #[derive(Error, Debug)] 10351d102c7SHui Zhu pub enum Error { 104*8e2973feSPhilipp Schuster #[error("Guest gave us bad memory addresses")] 105a2123439SPhilipp Schuster GuestMemory(#[source] GuestMemoryError), 1062af2cc53SBo Chen #[error("Guest gave us a write only descriptor that protocol says to read from")] 10751d102c7SHui Zhu UnexpectedWriteOnlyDescriptor, 1082af2cc53SBo Chen #[error("Guest gave us a read only descriptor that protocol says to write to")] 10951d102c7SHui Zhu UnexpectedReadOnlyDescriptor, 1102af2cc53SBo Chen #[error("Guest gave us too few descriptors in a descriptor chain")] 11151d102c7SHui Zhu DescriptorChainTooShort, 1122af2cc53SBo Chen #[error("Guest gave us a buffer that was too short to use")] 11351d102c7SHui Zhu BufferLengthTooSmall, 1142af2cc53SBo Chen #[error("Guest sent us invalid request")] 11551d102c7SHui Zhu InvalidRequest, 116*8e2973feSPhilipp Schuster #[error("Failed to EventFd write")] 117a2123439SPhilipp Schuster EventFdWriteFail(#[source] std::io::Error), 118*8e2973feSPhilipp Schuster #[error("Failed to EventFd try_clone")] 119a2123439SPhilipp Schuster EventFdTryCloneFail(#[source] std::io::Error), 120*8e2973feSPhilipp Schuster #[error("Failed to MpscRecv")] 121a2123439SPhilipp Schuster MpscRecvFail(#[source] mpsc::RecvError), 122*8e2973feSPhilipp Schuster #[error("Resize invalid argument")] 123a2123439SPhilipp Schuster ResizeError(#[source] anyhow::Error), 124*8e2973feSPhilipp Schuster #[error("Fail to resize trigger")] 125a2123439SPhilipp Schuster ResizeTriggerFail(#[source] DeviceError), 126*8e2973feSPhilipp Schuster #[error("Invalid configuration")] 127a2123439SPhilipp Schuster ValidateError(#[source] anyhow::Error), 128*8e2973feSPhilipp Schuster #[error("Failed 
discarding memory range")] 129a2123439SPhilipp Schuster DiscardMemoryRange(#[source] std::io::Error), 130*8e2973feSPhilipp Schuster #[error("Failed DMA mapping")] 131a2123439SPhilipp Schuster DmaMap(#[source] std::io::Error), 132*8e2973feSPhilipp Schuster #[error("Failed DMA unmapping")] 133a2123439SPhilipp Schuster DmaUnmap(#[source] std::io::Error), 1342af2cc53SBo Chen #[error("Invalid DMA mapping handler")] 13561f9a4ecSSebastien Boeuf InvalidDmaMappingHandler, 1362af2cc53SBo Chen #[error("Not activated by the guest")] 137c274ce4dSRob Bradford NotActivatedByGuest, 138756aebafSBo Chen #[error("Unknown request type: {0}")] 1396725771dSPhilipp Schuster UnknownRequestType(u16), 140*8e2973feSPhilipp Schuster #[error("Failed adding used index")] 141a2123439SPhilipp Schuster QueueAddUsed(#[source] virtio_queue::Error), 14251d102c7SHui Zhu } 14351d102c7SHui Zhu 14451d102c7SHui Zhu #[repr(C)] 14551d102c7SHui Zhu #[derive(Copy, Clone, Debug, Default)] 14651d102c7SHui Zhu struct VirtioMemReq { 14751d102c7SHui Zhu req_type: u16, 14851d102c7SHui Zhu padding: [u16; 3], 14951d102c7SHui Zhu addr: u64, 15051d102c7SHui Zhu nb_blocks: u16, 151ad8adcb9SSebastien Boeuf padding_1: [u16; 3], 15251d102c7SHui Zhu } 15351d102c7SHui Zhu 15431b3871eSWei Liu // SAFETY: it only has data and has no implicit padding. 15551d102c7SHui Zhu unsafe impl ByteValued for VirtioMemReq {} 15651d102c7SHui Zhu 15751d102c7SHui Zhu #[repr(C)] 15851d102c7SHui Zhu #[derive(Copy, Clone, Debug, Default)] 15951d102c7SHui Zhu struct VirtioMemResp { 16051d102c7SHui Zhu resp_type: u16, 16151d102c7SHui Zhu padding: [u16; 3], 162ad8adcb9SSebastien Boeuf state: u16, 16351d102c7SHui Zhu } 16451d102c7SHui Zhu 16531b3871eSWei Liu // SAFETY: it only has data and has no implicit padding. 
16651d102c7SHui Zhu unsafe impl ByteValued for VirtioMemResp {} 16751d102c7SHui Zhu 16830e1162dSSebastien Boeuf #[repr(C)] 16910ab87d6SRob Bradford #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)] 170d7115ec6SSebastien Boeuf pub struct VirtioMemConfig { 17151d102c7SHui Zhu // Block size and alignment. Cannot change. 172165568e9SHui Zhu block_size: u64, 17351d102c7SHui Zhu // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. 17451d102c7SHui Zhu node_id: u16, 175165568e9SHui Zhu padding: [u8; 6], 17651d102c7SHui Zhu // Start address of the memory region. Cannot change. 17751d102c7SHui Zhu addr: u64, 17851d102c7SHui Zhu // Region size (maximum). Cannot change. 17951d102c7SHui Zhu region_size: u64, 18051d102c7SHui Zhu // Currently usable region size. Can grow up to region_size. Can 18151d102c7SHui Zhu // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config 18251d102c7SHui Zhu // update will be sent). 18351d102c7SHui Zhu usable_region_size: u64, 18451d102c7SHui Zhu // Currently used size. Changes due to plug/unplug requests, but no 18551d102c7SHui Zhu // config updates will be sent. 18651d102c7SHui Zhu plugged_size: u64, 18751d102c7SHui Zhu // Requested size. New plug requests cannot exceed it. Can change. 18851d102c7SHui Zhu requested_size: u64, 18951d102c7SHui Zhu } 19051d102c7SHui Zhu 19131b3871eSWei Liu // SAFETY: it only has data and has no implicit padding. 
19251d102c7SHui Zhu unsafe impl ByteValued for VirtioMemConfig {} 19351d102c7SHui Zhu 194ad8adcb9SSebastien Boeuf impl VirtioMemConfig { validate(&self) -> result::Result<(), Error>195ad8adcb9SSebastien Boeuf fn validate(&self) -> result::Result<(), Error> { 196ad8adcb9SSebastien Boeuf if self.addr % self.block_size != 0 { 197ad8adcb9SSebastien Boeuf return Err(Error::ValidateError(anyhow!( 198ad8adcb9SSebastien Boeuf "addr 0x{:x} is not aligned on block_size 0x{:x}", 199ad8adcb9SSebastien Boeuf self.addr, 200ad8adcb9SSebastien Boeuf self.block_size 20133a1e37cSHui Zhu ))); 202ad8adcb9SSebastien Boeuf } 203ad8adcb9SSebastien Boeuf if self.region_size % self.block_size != 0 { 204ad8adcb9SSebastien Boeuf return Err(Error::ValidateError(anyhow!( 205ad8adcb9SSebastien Boeuf "region_size 0x{:x} is not aligned on block_size 0x{:x}", 206ad8adcb9SSebastien Boeuf self.region_size, 207ad8adcb9SSebastien Boeuf self.block_size 20833a1e37cSHui Zhu ))); 209ad8adcb9SSebastien Boeuf } 210ad8adcb9SSebastien Boeuf if self.usable_region_size % self.block_size != 0 { 211ad8adcb9SSebastien Boeuf return Err(Error::ValidateError(anyhow!( 212ad8adcb9SSebastien Boeuf "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}", 213ad8adcb9SSebastien Boeuf self.usable_region_size, 214ad8adcb9SSebastien Boeuf self.block_size 215ad8adcb9SSebastien Boeuf ))); 216ad8adcb9SSebastien Boeuf } 217ad8adcb9SSebastien Boeuf if self.plugged_size % self.block_size != 0 { 218ad8adcb9SSebastien Boeuf return Err(Error::ValidateError(anyhow!( 219ad8adcb9SSebastien Boeuf "plugged_size 0x{:x} is not aligned on block_size 0x{:x}", 220ad8adcb9SSebastien Boeuf self.plugged_size, 221ad8adcb9SSebastien Boeuf self.block_size 222ad8adcb9SSebastien Boeuf ))); 223ad8adcb9SSebastien Boeuf } 224ad8adcb9SSebastien Boeuf if self.requested_size % self.block_size != 0 { 225ad8adcb9SSebastien Boeuf return Err(Error::ValidateError(anyhow!( 226ad8adcb9SSebastien Boeuf "requested_size 0x{:x} is not aligned on block_size 
0x{:x}", 227ad8adcb9SSebastien Boeuf self.requested_size, 228ad8adcb9SSebastien Boeuf self.block_size 22933a1e37cSHui Zhu ))); 23033a1e37cSHui Zhu } 23133a1e37cSHui Zhu 232ad8adcb9SSebastien Boeuf Ok(()) 233ad8adcb9SSebastien Boeuf } 234ad8adcb9SSebastien Boeuf resize(&mut self, size: u64) -> result::Result<(), Error>235ad8adcb9SSebastien Boeuf fn resize(&mut self, size: u64) -> result::Result<(), Error> { 236ad8adcb9SSebastien Boeuf if self.requested_size == size { 237ad8adcb9SSebastien Boeuf return Err(Error::ResizeError(anyhow!( 238ad8adcb9SSebastien Boeuf "new size 0x{:x} and requested_size are identical", 239ad8adcb9SSebastien Boeuf size 240ad8adcb9SSebastien Boeuf ))); 241ad8adcb9SSebastien Boeuf } else if size > self.region_size { 242ad8adcb9SSebastien Boeuf return Err(Error::ResizeError(anyhow!( 243ad8adcb9SSebastien Boeuf "new size 0x{:x} is bigger than region_size 0x{:x}", 244ad8adcb9SSebastien Boeuf size, 245ad8adcb9SSebastien Boeuf self.region_size 246ad8adcb9SSebastien Boeuf ))); 247a9ec0f33SBo Chen } else if size % self.block_size != 0 { 248ad8adcb9SSebastien Boeuf return Err(Error::ResizeError(anyhow!( 249ad8adcb9SSebastien Boeuf "new size 0x{:x} is not aligned on block_size 0x{:x}", 250ad8adcb9SSebastien Boeuf size, 251ad8adcb9SSebastien Boeuf self.block_size 252ad8adcb9SSebastien Boeuf ))); 253ad8adcb9SSebastien Boeuf } 254ad8adcb9SSebastien Boeuf 255ad8adcb9SSebastien Boeuf self.requested_size = size; 25633a1e37cSHui Zhu 25733a1e37cSHui Zhu Ok(()) 25833a1e37cSHui Zhu } 25954f814f3SSebastien Boeuf is_valid_range(&self, addr: u64, size: u64) -> bool26054f814f3SSebastien Boeuf fn is_valid_range(&self, addr: u64, size: u64) -> bool { 261b37e2ed3SBo Chen // Ensure no overflow from adding 'addr' and 'size' whose value are both 262b37e2ed3SBo Chen // controlled by the guest driver 263b37e2ed3SBo Chen if addr.checked_add(size).is_none() { 264b37e2ed3SBo Chen return false; 265b37e2ed3SBo Chen } 266b37e2ed3SBo Chen 26754f814f3SSebastien Boeuf // Start 
address must be aligned on block_size, the size must be 26854f814f3SSebastien Boeuf // greater than 0, and all blocks covered by the request must be 26954f814f3SSebastien Boeuf // in the usable region. 27054f814f3SSebastien Boeuf if addr % self.block_size != 0 27154f814f3SSebastien Boeuf || size == 0 2722ad8fac6SYuhong Zhong || (addr < self.addr || addr + size > self.addr + self.usable_region_size) 27354f814f3SSebastien Boeuf { 27454f814f3SSebastien Boeuf return false; 27554f814f3SSebastien Boeuf } 27654f814f3SSebastien Boeuf 27754f814f3SSebastien Boeuf true 27854f814f3SSebastien Boeuf } 279ad8adcb9SSebastien Boeuf } 28033a1e37cSHui Zhu 28151d102c7SHui Zhu struct Request { 28251d102c7SHui Zhu req: VirtioMemReq, 28351d102c7SHui Zhu status_addr: GuestAddress, 28451d102c7SHui Zhu } 28551d102c7SHui Zhu 28651d102c7SHui Zhu impl Request { parse( desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, ) -> result::Result<Request, Error>28751d102c7SHui Zhu fn parse( 2880162d73eSSebastien Boeuf desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 28951d102c7SHui Zhu ) -> result::Result<Request, Error> { 2900249e864SSebastien Boeuf let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 2910249e864SSebastien Boeuf // The descriptor contains the request type which MUST be readable. 
2920249e864SSebastien Boeuf if desc.is_write_only() { 29351d102c7SHui Zhu return Err(Error::UnexpectedWriteOnlyDescriptor); 29451d102c7SHui Zhu } 2950249e864SSebastien Boeuf if desc.len() as usize != size_of::<VirtioMemReq>() { 29651d102c7SHui Zhu return Err(Error::InvalidRequest); 29751d102c7SHui Zhu } 2980249e864SSebastien Boeuf let req: VirtioMemReq = desc_chain 2990249e864SSebastien Boeuf .memory() 3000249e864SSebastien Boeuf .read_obj(desc.addr()) 3010249e864SSebastien Boeuf .map_err(Error::GuestMemory)?; 30251d102c7SHui Zhu 3030249e864SSebastien Boeuf let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 30451d102c7SHui Zhu 30551d102c7SHui Zhu // The status MUST always be writable 30651d102c7SHui Zhu if !status_desc.is_write_only() { 30751d102c7SHui Zhu return Err(Error::UnexpectedReadOnlyDescriptor); 30851d102c7SHui Zhu } 30951d102c7SHui Zhu 3100249e864SSebastien Boeuf if (status_desc.len() as usize) < size_of::<VirtioMemResp>() { 31151d102c7SHui Zhu return Err(Error::BufferLengthTooSmall); 31251d102c7SHui Zhu } 31351d102c7SHui Zhu 31451d102c7SHui Zhu Ok(Request { 31551d102c7SHui Zhu req, 3160249e864SSebastien Boeuf status_addr: status_desc.addr(), 31751d102c7SHui Zhu }) 31851d102c7SHui Zhu } 31954f814f3SSebastien Boeuf send_response( &self, mem: &GuestMemoryMmap, resp_type: u16, state: u16, ) -> Result<u32, Error>320756aebafSBo Chen fn send_response( 321756aebafSBo Chen &self, 322756aebafSBo Chen mem: &GuestMemoryMmap, 323756aebafSBo Chen resp_type: u16, 324756aebafSBo Chen state: u16, 325756aebafSBo Chen ) -> Result<u32, Error> { 32654f814f3SSebastien Boeuf let resp = VirtioMemResp { 32754f814f3SSebastien Boeuf resp_type, 32854f814f3SSebastien Boeuf state, 32954f814f3SSebastien Boeuf ..Default::default() 33054f814f3SSebastien Boeuf }; 331756aebafSBo Chen mem.write_obj(resp, self.status_addr) 332756aebafSBo Chen .map_err(Error::GuestMemory)?; 333756aebafSBo Chen Ok(size_of::<VirtioMemResp>() as u32) 33454f814f3SSebastien Boeuf } 
}

// Per-block plug/unplug bitmap for the whole virtio-mem region; one bool per
// VIRTIO_MEM_DEFAULT_BLOCK_SIZE block.
#[derive(Clone, Serialize, Deserialize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    // True when every block in [first_block_index, first_block_index + nb_blocks)
    // matches the given plug state.
    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    // Convert the bool bitmap into 64-bit words and build a MemoryRangeTable
    // of all blocks whose state equals `plugged`.
    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

// Epoll-driven worker that services guest plug/unplug/state requests on the
// virtio-mem queue.
struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    // Release backing pages for [offset, offset + size) within the region,
    // via fallocate(PUNCH_HOLE) for file-backed memory and madvise(DONTNEED)
    // for anonymous (non-hugepage) memory.
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    // Handle VIRTIO_MEM_REQ_PLUG / VIRTIO_MEM_REQ_UNPLUG for the given range,
    // updating the blocks bitmap, plugged_size and DMA mappings. Returns a
    // VIRTIO_MEM_RESP_* code.
    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        // All blocks in the range must currently be in the opposite state.
        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    // Handle VIRTIO_MEM_REQ_UNPLUG_ALL: discard the whole region, unmap any
    // still-plugged blocks, clear the bitmap and reset plugged_size.
    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    // Handle VIRTIO_MEM_REQ_STATE: report whether a range is fully plugged,
    // fully unplugged, or mixed, along with an ACK/ERROR response code.
    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    // Drain the request queue, dispatch each request, write responses and
    // return whether any used descriptors were added (i.e. whether the guest
    // needs a notification).
    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

// Key identifying who registered a DMA mapping handler: the whole VFIO
// container, or a specific device.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

// Snapshot/restore state for the device.
#[derive(Serialize, Deserialize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    /// Create a new virtio-mem device backed by `region`.
    ///
    /// When `state` is `Some`, the device is being restored: features, config
    /// and blocks state come from the snapshot and the device starts paused.
    /// Otherwise a fresh configuration is built, optionally resized to
    /// `initial_size`, and validated against the virtio-mem spec.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        // The region must be a multiple of VIRTIO_MEM_ALIGN_SIZE (128 MiB,
        // the Linux memory block size).
        if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
            return Err(io::Error::other(format!(
                "Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"
            )));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            // Restore path: reuse the snapshotted state and start paused.
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::other(format!(
                        "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                    ))
                })?;
            }

            // Advertise ACPI PXM (NUMA proximity) when a node id is given.
            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::other(format!("Invalid virtio-mem configuration: {e:?}"))
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    /// Resize the virtio-mem requested size to `size`, then notify the guest
    /// with a config-change interrupt if the device has been activated.
    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            // Not activated yet; nothing to notify.
            Ok(())
        }
    }

    /// Register a DMA mapping handler for `source`, replaying a map() for
    /// every already-plugged block so the handler catches up with the device.
    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    // GPA of block `idx`; identity-mapped (gpa -> gpa).
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    /// Remove the DMA mapping handler registered for `source`, unmapping
    /// every currently plugged block from it before dropping it.
    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    /// Capture the current device state for snapshotting.
    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    // Fuzzing-only helper to join the epoll threads deterministically.
    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        // Ask the epoll thread to exit, then wait for it.
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    /// Activate the device: build the epoll handler, discard unplugged
    /// ranges, and spawn the virtio worker thread.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        // virtio-mem uses a single request queue.
        let (_, queue, queue_evt) = queues.remove(0);

        // Keep a copy so resize() can raise config-change interrupts later.
        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        // Discard every range that is not plugged so the host does not keep
        // backing memory for blocks the guest cannot use.
        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}