1 // Copyright (c) 2020 Ant Financial 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 use std::collections::BTreeMap; 18 use std::io; 19 use std::mem::size_of; 20 use std::os::unix::io::{AsRawFd, RawFd}; 21 use std::result; 22 use std::sync::atomic::AtomicBool; 23 use std::sync::mpsc; 24 use std::sync::{Arc, Barrier, Mutex}; 25 26 use anyhow::anyhow; 27 use seccompiler::SeccompAction; 28 use serde::{Deserialize, Serialize}; 29 use thiserror::Error; 30 use virtio_queue::{DescriptorChain, Queue, QueueT}; 31 use vm_device::dma_mapping::ExternalDmaMapping; 32 use vm_memory::{ 33 Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, 34 GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion, 35 }; 36 use vm_migration::protocol::MemoryRangeTable; 37 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 38 use vmm_sys_util::eventfd::EventFd; 39 40 use super::Error as DeviceError; 41 use super::{ 42 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon, 43 VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1, 44 }; 45 use crate::seccomp_filters::Thread; 46 use crate::thread_helper::spawn_virtio_thread; 47 use crate::{GuestMemoryMmap, GuestRegionMmap}; 48 use crate::{VirtioInterrupt, VirtioInterruptType}; 49 50 const QUEUE_SIZE: u16 = 128; 51 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 52 53 // 128MiB is the standard memory block size in Linux. A virtio-mem region must 54 // be aligned on this size, and the region size must be a multiple of it. 55 pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20; 56 // Use 2 MiB alignment so transparent hugepages can be used by KVM. 57 const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20; 58 59 // Request processed successfully, applicable for 60 // - VIRTIO_MEM_REQ_PLUG 61 // - VIRTIO_MEM_REQ_UNPLUG 62 // - VIRTIO_MEM_REQ_UNPLUG_ALL 63 // - VIRTIO_MEM_REQ_STATE 64 const VIRTIO_MEM_RESP_ACK: u16 = 0; 65 66 // Request denied - e.g. trying to plug more than requested, applicable for 67 // - VIRTIO_MEM_REQ_PLUG 68 const VIRTIO_MEM_RESP_NACK: u16 = 1; 69 70 // Request cannot be processed right now, try again later, applicable for 71 // - VIRTIO_MEM_REQ_PLUG 72 // - VIRTIO_MEM_REQ_UNPLUG 73 // - VIRTIO_MEM_REQ_UNPLUG_ALL 74 #[allow(unused)] 75 const VIRTIO_MEM_RESP_BUSY: u16 = 2; 76 77 // Error in request (e.g. addresses/alignment), applicable for 78 // - VIRTIO_MEM_REQ_PLUG 79 // - VIRTIO_MEM_REQ_UNPLUG 80 // - VIRTIO_MEM_REQ_STATE 81 const VIRTIO_MEM_RESP_ERROR: u16 = 3; 82 83 // State of memory blocks is "plugged" 84 const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; 85 // State of memory blocks is "unplugged" 86 const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; 87 // State of memory blocks is "mixed" 88 const VIRTIO_MEM_STATE_MIXED: u16 = 2; 89 90 // request to plug memory blocks 91 const VIRTIO_MEM_REQ_PLUG: u16 = 0; 92 // request to unplug memory blocks 93 const VIRTIO_MEM_REQ_UNPLUG: u16 = 1; 94 // request to unplug all blocks and shrink the usable size 95 const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2; 96 // request information about the plugged state of memory blocks 97 const VIRTIO_MEM_REQ_STATE: u16 = 3; 98 99 // New descriptors are pending on the virtio queue. 100 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 101 102 // Virtio features 103 const VIRTIO_MEM_F_ACPI_PXM: u8 = 0; 104 105 #[derive(Error, Debug)] 106 pub enum Error { 107 #[error("Guest gave us bad memory addresses: {0}")] 108 GuestMemory(GuestMemoryError), 109 #[error("Guest gave us a write only descriptor that protocol says to read from")] 110 UnexpectedWriteOnlyDescriptor, 111 #[error("Guest gave us a read only descriptor that protocol says to write to")] 112 UnexpectedReadOnlyDescriptor, 113 #[error("Guest gave us too few descriptors in a descriptor chain")] 114 DescriptorChainTooShort, 115 #[error("Guest gave us a buffer that was too short to use")] 116 BufferLengthTooSmall, 117 #[error("Guest sent us invalid request")] 118 InvalidRequest, 119 #[error("Failed to EventFd write: {0}")] 120 EventFdWriteFail(std::io::Error), 121 #[error("Failed to EventFd try_clone: {0}")] 122 EventFdTryCloneFail(std::io::Error), 123 #[error("Failed to MpscRecv: {0}")] 124 MpscRecvFail(mpsc::RecvError), 125 #[error("Resize invalid argument: {0}")] 126 ResizeError(anyhow::Error), 127 #[error("Fail to resize trigger: {0}")] 128 ResizeTriggerFail(DeviceError), 129 #[error("Invalid configuration: {0}")] 130 ValidateError(anyhow::Error), 131 #[error("Failed discarding memory range: {0}")] 132 DiscardMemoryRange(std::io::Error), 133 #[error("Failed DMA mapping: {0}")] 134 DmaMap(std::io::Error), 135 #[error("Failed DMA unmapping: {0}")] 136 DmaUnmap(std::io::Error), 137 #[error("Invalid DMA mapping handler")] 138 InvalidDmaMappingHandler, 139 #[error("Not activated by the guest")] 140 NotActivatedByGuest, 141 #[error("Unknown request type: {0}")] 142 UnknownRequestType(u16), 143 #[error("Failed adding used index: {0}")] 144 QueueAddUsed(virtio_queue::Error), 145 } 146 147 #[repr(C)] 148 #[derive(Copy, Clone, Debug, Default)] 149 struct VirtioMemReq { 150 req_type: u16, 151 padding: [u16; 3], 152 addr: u64, 153 nb_blocks: u16, 154 padding_1: [u16; 3], 155 } 156 157 // SAFETY: it only has data and has no implicit padding. 158 unsafe impl ByteValued for VirtioMemReq {} 159 160 #[repr(C)] 161 #[derive(Copy, Clone, Debug, Default)] 162 struct VirtioMemResp { 163 resp_type: u16, 164 padding: [u16; 3], 165 state: u16, 166 } 167 168 // SAFETY: it only has data and has no implicit padding. 169 unsafe impl ByteValued for VirtioMemResp {} 170 171 #[repr(C)] 172 #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)] 173 pub struct VirtioMemConfig { 174 // Block size and alignment. Cannot change. 175 block_size: u64, 176 // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. 177 node_id: u16, 178 padding: [u8; 6], 179 // Start address of the memory region. Cannot change. 180 addr: u64, 181 // Region size (maximum). Cannot change. 182 region_size: u64, 183 // Currently usable region size. Can grow up to region_size. Can 184 // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config 185 // update will be sent). 186 usable_region_size: u64, 187 // Currently used size. Changes due to plug/unplug requests, but no 188 // config updates will be sent. 189 plugged_size: u64, 190 // Requested size. New plug requests cannot exceed it. Can change. 191 requested_size: u64, 192 } 193 194 // SAFETY: it only has data and has no implicit padding. 195 unsafe impl ByteValued for VirtioMemConfig {} 196 197 impl VirtioMemConfig { 198 fn validate(&self) -> result::Result<(), Error> { 199 if self.addr % self.block_size != 0 { 200 return Err(Error::ValidateError(anyhow!( 201 "addr 0x{:x} is not aligned on block_size 0x{:x}", 202 self.addr, 203 self.block_size 204 ))); 205 } 206 if self.region_size % self.block_size != 0 { 207 return Err(Error::ValidateError(anyhow!( 208 "region_size 0x{:x} is not aligned on block_size 0x{:x}", 209 self.region_size, 210 self.block_size 211 ))); 212 } 213 if self.usable_region_size % self.block_size != 0 { 214 return Err(Error::ValidateError(anyhow!( 215 "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}", 216 self.usable_region_size, 217 self.block_size 218 ))); 219 } 220 if self.plugged_size % self.block_size != 0 { 221 return Err(Error::ValidateError(anyhow!( 222 "plugged_size 0x{:x} is not aligned on block_size 0x{:x}", 223 self.plugged_size, 224 self.block_size 225 ))); 226 } 227 if self.requested_size % self.block_size != 0 { 228 return Err(Error::ValidateError(anyhow!( 229 "requested_size 0x{:x} is not aligned on block_size 0x{:x}", 230 self.requested_size, 231 self.block_size 232 ))); 233 } 234 235 Ok(()) 236 } 237 238 fn resize(&mut self, size: u64) -> result::Result<(), Error> { 239 if self.requested_size == size { 240 return Err(Error::ResizeError(anyhow!( 241 "new size 0x{:x} and requested_size are identical", 242 size 243 ))); 244 } else if size > self.region_size { 245 return Err(Error::ResizeError(anyhow!( 246 "new size 0x{:x} is bigger than region_size 0x{:x}", 247 size, 248 self.region_size 249 ))); 250 } else if size % self.block_size != 0 { 251 return Err(Error::ResizeError(anyhow!( 252 "new size 0x{:x} is not aligned on block_size 0x{:x}", 253 size, 254 self.block_size 255 ))); 256 } 257 258 self.requested_size = size; 259 260 Ok(()) 261 } 262 263 fn is_valid_range(&self, addr: u64, size: u64) -> bool { 264 // Ensure no overflow from adding 'addr' and 'size' whose value are both 265 // controlled by the guest driver 266 if addr.checked_add(size).is_none() { 267 return false; 268 } 269 270 // Start address must be aligned on block_size, the size must be 271 // greater than 0, and all blocks covered by the request must be 272 // in the usable region. 273 if addr % self.block_size != 0 274 || size == 0 275 || (addr < self.addr || addr + size > self.addr + self.usable_region_size) 276 { 277 return false; 278 } 279 280 true 281 } 282 } 283 284 struct Request { 285 req: VirtioMemReq, 286 status_addr: GuestAddress, 287 } 288 289 impl Request { 290 fn parse( 291 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 292 ) -> result::Result<Request, Error> { 293 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 294 // The descriptor contains the request type which MUST be readable. 295 if desc.is_write_only() { 296 return Err(Error::UnexpectedWriteOnlyDescriptor); 297 } 298 if desc.len() as usize != size_of::<VirtioMemReq>() { 299 return Err(Error::InvalidRequest); 300 } 301 let req: VirtioMemReq = desc_chain 302 .memory() 303 .read_obj(desc.addr()) 304 .map_err(Error::GuestMemory)?; 305 306 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 307 308 // The status MUST always be writable 309 if !status_desc.is_write_only() { 310 return Err(Error::UnexpectedReadOnlyDescriptor); 311 } 312 313 if (status_desc.len() as usize) < size_of::<VirtioMemResp>() { 314 return Err(Error::BufferLengthTooSmall); 315 } 316 317 Ok(Request { 318 req, 319 status_addr: status_desc.addr(), 320 }) 321 } 322 323 fn send_response( 324 &self, 325 mem: &GuestMemoryMmap, 326 resp_type: u16, 327 state: u16, 328 ) -> Result<u32, Error> { 329 let resp = VirtioMemResp { 330 resp_type, 331 state, 332 ..Default::default() 333 }; 334 mem.write_obj(resp, self.status_addr) 335 .map_err(Error::GuestMemory)?; 336 Ok(size_of::<VirtioMemResp>() as u32) 337 } 338 } 339 340 #[derive(Clone, Serialize, Deserialize)] 341 pub struct BlocksState { 342 bitmap: Vec<bool>, 343 } 344 345 impl BlocksState { 346 pub fn new(region_size: u64) -> Self { 347 BlocksState { 348 bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize], 349 } 350 } 351 352 fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool { 353 for state in self 354 .bitmap 355 .iter() 356 .skip(first_block_index) 357 .take(nb_blocks as usize) 358 { 359 if *state != plug { 360 return false; 361 } 362 } 363 true 364 } 365 366 fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) { 367 for state in self 368 .bitmap 369 .iter_mut() 370 .skip(first_block_index) 371 .take(nb_blocks as usize) 372 { 373 *state = plug; 374 } 375 } 376 377 fn inner(&self) -> &Vec<bool> { 378 &self.bitmap 379 } 380 381 pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable { 382 let mut bitmap: Vec<u64> = Vec::new(); 383 let mut i = 0; 384 for (j, bit) in self.bitmap.iter().enumerate() { 385 if j % 64 == 0 { 386 bitmap.push(0); 387 388 if j != 0 { 389 i += 1; 390 } 391 } 392 393 if *bit == plugged { 394 bitmap[i] |= 1 << (j % 64); 395 } 396 } 397 398 MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE) 399 } 400 } 401 402 struct MemEpollHandler { 403 mem: GuestMemoryAtomic<GuestMemoryMmap>, 404 host_addr: u64, 405 host_fd: Option<RawFd>, 406 blocks_state: Arc<Mutex<BlocksState>>, 407 config: Arc<Mutex<VirtioMemConfig>>, 408 queue: Queue, 409 interrupt_cb: Arc<dyn VirtioInterrupt>, 410 queue_evt: EventFd, 411 kill_evt: EventFd, 412 pause_evt: EventFd, 413 hugepages: bool, 414 dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>, 415 } 416 417 impl MemEpollHandler { 418 fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> { 419 // Use fallocate if the memory region is backed by a file. 420 if let Some(fd) = self.host_fd { 421 // SAFETY: FFI call with valid arguments 422 let res = unsafe { 423 libc::fallocate64( 424 fd, 425 libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, 426 offset as libc::off64_t, 427 size as libc::off64_t, 428 ) 429 }; 430 if res != 0 { 431 let err = io::Error::last_os_error(); 432 error!("Deallocating file space failed: {}", err); 433 return Err(Error::DiscardMemoryRange(err)); 434 } 435 } 436 437 // Only use madvise if the memory region is not allocated with 438 // hugepages. 439 if !self.hugepages { 440 // SAFETY: FFI call with valid arguments 441 let res = unsafe { 442 libc::madvise( 443 (self.host_addr + offset) as *mut libc::c_void, 444 size as libc::size_t, 445 libc::MADV_DONTNEED, 446 ) 447 }; 448 if res != 0 { 449 let err = io::Error::last_os_error(); 450 error!("Advising kernel about pages range failed: {}", err); 451 return Err(Error::DiscardMemoryRange(err)); 452 } 453 } 454 455 Ok(()) 456 } 457 458 fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 { 459 let mut config = self.config.lock().unwrap(); 460 let size: u64 = nb_blocks as u64 * config.block_size; 461 462 if plug && (config.plugged_size + size > config.requested_size) { 463 return VIRTIO_MEM_RESP_NACK; 464 } 465 if !config.is_valid_range(addr, size) { 466 return VIRTIO_MEM_RESP_ERROR; 467 } 468 469 let offset = addr - config.addr; 470 471 let first_block_index = (offset / config.block_size) as usize; 472 if !self 473 .blocks_state 474 .lock() 475 .unwrap() 476 .is_range_state(first_block_index, nb_blocks, !plug) 477 { 478 return VIRTIO_MEM_RESP_ERROR; 479 } 480 481 if !plug { 482 if let Err(e) = self.discard_memory_range(offset, size) { 483 error!("failed discarding memory range: {:?}", e); 484 return VIRTIO_MEM_RESP_ERROR; 485 } 486 } 487 488 self.blocks_state 489 .lock() 490 .unwrap() 491 .set_range(first_block_index, nb_blocks, plug); 492 493 let handlers = self.dma_mapping_handlers.lock().unwrap(); 494 if plug { 495 let mut gpa = addr; 496 for _ in 0..nb_blocks { 497 for (_, handler) in handlers.iter() { 498 if let Err(e) = handler.map(gpa, gpa, config.block_size) { 499 error!( 500 "failed DMA mapping addr 0x{:x} size 0x{:x}: {}", 501 gpa, config.block_size, e 502 ); 503 return VIRTIO_MEM_RESP_ERROR; 504 } 505 } 506 507 gpa += config.block_size; 508 } 509 510 config.plugged_size += size; 511 } else { 512 for (_, handler) in handlers.iter() { 513 if let Err(e) = handler.unmap(addr, size) { 514 error!( 515 "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}", 516 addr, size, e 517 ); 518 return VIRTIO_MEM_RESP_ERROR; 519 } 520 } 521 522 config.plugged_size -= size; 523 } 524 525 VIRTIO_MEM_RESP_ACK 526 } 527 528 fn unplug_all(&mut self) -> u16 { 529 let mut config = self.config.lock().unwrap(); 530 if let Err(e) = self.discard_memory_range(0, config.region_size) { 531 error!("failed discarding memory range: {:?}", e); 532 return VIRTIO_MEM_RESP_ERROR; 533 } 534 535 // Remaining plugged blocks are unmapped. 536 if config.plugged_size > 0 { 537 let handlers = self.dma_mapping_handlers.lock().unwrap(); 538 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 539 if *plugged { 540 let gpa = config.addr + (idx as u64 * config.block_size); 541 for (_, handler) in handlers.iter() { 542 if let Err(e) = handler.unmap(gpa, config.block_size) { 543 error!( 544 "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}", 545 gpa, config.block_size, e 546 ); 547 return VIRTIO_MEM_RESP_ERROR; 548 } 549 } 550 } 551 } 552 } 553 554 self.blocks_state.lock().unwrap().set_range( 555 0, 556 (config.region_size / config.block_size) as u16, 557 false, 558 ); 559 560 config.plugged_size = 0; 561 562 VIRTIO_MEM_RESP_ACK 563 } 564 565 fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) { 566 let config = self.config.lock().unwrap(); 567 let size: u64 = nb_blocks as u64 * config.block_size; 568 569 let resp_type = if config.is_valid_range(addr, size) { 570 VIRTIO_MEM_RESP_ACK 571 } else { 572 VIRTIO_MEM_RESP_ERROR 573 }; 574 575 let offset = addr - config.addr; 576 let first_block_index = (offset / config.block_size) as usize; 577 let resp_state = 578 if self 579 .blocks_state 580 .lock() 581 .unwrap() 582 .is_range_state(first_block_index, nb_blocks, true) 583 { 584 VIRTIO_MEM_STATE_PLUGGED 585 } else if self.blocks_state.lock().unwrap().is_range_state( 586 first_block_index, 587 nb_blocks, 588 false, 589 ) { 590 VIRTIO_MEM_STATE_UNPLUGGED 591 } else { 592 VIRTIO_MEM_STATE_MIXED 593 }; 594 595 (resp_type, resp_state) 596 } 597 598 fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> { 599 self.interrupt_cb.trigger(int_type).map_err(|e| { 600 error!("Failed to signal used queue: {:?}", e); 601 DeviceError::FailedSignalingUsedQueue(e) 602 }) 603 } 604 605 fn process_queue(&mut self) -> Result<bool, Error> { 606 let mut used_descs = false; 607 608 while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) { 609 let r = Request::parse(&mut desc_chain)?; 610 let (resp_type, resp_state) = match r.req.req_type { 611 VIRTIO_MEM_REQ_PLUG => ( 612 self.state_change_request(r.req.addr, r.req.nb_blocks, true), 613 0u16, 614 ), 615 VIRTIO_MEM_REQ_UNPLUG => ( 616 self.state_change_request(r.req.addr, r.req.nb_blocks, false), 617 0u16, 618 ), 619 VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16), 620 VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks), 621 _ => { 622 return Err(Error::UnknownRequestType(r.req.req_type)); 623 } 624 }; 625 let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?; 626 self.queue 627 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 628 .map_err(Error::QueueAddUsed)?; 629 used_descs = true; 630 } 631 632 Ok(used_descs) 633 } 634 635 fn run( 636 &mut self, 637 paused: Arc<AtomicBool>, 638 paused_sync: Arc<Barrier>, 639 ) -> result::Result<(), EpollHelperError> { 640 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 641 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 642 helper.run(paused, paused_sync, self)?; 643 644 Ok(()) 645 } 646 } 647 648 impl EpollHelperHandler for MemEpollHandler { 649 fn handle_event( 650 &mut self, 651 _helper: &mut EpollHelper, 652 event: &epoll::Event, 653 ) -> result::Result<(), EpollHelperError> { 654 let ev_type = event.data as u16; 655 match ev_type { 656 QUEUE_AVAIL_EVENT => { 657 self.queue_evt.read().map_err(|e| { 658 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 659 })?; 660 661 let needs_notification = self.process_queue().map_err(|e| { 662 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 663 })?; 664 if needs_notification { 665 self.signal(VirtioInterruptType::Queue(0)).map_err(|e| { 666 EpollHelperError::HandleEvent(anyhow!( 667 "Failed to signal used queue: {:?}", 668 e 669 )) 670 })?; 671 } 672 } 673 _ => { 674 return Err(EpollHelperError::HandleEvent(anyhow!( 675 "Unexpected event: {}", 676 ev_type 677 ))); 678 } 679 } 680 Ok(()) 681 } 682 } 683 684 #[derive(PartialEq, Eq, PartialOrd, Ord)] 685 pub enum VirtioMemMappingSource { 686 Container, 687 Device(u32), 688 } 689 690 #[derive(Serialize, Deserialize)] 691 pub struct MemState { 692 pub avail_features: u64, 693 pub acked_features: u64, 694 pub config: VirtioMemConfig, 695 pub blocks_state: BlocksState, 696 } 697 698 pub struct Mem { 699 common: VirtioCommon, 700 id: String, 701 host_addr: u64, 702 host_fd: Option<RawFd>, 703 config: Arc<Mutex<VirtioMemConfig>>, 704 seccomp_action: SeccompAction, 705 hugepages: bool, 706 dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>, 707 blocks_state: Arc<Mutex<BlocksState>>, 708 exit_evt: EventFd, 709 interrupt_cb: Option<Arc<dyn VirtioInterrupt>>, 710 } 711 712 impl Mem { 713 // Create a new virtio-mem device. 714 #[allow(clippy::too_many_arguments)] 715 pub fn new( 716 id: String, 717 region: &Arc<GuestRegionMmap>, 718 seccomp_action: SeccompAction, 719 numa_node_id: Option<u16>, 720 initial_size: u64, 721 hugepages: bool, 722 exit_evt: EventFd, 723 blocks_state: Arc<Mutex<BlocksState>>, 724 state: Option<MemState>, 725 ) -> io::Result<Mem> { 726 let region_len = region.len(); 727 728 if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE { 729 return Err(io::Error::new( 730 io::ErrorKind::Other, 731 format!("Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"), 732 )); 733 } 734 735 let (avail_features, acked_features, config, paused) = if let Some(state) = state { 736 info!("Restoring virtio-mem {}", id); 737 *(blocks_state.lock().unwrap()) = state.blocks_state.clone(); 738 ( 739 state.avail_features, 740 state.acked_features, 741 state.config, 742 true, 743 ) 744 } else { 745 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 746 747 let mut config = VirtioMemConfig { 748 block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE, 749 addr: region.start_addr().raw_value(), 750 region_size: region.len(), 751 usable_region_size: region.len(), 752 plugged_size: 0, 753 requested_size: 0, 754 ..Default::default() 755 }; 756 757 if initial_size != 0 { 758 config.resize(initial_size).map_err(|e| { 759 io::Error::new( 760 io::ErrorKind::Other, 761 format!( 762 "Failed to resize virtio-mem configuration to {initial_size}: {e:?}" 763 ), 764 ) 765 })?; 766 } 767 768 if let Some(node_id) = numa_node_id { 769 avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM; 770 config.node_id = node_id; 771 } 772 773 // Make sure the virtio-mem configuration complies with the 774 // specification. 775 config.validate().map_err(|e| { 776 io::Error::new( 777 io::ErrorKind::Other, 778 format!("Invalid virtio-mem configuration: {e:?}"), 779 ) 780 })?; 781 782 (avail_features, 0, config, false) 783 }; 784 785 let host_fd = region 786 .file_offset() 787 .map(|f_offset| f_offset.file().as_raw_fd()); 788 789 Ok(Mem { 790 common: VirtioCommon { 791 device_type: VirtioDeviceType::Mem as u32, 792 avail_features, 793 acked_features, 794 paused_sync: Some(Arc::new(Barrier::new(2))), 795 queue_sizes: QUEUE_SIZES.to_vec(), 796 min_queues: 1, 797 paused: Arc::new(AtomicBool::new(paused)), 798 ..Default::default() 799 }, 800 id, 801 host_addr: region.as_ptr() as u64, 802 host_fd, 803 config: Arc::new(Mutex::new(config)), 804 seccomp_action, 805 hugepages, 806 dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())), 807 blocks_state, 808 exit_evt, 809 interrupt_cb: None, 810 }) 811 } 812 813 pub fn resize(&mut self, size: u64) -> result::Result<(), Error> { 814 let mut config = self.config.lock().unwrap(); 815 config.resize(size).map_err(|e| { 816 Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e)) 817 })?; 818 819 if let Some(interrupt_cb) = self.interrupt_cb.as_ref() { 820 interrupt_cb 821 .trigger(VirtioInterruptType::Config) 822 .map_err(|e| { 823 Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e)) 824 }) 825 } else { 826 Ok(()) 827 } 828 } 829 830 pub fn add_dma_mapping_handler( 831 &mut self, 832 source: VirtioMemMappingSource, 833 handler: Arc<dyn ExternalDmaMapping>, 834 ) -> result::Result<(), Error> { 835 let config = self.config.lock().unwrap(); 836 837 if config.plugged_size > 0 { 838 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 839 if *plugged { 840 let gpa = config.addr + (idx as u64 * config.block_size); 841 handler 842 .map(gpa, gpa, config.block_size) 843 .map_err(Error::DmaMap)?; 844 } 845 } 846 } 847 848 self.dma_mapping_handlers 849 .lock() 850 .unwrap() 851 .insert(source, handler); 852 853 Ok(()) 854 } 855 856 pub fn remove_dma_mapping_handler( 857 &mut self, 858 source: VirtioMemMappingSource, 859 ) -> result::Result<(), Error> { 860 let handler = self 861 .dma_mapping_handlers 862 .lock() 863 .unwrap() 864 .remove(&source) 865 .ok_or(Error::InvalidDmaMappingHandler)?; 866 867 let config = self.config.lock().unwrap(); 868 869 if config.plugged_size > 0 { 870 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 871 if *plugged { 872 let gpa = config.addr + (idx as u64 * config.block_size); 873 handler 874 .unmap(gpa, config.block_size) 875 .map_err(Error::DmaUnmap)?; 876 } 877 } 878 } 879 880 Ok(()) 881 } 882 883 fn state(&self) -> MemState { 884 MemState { 885 avail_features: self.common.avail_features, 886 acked_features: self.common.acked_features, 887 config: *(self.config.lock().unwrap()), 888 blocks_state: self.blocks_state.lock().unwrap().clone(), 889 } 890 } 891 892 #[cfg(fuzzing)] 893 pub fn wait_for_epoll_threads(&mut self) { 894 self.common.wait_for_epoll_threads(); 895 } 896 } 897 898 impl Drop for Mem { 899 fn drop(&mut self) { 900 if let Some(kill_evt) = self.common.kill_evt.take() { 901 // Ignore the result because there is nothing we can do about it. 902 let _ = kill_evt.write(1); 903 } 904 self.common.wait_for_epoll_threads(); 905 } 906 } 907 908 impl VirtioDevice for Mem { 909 fn device_type(&self) -> u32 { 910 self.common.device_type 911 } 912 913 fn queue_max_sizes(&self) -> &[u16] { 914 &self.common.queue_sizes 915 } 916 917 fn features(&self) -> u64 { 918 self.common.avail_features 919 } 920 921 fn ack_features(&mut self, value: u64) { 922 self.common.ack_features(value) 923 } 924 925 fn read_config(&self, offset: u64, data: &mut [u8]) { 926 self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data); 927 } 928 929 fn activate( 930 &mut self, 931 mem: GuestMemoryAtomic<GuestMemoryMmap>, 932 interrupt_cb: Arc<dyn VirtioInterrupt>, 933 mut queues: Vec<(usize, Queue, EventFd)>, 934 ) -> ActivateResult { 935 self.common.activate(&queues, &interrupt_cb)?; 936 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 937 938 let (_, queue, queue_evt) = queues.remove(0); 939 940 self.interrupt_cb = Some(interrupt_cb.clone()); 941 942 let mut handler = MemEpollHandler { 943 mem, 944 host_addr: self.host_addr, 945 host_fd: self.host_fd, 946 blocks_state: Arc::clone(&self.blocks_state), 947 config: self.config.clone(), 948 queue, 949 interrupt_cb, 950 queue_evt, 951 kill_evt, 952 pause_evt, 953 hugepages: self.hugepages, 954 dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers), 955 }; 956 957 let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false); 958 for range in unplugged_memory_ranges.regions() { 959 handler 960 .discard_memory_range(range.gpa, range.length) 961 .map_err(|e| { 962 error!( 963 "failed discarding memory range [0x{:x}-0x{:x}]: {:?}", 964 range.gpa, 965 range.gpa + range.length - 1, 966 e 967 ); 968 ActivateError::BadActivate 969 })?; 970 } 971 972 let paused = self.common.paused.clone(); 973 let paused_sync = self.common.paused_sync.clone(); 974 let mut epoll_threads = Vec::new(); 975 976 spawn_virtio_thread( 977 &self.id, 978 &self.seccomp_action, 979 Thread::VirtioMem, 980 &mut epoll_threads, 981 &self.exit_evt, 982 move || handler.run(paused, paused_sync.unwrap()), 983 )?; 984 self.common.epoll_threads = Some(epoll_threads); 985 986 event!("virtio-device", "activated", "id", &self.id); 987 Ok(()) 988 } 989 990 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 991 let result = self.common.reset(); 992 event!("virtio-device", "reset", "id", &self.id); 993 result 994 } 995 } 996 997 impl Pausable for Mem { 998 fn pause(&mut self) -> result::Result<(), MigratableError> { 999 self.common.pause() 1000 } 1001 1002 fn resume(&mut self) -> result::Result<(), MigratableError> { 1003 self.common.resume() 1004 } 1005 } 1006 1007 impl Snapshottable for Mem { 1008 fn id(&self) -> String { 1009 self.id.clone() 1010 } 1011 1012 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1013 Snapshot::new_from_state(&self.state()) 1014 } 1015 } 1016 impl Transportable for Mem {} 1017 impl Migratable for Mem {} 1018