// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on MPSC channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Resize invalid argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
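// The two structs above mirror the virtio-mem request/response wire format,
// and the ByteValued impls are only sound because the explicit padding fields
// leave no implicit padding. A minimal layout sanity check (a hypothetical
// test sketch, not part of the original module):
#[cfg(test)]
mod wire_layout_tests {
    use super::*;

    #[test]
    fn test_wire_struct_layout() {
        // req_type(2) + padding(6) + addr(8) + nb_blocks(2) + padding_1(6)
        assert_eq!(size_of::<VirtioMemReq>(), 24);
        // resp_type(2) + padding(6) + state(2)
        assert_eq!(size_of::<VirtioMemResp>(), 10);
    }
}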
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size' whose values are
        // both controlled by the guest driver.
        if addr.checked_add(size).is_none() {
            return false;
        }

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
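// The invariants enforced above are easiest to see with concrete values. A
// minimal sketch (hypothetical test, not part of the original module)
// exercising resize() and is_valid_range() against a small 16 MiB region:
#[cfg(test)]
mod config_tests {
    use super::*;

    #[test]
    fn test_resize_and_range_checks() {
        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE, // 2 MiB
            addr: 0x1_0000_0000,                       // 4 GiB
            region_size: 8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            usable_region_size: 8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            ..Default::default()
        };
        assert!(config.validate().is_ok());

        // Growing beyond region_size, or by a non-multiple of block_size,
        // must be rejected.
        assert!(config.resize(9 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_err());
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE + 1).is_err());
        assert!(config.resize(4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_ok());
        assert_eq!(config.requested_size, 4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);

        // A range is valid only if it is block-aligned, non-empty, and lies
        // entirely within [addr, addr + usable_region_size].
        assert!(config.is_valid_range(config.addr, config.block_size));
        assert!(config.is_valid_range(config.addr, config.usable_region_size));
        assert!(!config.is_valid_range(config.addr, 0));
        assert!(!config.is_valid_range(config.addr + 1, config.block_size));
        assert!(!config.is_valid_range(
            config.addr,
            config.usable_region_size + config.block_size
        ));
    }
}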
struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}
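// send_response() writes a fixed-size VirtioMemResp at status_addr and
// returns the number of bytes written. A minimal round-trip sketch (a
// hypothetical test; it assumes GuestMemoryMmap::from_ranges from the
// vm-memory mmap backend is available, as it is elsewhere in this crate):
#[cfg(test)]
mod request_tests {
    use super::*;

    #[test]
    fn test_send_response_roundtrip() {
        let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
        let r = Request {
            req: VirtioMemReq::default(),
            status_addr: GuestAddress(0x100),
        };

        let len = r
            .send_response(&mem, VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_PLUGGED)
            .unwrap();
        assert_eq!(len as usize, size_of::<VirtioMemResp>());

        // Read the response back from guest memory and check the fields.
        let resp: VirtioMemResp = mem.read_obj(GuestAddress(0x100)).unwrap();
        assert_eq!(resp.resp_type, VIRTIO_MEM_RESP_ACK);
        assert_eq!(resp.state, VIRTIO_MEM_STATE_PLUGGED);
    }
}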
#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}
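// BlocksState keeps one bool per 2 MiB block; memory_ranges() repacks those
// bools 64-per-u64 so MemoryRangeTable::from_bitmap() can turn them into
// ranges. A minimal sketch (hypothetical test; it assumes from_bitmap()
// coalesces consecutive set bits into a single range, which is how
// activate() uses the resulting table below):
#[cfg(test)]
mod blocks_state_tests {
    use super::*;

    #[test]
    fn test_set_range_and_memory_ranges() {
        // A region of 8 blocks, all initially unplugged.
        let mut state = BlocksState::new(8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
        assert!(state.is_range_state(0, 8, false));

        // Plug blocks 2..5 and check the range states.
        state.set_range(2, 3, true);
        assert!(state.is_range_state(2, 3, true));
        assert!(!state.is_range_state(0, 8, true)); // mixed
        assert!(!state.is_range_state(0, 8, false)); // mixed

        // Expect one coalesced plugged range, 2 blocks past start_addr and
        // 3 blocks long.
        let ranges = state.memory_ranges(0x1_0000_0000, true);
        assert_eq!(ranges.regions().len(), 1);
        assert_eq!(
            ranges.regions()[0].gpa,
            0x1_0000_0000 + 2 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE
        );
        assert_eq!(
            ranges.regions()[0].length,
            3 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE
        );
    }
}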
struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}
impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}
pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"),
            ));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {e:?}"),
                )
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }
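    // Usage sketch (hypothetical, for illustration only): hotplug is driven
    // from the VMM side by raising requested_size. resize() only updates the
    // config and signals a config-change interrupt; the guest driver then
    // issues VIRTIO_MEM_REQ_PLUG/UNPLUG requests until plugged_size converges
    // on the new requested_size.
    //
    //     // Ask the guest to converge on 512 MiB of plugged memory.
    //     mem_device.resize(512 << 20)?;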
    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}
impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.state())
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}