// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// Size (in descriptors) of the single virtio-mem guest request queue.
const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed" (some plugged, some unplugged)
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
// Bit position of the ACPI PXM (proximity domain / NUMA node) feature.
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

/// Errors that can occur while servicing virtio-mem guest requests or
/// while reconfiguring the device from the VMM side.
#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write only descriptor that protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read only descriptor that protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us invalid request")]
    InvalidRequest,
    #[error("Failed to EventFd write: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to EventFd try_clone: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to MpscRecv: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Resize invalid argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Fail to resize trigger: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

// Guest request header, as laid out on the virtio queue (virtio-mem spec).
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

// Device response, written back into the guest-provided status buffer.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    // Only meaningful for VIRTIO_MEM_REQ_STATE responses.
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

/// Device configuration space, shared with the guest driver.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
193 unsafe impl ByteValued for VirtioMemConfig {} 194 195 impl VirtioMemConfig { 196 fn validate(&self) -> result::Result<(), Error> { 197 if self.addr % self.block_size != 0 { 198 return Err(Error::ValidateError(anyhow!( 199 "addr 0x{:x} is not aligned on block_size 0x{:x}", 200 self.addr, 201 self.block_size 202 ))); 203 } 204 if self.region_size % self.block_size != 0 { 205 return Err(Error::ValidateError(anyhow!( 206 "region_size 0x{:x} is not aligned on block_size 0x{:x}", 207 self.region_size, 208 self.block_size 209 ))); 210 } 211 if self.usable_region_size % self.block_size != 0 { 212 return Err(Error::ValidateError(anyhow!( 213 "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}", 214 self.usable_region_size, 215 self.block_size 216 ))); 217 } 218 if self.plugged_size % self.block_size != 0 { 219 return Err(Error::ValidateError(anyhow!( 220 "plugged_size 0x{:x} is not aligned on block_size 0x{:x}", 221 self.plugged_size, 222 self.block_size 223 ))); 224 } 225 if self.requested_size % self.block_size != 0 { 226 return Err(Error::ValidateError(anyhow!( 227 "requested_size 0x{:x} is not aligned on block_size 0x{:x}", 228 self.requested_size, 229 self.block_size 230 ))); 231 } 232 233 Ok(()) 234 } 235 236 fn resize(&mut self, size: u64) -> result::Result<(), Error> { 237 if self.requested_size == size { 238 return Err(Error::ResizeError(anyhow!( 239 "new size 0x{:x} and requested_size are identical", 240 size 241 ))); 242 } else if size > self.region_size { 243 return Err(Error::ResizeError(anyhow!( 244 "new size 0x{:x} is bigger than region_size 0x{:x}", 245 size, 246 self.region_size 247 ))); 248 } else if size % self.block_size != 0 { 249 return Err(Error::ResizeError(anyhow!( 250 "new size 0x{:x} is not aligned on block_size 0x{:x}", 251 size, 252 self.block_size 253 ))); 254 } 255 256 self.requested_size = size; 257 258 Ok(()) 259 } 260 261 fn is_valid_range(&self, addr: u64, size: u64) -> bool { 262 // Ensure no overflow from adding 'addr' and 
'size' whose value are both 263 // controlled by the guest driver 264 if addr.checked_add(size).is_none() { 265 return false; 266 } 267 268 // Start address must be aligned on block_size, the size must be 269 // greater than 0, and all blocks covered by the request must be 270 // in the usable region. 271 if addr % self.block_size != 0 272 || size == 0 273 || (addr < self.addr || addr + size >= self.addr + self.usable_region_size) 274 { 275 return false; 276 } 277 278 true 279 } 280 } 281 282 struct Request { 283 req: VirtioMemReq, 284 status_addr: GuestAddress, 285 } 286 287 impl Request { 288 fn parse( 289 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 290 ) -> result::Result<Request, Error> { 291 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 292 // The descriptor contains the request type which MUST be readable. 293 if desc.is_write_only() { 294 return Err(Error::UnexpectedWriteOnlyDescriptor); 295 } 296 if desc.len() as usize != size_of::<VirtioMemReq>() { 297 return Err(Error::InvalidRequest); 298 } 299 let req: VirtioMemReq = desc_chain 300 .memory() 301 .read_obj(desc.addr()) 302 .map_err(Error::GuestMemory)?; 303 304 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 305 306 // The status MUST always be writable 307 if !status_desc.is_write_only() { 308 return Err(Error::UnexpectedReadOnlyDescriptor); 309 } 310 311 if (status_desc.len() as usize) < size_of::<VirtioMemResp>() { 312 return Err(Error::BufferLengthTooSmall); 313 } 314 315 Ok(Request { 316 req, 317 status_addr: status_desc.addr(), 318 }) 319 } 320 321 fn send_response( 322 &self, 323 mem: &GuestMemoryMmap, 324 resp_type: u16, 325 state: u16, 326 ) -> Result<u32, Error> { 327 let resp = VirtioMemResp { 328 resp_type, 329 state, 330 ..Default::default() 331 }; 332 mem.write_obj(resp, self.status_addr) 333 .map_err(Error::GuestMemory)?; 334 Ok(size_of::<VirtioMemResp>() as u32) 335 } 336 } 337 338 #[derive(Clone, Serialize, 
Deserialize)] 339 pub struct BlocksState { 340 bitmap: Vec<bool>, 341 } 342 343 impl BlocksState { 344 pub fn new(region_size: u64) -> Self { 345 BlocksState { 346 bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize], 347 } 348 } 349 350 fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool { 351 for state in self 352 .bitmap 353 .iter() 354 .skip(first_block_index) 355 .take(nb_blocks as usize) 356 { 357 if *state != plug { 358 return false; 359 } 360 } 361 true 362 } 363 364 fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) { 365 for state in self 366 .bitmap 367 .iter_mut() 368 .skip(first_block_index) 369 .take(nb_blocks as usize) 370 { 371 *state = plug; 372 } 373 } 374 375 fn inner(&self) -> &Vec<bool> { 376 &self.bitmap 377 } 378 379 pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable { 380 let mut bitmap: Vec<u64> = Vec::new(); 381 let mut i = 0; 382 for (j, bit) in self.bitmap.iter().enumerate() { 383 if j % 64 == 0 { 384 bitmap.push(0); 385 386 if j != 0 { 387 i += 1; 388 } 389 } 390 391 if *bit == plugged { 392 bitmap[i] |= 1 << (j % 64); 393 } 394 } 395 396 MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE) 397 } 398 } 399 400 struct MemEpollHandler { 401 mem: GuestMemoryAtomic<GuestMemoryMmap>, 402 host_addr: u64, 403 host_fd: Option<RawFd>, 404 blocks_state: Arc<Mutex<BlocksState>>, 405 config: Arc<Mutex<VirtioMemConfig>>, 406 queue: Queue, 407 interrupt_cb: Arc<dyn VirtioInterrupt>, 408 queue_evt: EventFd, 409 kill_evt: EventFd, 410 pause_evt: EventFd, 411 hugepages: bool, 412 dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>, 413 } 414 415 impl MemEpollHandler { 416 fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> { 417 // Use fallocate if the memory region is backed by a file. 
418 if let Some(fd) = self.host_fd { 419 // SAFETY: FFI call with valid arguments 420 let res = unsafe { 421 libc::fallocate64( 422 fd, 423 libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, 424 offset as libc::off64_t, 425 size as libc::off64_t, 426 ) 427 }; 428 if res != 0 { 429 let err = io::Error::last_os_error(); 430 error!("Deallocating file space failed: {}", err); 431 return Err(Error::DiscardMemoryRange(err)); 432 } 433 } 434 435 // Only use madvise if the memory region is not allocated with 436 // hugepages. 437 if !self.hugepages { 438 // SAFETY: FFI call with valid arguments 439 let res = unsafe { 440 libc::madvise( 441 (self.host_addr + offset) as *mut libc::c_void, 442 size as libc::size_t, 443 libc::MADV_DONTNEED, 444 ) 445 }; 446 if res != 0 { 447 let err = io::Error::last_os_error(); 448 error!("Advising kernel about pages range failed: {}", err); 449 return Err(Error::DiscardMemoryRange(err)); 450 } 451 } 452 453 Ok(()) 454 } 455 456 fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 { 457 let mut config = self.config.lock().unwrap(); 458 let size: u64 = nb_blocks as u64 * config.block_size; 459 460 if plug && (config.plugged_size + size > config.requested_size) { 461 return VIRTIO_MEM_RESP_NACK; 462 } 463 if !config.is_valid_range(addr, size) { 464 return VIRTIO_MEM_RESP_ERROR; 465 } 466 467 let offset = addr - config.addr; 468 469 let first_block_index = (offset / config.block_size) as usize; 470 if !self 471 .blocks_state 472 .lock() 473 .unwrap() 474 .is_range_state(first_block_index, nb_blocks, !plug) 475 { 476 return VIRTIO_MEM_RESP_ERROR; 477 } 478 479 if !plug { 480 if let Err(e) = self.discard_memory_range(offset, size) { 481 error!("failed discarding memory range: {:?}", e); 482 return VIRTIO_MEM_RESP_ERROR; 483 } 484 } 485 486 self.blocks_state 487 .lock() 488 .unwrap() 489 .set_range(first_block_index, nb_blocks, plug); 490 491 let handlers = self.dma_mapping_handlers.lock().unwrap(); 492 if plug { 
493 let mut gpa = addr; 494 for _ in 0..nb_blocks { 495 for (_, handler) in handlers.iter() { 496 if let Err(e) = handler.map(gpa, gpa, config.block_size) { 497 error!( 498 "failed DMA mapping addr 0x{:x} size 0x{:x}: {}", 499 gpa, config.block_size, e 500 ); 501 return VIRTIO_MEM_RESP_ERROR; 502 } 503 } 504 505 gpa += config.block_size; 506 } 507 508 config.plugged_size += size; 509 } else { 510 for (_, handler) in handlers.iter() { 511 if let Err(e) = handler.unmap(addr, size) { 512 error!( 513 "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}", 514 addr, size, e 515 ); 516 return VIRTIO_MEM_RESP_ERROR; 517 } 518 } 519 520 config.plugged_size -= size; 521 } 522 523 VIRTIO_MEM_RESP_ACK 524 } 525 526 fn unplug_all(&mut self) -> u16 { 527 let mut config = self.config.lock().unwrap(); 528 if let Err(e) = self.discard_memory_range(0, config.region_size) { 529 error!("failed discarding memory range: {:?}", e); 530 return VIRTIO_MEM_RESP_ERROR; 531 } 532 533 // Remaining plugged blocks are unmapped. 
534 if config.plugged_size > 0 { 535 let handlers = self.dma_mapping_handlers.lock().unwrap(); 536 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 537 if *plugged { 538 let gpa = config.addr + (idx as u64 * config.block_size); 539 for (_, handler) in handlers.iter() { 540 if let Err(e) = handler.unmap(gpa, config.block_size) { 541 error!( 542 "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}", 543 gpa, config.block_size, e 544 ); 545 return VIRTIO_MEM_RESP_ERROR; 546 } 547 } 548 } 549 } 550 } 551 552 self.blocks_state.lock().unwrap().set_range( 553 0, 554 (config.region_size / config.block_size) as u16, 555 false, 556 ); 557 558 config.plugged_size = 0; 559 560 VIRTIO_MEM_RESP_ACK 561 } 562 563 fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) { 564 let config = self.config.lock().unwrap(); 565 let size: u64 = nb_blocks as u64 * config.block_size; 566 567 let resp_type = if config.is_valid_range(addr, size) { 568 VIRTIO_MEM_RESP_ACK 569 } else { 570 VIRTIO_MEM_RESP_ERROR 571 }; 572 573 let offset = addr - config.addr; 574 let first_block_index = (offset / config.block_size) as usize; 575 let resp_state = 576 if self 577 .blocks_state 578 .lock() 579 .unwrap() 580 .is_range_state(first_block_index, nb_blocks, true) 581 { 582 VIRTIO_MEM_STATE_PLUGGED 583 } else if self.blocks_state.lock().unwrap().is_range_state( 584 first_block_index, 585 nb_blocks, 586 false, 587 ) { 588 VIRTIO_MEM_STATE_UNPLUGGED 589 } else { 590 VIRTIO_MEM_STATE_MIXED 591 }; 592 593 (resp_type, resp_state) 594 } 595 596 fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> { 597 self.interrupt_cb.trigger(int_type).map_err(|e| { 598 error!("Failed to signal used queue: {:?}", e); 599 DeviceError::FailedSignalingUsedQueue(e) 600 }) 601 } 602 603 fn process_queue(&mut self) -> Result<bool, Error> { 604 let mut used_descs = false; 605 606 while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) 
{ 607 let r = Request::parse(&mut desc_chain)?; 608 let (resp_type, resp_state) = match r.req.req_type { 609 VIRTIO_MEM_REQ_PLUG => ( 610 self.state_change_request(r.req.addr, r.req.nb_blocks, true), 611 0u16, 612 ), 613 VIRTIO_MEM_REQ_UNPLUG => ( 614 self.state_change_request(r.req.addr, r.req.nb_blocks, false), 615 0u16, 616 ), 617 VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16), 618 VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks), 619 _ => { 620 return Err(Error::UnknownRequestType(r.req.req_type)); 621 } 622 }; 623 let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?; 624 self.queue 625 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 626 .map_err(Error::QueueAddUsed)?; 627 used_descs = true; 628 } 629 630 Ok(used_descs) 631 } 632 633 fn run( 634 &mut self, 635 paused: Arc<AtomicBool>, 636 paused_sync: Arc<Barrier>, 637 ) -> result::Result<(), EpollHelperError> { 638 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 639 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 640 helper.run(paused, paused_sync, self)?; 641 642 Ok(()) 643 } 644 } 645 646 impl EpollHelperHandler for MemEpollHandler { 647 fn handle_event( 648 &mut self, 649 _helper: &mut EpollHelper, 650 event: &epoll::Event, 651 ) -> result::Result<(), EpollHelperError> { 652 let ev_type = event.data as u16; 653 match ev_type { 654 QUEUE_AVAIL_EVENT => { 655 self.queue_evt.read().map_err(|e| { 656 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 657 })?; 658 659 let needs_notification = self.process_queue().map_err(|e| { 660 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 661 })?; 662 if needs_notification { 663 self.signal(VirtioInterruptType::Queue(0)).map_err(|e| { 664 EpollHelperError::HandleEvent(anyhow!( 665 "Failed to signal used queue: {:?}", 666 e 667 )) 668 })?; 669 } 670 } 671 _ => { 672 return Err(EpollHelperError::HandleEvent(anyhow!( 673 
"Unexpected event: {}", 674 ev_type 675 ))); 676 } 677 } 678 Ok(()) 679 } 680 } 681 682 #[derive(PartialEq, Eq, PartialOrd, Ord)] 683 pub enum VirtioMemMappingSource { 684 Container, 685 Device(u32), 686 } 687 688 #[derive(Serialize, Deserialize)] 689 pub struct MemState { 690 pub avail_features: u64, 691 pub acked_features: u64, 692 pub config: VirtioMemConfig, 693 pub blocks_state: BlocksState, 694 } 695 696 pub struct Mem { 697 common: VirtioCommon, 698 id: String, 699 host_addr: u64, 700 host_fd: Option<RawFd>, 701 config: Arc<Mutex<VirtioMemConfig>>, 702 seccomp_action: SeccompAction, 703 hugepages: bool, 704 dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>, 705 blocks_state: Arc<Mutex<BlocksState>>, 706 exit_evt: EventFd, 707 interrupt_cb: Option<Arc<dyn VirtioInterrupt>>, 708 } 709 710 impl Mem { 711 // Create a new virtio-mem device. 712 #[allow(clippy::too_many_arguments)] 713 pub fn new( 714 id: String, 715 region: &Arc<GuestRegionMmap>, 716 seccomp_action: SeccompAction, 717 numa_node_id: Option<u16>, 718 initial_size: u64, 719 hugepages: bool, 720 exit_evt: EventFd, 721 blocks_state: Arc<Mutex<BlocksState>>, 722 state: Option<MemState>, 723 ) -> io::Result<Mem> { 724 let region_len = region.len(); 725 726 if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE { 727 return Err(io::Error::new( 728 io::ErrorKind::Other, 729 format!("Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"), 730 )); 731 } 732 733 let (avail_features, acked_features, config, paused) = if let Some(state) = state { 734 info!("Restoring virtio-mem {}", id); 735 *(blocks_state.lock().unwrap()) = state.blocks_state.clone(); 736 ( 737 state.avail_features, 738 state.acked_features, 739 state.config, 740 true, 741 ) 742 } else { 743 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 744 745 let mut config = VirtioMemConfig { 746 block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE, 747 addr: 
region.start_addr().raw_value(), 748 region_size: region.len(), 749 usable_region_size: region.len(), 750 plugged_size: 0, 751 requested_size: 0, 752 ..Default::default() 753 }; 754 755 if initial_size != 0 { 756 config.resize(initial_size).map_err(|e| { 757 io::Error::new( 758 io::ErrorKind::Other, 759 format!( 760 "Failed to resize virtio-mem configuration to {initial_size}: {e:?}" 761 ), 762 ) 763 })?; 764 } 765 766 if let Some(node_id) = numa_node_id { 767 avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM; 768 config.node_id = node_id; 769 } 770 771 // Make sure the virtio-mem configuration complies with the 772 // specification. 773 config.validate().map_err(|e| { 774 io::Error::new( 775 io::ErrorKind::Other, 776 format!("Invalid virtio-mem configuration: {e:?}"), 777 ) 778 })?; 779 780 (avail_features, 0, config, false) 781 }; 782 783 let host_fd = region 784 .file_offset() 785 .map(|f_offset| f_offset.file().as_raw_fd()); 786 787 Ok(Mem { 788 common: VirtioCommon { 789 device_type: VirtioDeviceType::Mem as u32, 790 avail_features, 791 acked_features, 792 paused_sync: Some(Arc::new(Barrier::new(2))), 793 queue_sizes: QUEUE_SIZES.to_vec(), 794 min_queues: 1, 795 paused: Arc::new(AtomicBool::new(paused)), 796 ..Default::default() 797 }, 798 id, 799 host_addr: region.as_ptr() as u64, 800 host_fd, 801 config: Arc::new(Mutex::new(config)), 802 seccomp_action, 803 hugepages, 804 dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())), 805 blocks_state, 806 exit_evt, 807 interrupt_cb: None, 808 }) 809 } 810 811 pub fn resize(&mut self, size: u64) -> result::Result<(), Error> { 812 let mut config = self.config.lock().unwrap(); 813 config.resize(size).map_err(|e| { 814 Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e)) 815 })?; 816 817 if let Some(interrupt_cb) = self.interrupt_cb.as_ref() { 818 interrupt_cb 819 .trigger(VirtioInterruptType::Config) 820 .map_err(|e| { 821 Error::ResizeError(anyhow!("Failed to signal the guest about resize: 
{:?}", e)) 822 }) 823 } else { 824 Ok(()) 825 } 826 } 827 828 pub fn add_dma_mapping_handler( 829 &mut self, 830 source: VirtioMemMappingSource, 831 handler: Arc<dyn ExternalDmaMapping>, 832 ) -> result::Result<(), Error> { 833 let config = self.config.lock().unwrap(); 834 835 if config.plugged_size > 0 { 836 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 837 if *plugged { 838 let gpa = config.addr + (idx as u64 * config.block_size); 839 handler 840 .map(gpa, gpa, config.block_size) 841 .map_err(Error::DmaMap)?; 842 } 843 } 844 } 845 846 self.dma_mapping_handlers 847 .lock() 848 .unwrap() 849 .insert(source, handler); 850 851 Ok(()) 852 } 853 854 pub fn remove_dma_mapping_handler( 855 &mut self, 856 source: VirtioMemMappingSource, 857 ) -> result::Result<(), Error> { 858 let handler = self 859 .dma_mapping_handlers 860 .lock() 861 .unwrap() 862 .remove(&source) 863 .ok_or(Error::InvalidDmaMappingHandler)?; 864 865 let config = self.config.lock().unwrap(); 866 867 if config.plugged_size > 0 { 868 for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() { 869 if *plugged { 870 let gpa = config.addr + (idx as u64 * config.block_size); 871 handler 872 .unmap(gpa, config.block_size) 873 .map_err(Error::DmaUnmap)?; 874 } 875 } 876 } 877 878 Ok(()) 879 } 880 881 fn state(&self) -> MemState { 882 MemState { 883 avail_features: self.common.avail_features, 884 acked_features: self.common.acked_features, 885 config: *(self.config.lock().unwrap()), 886 blocks_state: self.blocks_state.lock().unwrap().clone(), 887 } 888 } 889 890 #[cfg(fuzzing)] 891 pub fn wait_for_epoll_threads(&mut self) { 892 self.common.wait_for_epoll_threads(); 893 } 894 } 895 896 impl Drop for Mem { 897 fn drop(&mut self) { 898 if let Some(kill_evt) = self.common.kill_evt.take() { 899 // Ignore the result because there is nothing we can do about it. 
900 let _ = kill_evt.write(1); 901 } 902 self.common.wait_for_epoll_threads(); 903 } 904 } 905 906 impl VirtioDevice for Mem { 907 fn device_type(&self) -> u32 { 908 self.common.device_type 909 } 910 911 fn queue_max_sizes(&self) -> &[u16] { 912 &self.common.queue_sizes 913 } 914 915 fn features(&self) -> u64 { 916 self.common.avail_features 917 } 918 919 fn ack_features(&mut self, value: u64) { 920 self.common.ack_features(value) 921 } 922 923 fn read_config(&self, offset: u64, data: &mut [u8]) { 924 self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data); 925 } 926 927 fn activate( 928 &mut self, 929 mem: GuestMemoryAtomic<GuestMemoryMmap>, 930 interrupt_cb: Arc<dyn VirtioInterrupt>, 931 mut queues: Vec<(usize, Queue, EventFd)>, 932 ) -> ActivateResult { 933 self.common.activate(&queues, &interrupt_cb)?; 934 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 935 936 let (_, queue, queue_evt) = queues.remove(0); 937 938 self.interrupt_cb = Some(interrupt_cb.clone()); 939 940 let mut handler = MemEpollHandler { 941 mem, 942 host_addr: self.host_addr, 943 host_fd: self.host_fd, 944 blocks_state: Arc::clone(&self.blocks_state), 945 config: self.config.clone(), 946 queue, 947 interrupt_cb, 948 queue_evt, 949 kill_evt, 950 pause_evt, 951 hugepages: self.hugepages, 952 dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers), 953 }; 954 955 let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false); 956 for range in unplugged_memory_ranges.regions() { 957 handler 958 .discard_memory_range(range.gpa, range.length) 959 .map_err(|e| { 960 error!( 961 "failed discarding memory range [0x{:x}-0x{:x}]: {:?}", 962 range.gpa, 963 range.gpa + range.length - 1, 964 e 965 ); 966 ActivateError::BadActivate 967 })?; 968 } 969 970 let paused = self.common.paused.clone(); 971 let paused_sync = self.common.paused_sync.clone(); 972 let mut epoll_threads = Vec::new(); 973 974 spawn_virtio_thread( 975 &self.id, 976 
&self.seccomp_action, 977 Thread::VirtioMem, 978 &mut epoll_threads, 979 &self.exit_evt, 980 move || handler.run(paused, paused_sync.unwrap()), 981 )?; 982 self.common.epoll_threads = Some(epoll_threads); 983 984 event!("virtio-device", "activated", "id", &self.id); 985 Ok(()) 986 } 987 988 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 989 let result = self.common.reset(); 990 event!("virtio-device", "reset", "id", &self.id); 991 result 992 } 993 } 994 995 impl Pausable for Mem { 996 fn pause(&mut self) -> result::Result<(), MigratableError> { 997 self.common.pause() 998 } 999 1000 fn resume(&mut self) -> result::Result<(), MigratableError> { 1001 self.common.resume() 1002 } 1003 } 1004 1005 impl Snapshottable for Mem { 1006 fn id(&self) -> String { 1007 self.id.clone() 1008 } 1009 1010 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1011 Snapshot::new_from_state(&self.state()) 1012 } 1013 } 1014 impl Transportable for Mem {} 1015 impl Migratable for Mem {} 1016