// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::atomic::AtomicBool;
use std::sync::{mpsc, Arc, Barrier, Mutex};
use std::{io, result};

use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
    Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
    VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VirtioInterruptType};

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region
// must be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use a 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;
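
// Sanity check: the region is tracked in units of the block size, so the
// region alignment must itself be a multiple of the default block size.
const _: () = assert!(VIRTIO_MEM_ALIGN_SIZE % VIRTIO_MEM_DEFAULT_BLOCK_SIZE == 0);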

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that the protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that the protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to the EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone the EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on the MPSC channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger the resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}
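
// Compile-time layout checks: per the virtio-mem specification the request
// is 24 bytes on the wire and the state response 10 bytes. The ByteValued
// impls above rely on these structs having no implicit padding, so pin the
// sizes down here.
const _: () = assert!(size_of::<VirtioMemReq>() == 24);
const _: () = assert!(size_of::<VirtioMemResp>() == 10);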

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size', whose values are
        // both controlled by the guest driver.
        if addr.checked_add(size).is_none() {
            return false;
        }

        // The start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be in
        // the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}
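
// A minimal sketch of the invariants above, written as a unit test. The
// addresses and sizes are illustrative; any block-aligned layout would do.
#[cfg(test)]
mod config_tests {
    use super::*;

    #[test]
    fn resize_and_range_checks() {
        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: 0x4000_0000,
            region_size: VIRTIO_MEM_ALIGN_SIZE,
            usable_region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        };
        config.validate().unwrap();

        // Growing to a block-aligned size within the region is accepted.
        config.resize(4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE).unwrap();
        // Growing beyond region_size is rejected.
        config.resize(2 * VIRTIO_MEM_ALIGN_SIZE).unwrap_err();

        // A block-aligned range inside the usable region is valid.
        assert!(config.is_valid_range(0x4000_0000, VIRTIO_MEM_DEFAULT_BLOCK_SIZE));
        // An unaligned start address is not.
        assert!(!config.is_valid_range(0x4000_0001, VIRTIO_MEM_DEFAULT_BLOCK_SIZE));
        // Neither is a range extending past the usable region.
        assert!(!config.is_valid_range(0x4000_0000, 2 * VIRTIO_MEM_ALIGN_SIZE));
    }
}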

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}

#[derive(Clone, Serialize, Deserialize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        // Reject invalid ranges before computing the offset below, which
        // would otherwise underflow for addresses below the region start.
        if !config.is_valid_range(addr, size) {
            return (VIRTIO_MEM_RESP_ERROR, 0);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}
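
// The handler thread blocks in EpollHelper::run() above. It wakes up when
// the guest kicks the virtqueue (QUEUE_AVAIL_EVENT) or when the device is
// paused or torn down via the pause/kill eventfds registered by the helper.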
impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}
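
// A DMA mapping handler is registered per consumer of this memory region:
// either a container (e.g. VFIO) or an individual device identified by a
// 32-bit id. Plug and unplug requests are replayed to every registered
// handler so external IOMMU mappings stay consistent with the plugged
// blocks.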
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Serialize, Deserialize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("Virtio-mem size is not aligned with {VIRTIO_MEM_ALIGN_SIZE}"),
            ));
        }

        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {initial_size}: {e:?}"
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {e:?}"),
                )
            })?;

            (avail_features, 0, config, false)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        // Replay the currently plugged blocks so the new handler starts
        // from a state consistent with the device.
        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        // Tear down the mappings that were created for the plugged blocks.
        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}
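
// Tearing the device down goes through the kill eventfd: Drop signals it,
// then joins the epoll threads so no worker outlives the device.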
impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        // Discard every range the bitmap reports as unplugged so the host
        // does not back memory the guest has not plugged. This also covers
        // the restore path, where unplugged ranges must be discarded again.
        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}
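
// Pause, resume and snapshotting are delegated to VirtioCommon; the
// device-specific part of the state (features, config and the block
// bitmap) is what MemState captures above.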
impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}
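
// A short round-trip sketch for the wire structs: encode a request with
// ByteValued::as_slice() and decode it back by copying into a default
// value, analogous to how a guest-written buffer crosses guest memory.
#[cfg(test)]
mod wire_format_tests {
    use super::*;

    #[test]
    fn request_round_trips_through_bytes() {
        let req = VirtioMemReq {
            req_type: VIRTIO_MEM_REQ_PLUG,
            addr: 0x4000_0000,
            nb_blocks: 4,
            ..Default::default()
        };

        // Copy through a byte buffer, as if crossing guest memory.
        let mut decoded = VirtioMemReq::default();
        decoded.as_mut_slice().copy_from_slice(req.as_slice());

        assert_eq!(decoded.req_type, VIRTIO_MEM_REQ_PLUG);
        assert_eq!(decoded.addr, 0x4000_0000);
        assert_eq!(decoded.nb_blocks, 4);
    }
}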