// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that the protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that the protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to try_clone EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Not activated by the guest")]
    NotActivatedByGuest,
    #[error("Unknown request type: {0}")]
    UnknownRequestType(u16),
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
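    // Plug/unplug requests address blocks by absolute guest-physical address
    // within [addr, addr + usable_region_size).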
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure no overflow from adding 'addr' and 'size' whose values are
        // both controlled by the guest driver.
        if addr.checked_add(size).is_none() {
            return false;
        }

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
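        // A request chain consists of two descriptors: a device-readable one
        // holding a VirtioMemReq, followed by a device-writable one that
        // receives the VirtioMemResp status.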
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(
        &self,
        mem: &GuestMemoryMmap,
        resp_type: u16,
        state: u16,
    ) -> Result<u32, Error> {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        mem.write_obj(resp, self.status_addr)
            .map_err(Error::GuestMemory)?;
        Ok(size_of::<VirtioMemResp>() as u32)
    }
}

#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        // Pack the per-block Vec<bool> into a u64 bitmap (one bit per block,
        // 64 blocks per word) as expected by MemoryRangeTable::from_bitmap().
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
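        // FALLOC_FL_PUNCH_HOLE returns the backing blocks to the filesystem
        // while FALLOC_FL_KEEP_SIZE keeps the file size unchanged, so the
        // discarded range reads back as zeroes.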
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        // The whole range must currently be in the opposite state for this
        // transition to be valid.
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
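        // Walk the bitmap so that only blocks currently plugged get a DMA
        // unmap request; unplugged blocks were never mapped.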
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        // Reject invalid ranges before computing the offset below, which
        // would otherwise underflow for addresses below the region start.
        if !config.is_valid_range(addr, size) {
            return (VIRTIO_MEM_RESP_ERROR, 0);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> Result<bool, Error> {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let r = Request::parse(&mut desc_chain)?;
            let (resp_type, resp_state) = match r.req.req_type {
                VIRTIO_MEM_REQ_PLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, true),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG => (
                    self.state_change_request(r.req.addr, r.req.nb_blocks, false),
                    0u16,
                ),
                VIRTIO_MEM_REQ_UNPLUG_ALL => (self.unplug_all(), 0u16),
                VIRTIO_MEM_REQ_STATE => self.state_request(r.req.addr, r.req.nb_blocks),
                _ => {
                    return Err(Error::UnknownRequestType(r.req.req_type));
                }
            };
            let len = r.send_response(desc_chain.memory(), resp_type, resp_state)?;
            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;
                if needs_notification {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
        state: Option<MemState>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned with {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let (avail_features, acked_features, config) = if let Some(state) = state {
            info!("Restoring virtio-mem {}", id);
            *(blocks_state.lock().unwrap()) = state.blocks_state.clone();
            (state.avail_features, state.acked_features, state.config)
        } else {
            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            let mut config = VirtioMemConfig {
                block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
                addr: region.start_addr().raw_value(),
                region_size: region.len(),
                usable_region_size: region.len(),
                plugged_size: 0,
                requested_size: 0,
                ..Default::default()
            };

            if initial_size != 0 {
                config.resize(initial_size).map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Failed to resize virtio-mem configuration to {}: {:?}",
                            initial_size, e
                        ),
                    )
                })?;
            }

            if let Some(node_id) = numa_node_id {
                avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
                config.node_id = node_id;
            }

            // Make sure the virtio-mem configuration complies with the
            // specification.
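            // validate() checks that addr and all sizes are multiples of
            // block_size.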
            config.validate().map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("Invalid virtio-mem configuration: {:?}", e),
                )
            })?;

            (avail_features, 0, config)
        };

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                acked_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
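            // A failure here only means the epoll thread cannot be told to
            // stop.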
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}
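
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sanity checks for the block-state bitmap and the config resize
    // rules implemented above; they rely only on items defined in this file.
    #[test]
    fn test_blocks_state_range_tracking() {
        let mut blocks_state = BlocksState::new(4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);

        // A freshly created state tracks every block as unplugged.
        assert!(blocks_state.is_range_state(0, 4, false));

        // Plugging a sub-range flips only that sub-range.
        blocks_state.set_range(1, 2, true);
        assert!(blocks_state.is_range_state(1, 2, true));
        assert!(!blocks_state.is_range_state(0, 4, true));
        assert!(!blocks_state.is_range_state(0, 4, false));
    }

    #[test]
    fn test_config_resize_checks() {
        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        };

        // Unaligned sizes and sizes beyond region_size are rejected.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE + 1).is_err());
        assert!(config.resize(2 * VIRTIO_MEM_ALIGN_SIZE).is_err());

        // A valid size updates requested_size...
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_ok());
        assert_eq!(config.requested_size, VIRTIO_MEM_DEFAULT_BLOCK_SIZE);

        // ...but resizing to the current requested_size is an error.
        assert!(config.resize(VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_err());
    }
}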