// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

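// A quick worked example of the two granularities above: a 1 GiB virtio-mem
// region must itself be 128 MiB aligned, and is managed as 512 blocks of
// 2 MiB each. The compile-time checks below are illustrative sanity checks
// on that relationship; they are not required for correctness.
const _: () = assert!(VIRTIO_MEM_ALIGN_SIZE % VIRTIO_MEM_DEFAULT_BLOCK_SIZE == 0);
const _: () = assert!(VIRTIO_MEM_DEFAULT_BLOCK_SIZE.is_power_of_two());
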
// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Error, Debug)]
pub enum Error {
    #[error("Guest gave us bad memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Guest gave us a write-only descriptor that the protocol says to read from")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Guest gave us a read-only descriptor that the protocol says to write to")]
    UnexpectedReadOnlyDescriptor,
    #[error("Guest gave us too few descriptors in a descriptor chain")]
    DescriptorChainTooShort,
    #[error("Guest gave us a buffer that was too short to use")]
    BufferLengthTooSmall,
    #[error("Guest sent us an invalid request")]
    InvalidRequest,
    #[error("Failed to write to the EventFd: {0}")]
    EventFdWriteFail(std::io::Error),
    #[error("Failed to clone the EventFd: {0}")]
    EventFdTryCloneFail(std::io::Error),
    #[error("Failed to receive on the mpsc channel: {0}")]
    MpscRecvFail(mpsc::RecvError),
    #[error("Invalid resize argument: {0}")]
    ResizeError(anyhow::Error),
    #[error("Failed to trigger the resize: {0}")]
    ResizeTriggerFail(DeviceError),
    #[error("Invalid configuration: {0}")]
    ValidateError(anyhow::Error),
    #[error("Failed discarding memory range: {0}")]
    DiscardMemoryRange(std::io::Error),
    #[error("Failed DMA mapping: {0}")]
    DmaMap(std::io::Error),
    #[error("Failed DMA unmapping: {0}")]
    DmaUnmap(std::io::Error),
    #[error("Invalid DMA mapping handler")]
    InvalidDmaMappingHandler,
    #[error("Device not activated by the guest")]
    NotActivatedByGuest,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

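// Illustrative layout checks, assuming the virtio-mem wire format from the
// virtio specification (24-byte requests, 10-byte responses, 56-byte config
// space). With #[repr(C)] and the explicit padding fields above, none of
// these structs carry implicit padding, which is what the ByteValued impls
// rely on.
const _: () = assert!(size_of::<VirtioMemReq>() == 24);
const _: () = assert!(size_of::<VirtioMemResp>() == 10);
const _: () = assert!(size_of::<VirtioMemConfig>() == 56);
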
impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Ensure addr + size does not overflow.
        let end_addr = match addr.checked_add(size) {
            Some(end_addr) => end_addr,
            None => return false,
        };

        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region. Note the end comparison is strict: a range
        // ending exactly at the end of the usable region is valid.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || end_addr > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

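// Minimal, self-contained checks of the pure config arithmetic above. These
// are illustrative tests only; the addresses and sizes are arbitrary, and no
// VMM plumbing is required since VirtioMemConfig is plain data.
#[cfg(test)]
mod virtio_mem_config_tests {
    use super::*;

    fn test_config() -> VirtioMemConfig {
        VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: 0x1000_0000,
            region_size: VIRTIO_MEM_ALIGN_SIZE, // 128 MiB
            usable_region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        }
    }

    #[test]
    fn resize_checks_bounds_and_alignment() {
        let mut config = test_config();
        // Identical to the current requested_size (0) is rejected.
        assert!(config.resize(0).is_err());
        // Larger than the region is rejected.
        assert!(config
            .resize(config.region_size + config.block_size)
            .is_err());
        // Unaligned sizes are rejected.
        assert!(config.resize(config.block_size + 1).is_err());
        // A block-aligned size within the region is accepted.
        assert!(config.resize(4 * config.block_size).is_ok());
        assert_eq!(config.requested_size, 4 * config.block_size);
    }

    #[test]
    fn valid_range_covers_the_whole_usable_region() {
        let config = test_config();
        // The full usable region, including its last block, is valid.
        assert!(config.is_valid_range(config.addr, config.usable_region_size));
        // Zero-sized, unaligned, or out-of-region requests are not.
        assert!(!config.is_valid_range(config.addr, 0));
        assert!(!config.is_valid_range(config.addr + 1, config.block_size));
        assert!(!config
            .is_valid_range(config.addr, config.usable_region_size + config.block_size));
    }
}
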
struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The first descriptor contains the request and MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status descriptor MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}

#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        // Pack the per-block booleans into 64-bit words, then let
        // MemoryRangeTable coalesce the set bits into contiguous ranges.
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

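// Illustrative tests for the block bitmap above. The region spans eight
// 2 MiB blocks; the expected values simply restate the arithmetic in
// memory_ranges() under the assumption that MemoryRangeTable::from_bitmap()
// turns set bits into ranges covering exactly those blocks.
#[cfg(test)]
mod blocks_state_tests {
    use super::*;

    #[test]
    fn range_state_tracks_plug_and_unplug() {
        let mut state = BlocksState::new(8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
        assert!(state.is_range_state(0, 8, false));

        state.set_range(2, 3, true);
        assert!(state.is_range_state(2, 3, true));
        // The region now holds a mix of plugged and unplugged blocks.
        assert!(!state.is_range_state(0, 8, true));
        assert!(!state.is_range_state(0, 8, false));
    }

    #[test]
    fn memory_ranges_reports_plugged_blocks() {
        let mut state = BlocksState::new(8 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
        state.set_range(2, 3, true);

        let table = state.memory_ranges(0, true);
        let total: u64 = table.regions().iter().map(|r| r.length).sum();
        assert_eq!(total, 3 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);
    }
}
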
struct MemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    queue: Queue,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            // SAFETY: FFI call with valid arguments, on a file descriptor we
            // hold for the lifetime of the device.
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            // SAFETY: FFI call with valid arguments; the range lies within
            // the mapping starting at host_addr.
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising the kernel about the page range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if !config.is_valid_range(addr, size) {
            // Don't inspect the bitmap for an invalid range: computing the
            // block index below would underflow if addr < config.addr.
            return (VIRTIO_MEM_RESP_ERROR, 0);
        }

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state = if self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, true)
        {
            VIRTIO_MEM_STATE_PLUGGED
        } else if self.blocks_state.lock().unwrap().is_range_state(
            first_block_index,
            nb_blocks,
            false,
        ) {
            VIRTIO_MEM_STATE_UNPLUGGED
        } else {
            VIRTIO_MEM_STATE_MIXED
        };

        (VIRTIO_MEM_RESP_ACK, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> bool {
        let mut used_descs = false;

        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let len = match Request::parse(&mut desc_chain) {
                Err(e) => {
                    error!("failed parsing VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(desc_chain.memory(), resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(desc_chain.memory(), resp_type, resp_state)
                    }
                    _ => {
                        error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .unwrap();
            used_descs = true;
        }

        used_descs
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                if self.process_queue() {
                    self.signal(VirtioInterruptType::Queue(0)).map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
    interrupt_cb: Option<Arc<dyn VirtioInterrupt>>,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size 0x{:x} is not aligned on 0x{:x}",
                    region_len, VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
            interrupt_cb: None,
        })
    }

    pub fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        let mut config = self.config.lock().unwrap();
        config.resize(size).map_err(|e| {
            Error::ResizeError(anyhow!("Failed to update virtio configuration: {:?}", e))
        })?;

        if let Some(interrupt_cb) = self.interrupt_cb.as_ref() {
            interrupt_cb
                .trigger(VirtioInterruptType::Config)
                .map_err(|e| {
                    Error::ResizeError(anyhow!("Failed to signal the guest about resize: {:?}", e))
                })
        } else {
            Ok(())
        }
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    fn set_state(&mut self, state: &MemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        *(self.config.lock().unwrap()) = state.config;
        *(self.blocks_state.lock().unwrap()) = state.blocks_state.clone();
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();

        let (_, queue, queue_evt) = queues.remove(0);

        self.interrupt_cb = Some(interrupt_cb.clone());

        let mut handler = MemEpollHandler {
            mem,
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            queue,
            interrupt_cb,
            queue_evt,
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(paused, paused_sync.unwrap()),
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}
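
// A small, illustrative check of the response path in isolation: write a
// status into plain guest memory and read it back. The 4 KiB region and the
// 0x100 status address are arbitrary test values.
#[cfg(test)]
mod response_tests {
    use super::*;

    #[test]
    fn send_response_writes_type_and_state() {
        let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
        let request = Request {
            req: VirtioMemReq::default(),
            status_addr: GuestAddress(0x100),
        };

        // On success, send_response() reports the number of bytes written.
        let len = request.send_response(&mem, VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_PLUGGED);
        assert_eq!(len as usize, size_of::<VirtioMemResp>());

        let resp: VirtioMemResp = mem.read_obj(GuestAddress(0x100)).unwrap();
        assert_eq!(resp.resp_type, VIRTIO_MEM_RESP_ACK);
        assert_eq!(resp.state, VIRTIO_MEM_STATE_PLUGGED);
    }
}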