// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{DescriptorChain, Queue};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError,
    GuestMemoryLoadGuard, GuestMemoryRegion,
};
use vm_migration::protocol::MemoryRangeTable;
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128 MiB is the standard memory block size in Linux. A virtio-mem region
// must be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

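// Worked example for the two constants above (illustrative arithmetic only):
// a 1 GiB region is accepted since (1 << 30) % VIRTIO_MEM_ALIGN_SIZE == 0,
// and it is managed as (1 << 30) / VIRTIO_MEM_DEFAULT_BLOCK_SIZE == 512
// individually hot(un)pluggable blocks.
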
// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// Request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// Request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// Request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// Request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// The resize event has been triggered.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write-only descriptor that the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    // Guest gave us a read-only descriptor that the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    // Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    // Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    // Guest sent us an invalid request.
    InvalidRequest,
    // Failed to write to the EventFd.
    EventFdWriteFail(std::io::Error),
    // Failed to clone the EventFd.
    EventFdTryCloneFail(std::io::Error),
    // Failed to receive on the mpsc channel.
    MpscRecvFail(mpsc::RecvError),
    // Invalid resize argument.
    ResizeError(anyhow::Error),
    // Failed to trigger the resize.
    ResizeTriggerFail(DeviceError),
    // Invalid configuration.
    ValidateError(anyhow::Error),
    // Failed discarding memory range.
    DiscardMemoryRange(std::io::Error),
    // Failed DMA mapping.
    DmaMap(std::io::Error),
    // Failed DMA unmapping.
    DmaUnmap(std::io::Error),
    // Invalid DMA mapping handler.
    InvalidDmaMappingHandler,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

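// Layout sanity sketch (values follow from the #[repr(C)] definitions above):
//
//     assert_eq!(size_of::<VirtioMemReq>(), 24);  // 2 + 6 + 8 + 2 + 6
//     assert_eq!(size_of::<VirtioMemResp>(), 10); // 2 + 6 + 2
//
// Request::parse() below relies on these exact sizes when validating
// descriptor lengths.
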
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Versionize)]
pub struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % self.block_size != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size > self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

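// Boundary example for is_valid_range() (illustrative values): with
// addr = 0x1_0000_0000, block_size = 2 MiB and usable_region_size = 1 GiB,
// the last block is covered by is_valid_range(0x1_3fe0_0000, 0x20_0000),
// which holds because the range ends exactly at addr + usable_region_size,
// while is_valid_range(0x1_3fe0_0000, 0x40_0000) fails since it runs past
// the end of the usable region.
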
struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if desc.len() as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = desc_chain
            .memory()
            .read_obj(desc.addr())
            .map_err(Error::GuestMemory)?;

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr(),
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}

pub struct ResizeSender {
    hotplugged_size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    evt: EventFd,
}

impl ResizeSender {
    fn size(&self) -> u64 {
        self.hotplugged_size.load(Ordering::Acquire)
    }

    fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
        self.tx.send(r)
    }
}

impl Clone for ResizeSender {
    fn clone(&self) -> Self {
        ResizeSender {
            hotplugged_size: self.hotplugged_size.clone(),
            tx: self.tx.clone(),
            evt: self
                .evt
                .try_clone()
                .expect("Failed cloning EventFd from ResizeSender"),
        }
    }
}

pub struct Resize {
    hotplugged_size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    rx: mpsc::Receiver<Result<(), Error>>,
    evt: EventFd,
}

impl Resize {
    pub fn new(hotplugged_size: u64) -> io::Result<Self> {
        let (tx, rx) = mpsc::channel();

        Ok(Resize {
            hotplugged_size: Arc::new(AtomicU64::new(hotplugged_size)),
            tx,
            rx,
            evt: EventFd::new(EFD_NONBLOCK)?,
        })
    }

    pub fn new_resize_sender(&self) -> Result<ResizeSender, Error> {
        Ok(ResizeSender {
            hotplugged_size: self.hotplugged_size.clone(),
            tx: self.tx.clone(),
            evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
        })
    }

    pub fn work(&self, desired_size: u64) -> Result<(), Error> {
        self.hotplugged_size.store(desired_size, Ordering::Release);
        self.evt.write(1).map_err(Error::EventFdWriteFail)?;
        self.rx.recv().map_err(Error::MpscRecvFail)?
    }
}

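// Usage sketch (hypothetical local names): the VMM keeps the Resize end and
// hands a ResizeSender clone to the device. A hot(un)plug request then looks
// like:
//
//     let resize = Resize::new(0)?;
//     let sender = resize.new_resize_sender()?;
//     // ... pass `sender` to Mem::new() ...
//     resize.work(512 << 20)?; // ask the guest to plug up to 512 MiB
//
// work() publishes the new size, kicks the eventfd so the device thread
// updates requested_size and raises a config-change interrupt, then blocks
// on the mpsc channel until that thread reports the outcome.
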
#[derive(Clone, Versionize)]
pub struct BlocksState {
    bitmap: Vec<bool>,
}

impl BlocksState {
    pub fn new(region_size: u64) -> Self {
        BlocksState {
            bitmap: vec![false; (region_size / VIRTIO_MEM_DEFAULT_BLOCK_SIZE) as usize],
        }
    }

    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .bitmap
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .bitmap
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.bitmap
    }

    pub fn memory_ranges(&self, start_addr: u64, plugged: bool) -> MemoryRangeTable {
        let mut bitmap: Vec<u64> = Vec::new();
        let mut i = 0;
        for (j, bit) in self.bitmap.iter().enumerate() {
            if j % 64 == 0 {
                bitmap.push(0);

                if j != 0 {
                    i += 1;
                }
            }

            if *bit == plugged {
                bitmap[i] |= 1 << (j % 64);
            }
        }

        MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
    }
}

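// Packing example for memory_ranges() (illustrative): with 2 MiB blocks, a
// bitmap of [true, true, false, false, false, true, false, ...] packs into
// the first u64 word as 0b10_0011, and memory_ranges(0, true) folds it into
// two ranges: gpa 0x0 / length 0x40_0000 and gpa 0xa0_0000 / length 0x20_0000.
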
struct MemEpollHandler {
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    resize: ResizeSender,
    queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }

            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

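    // End-to-end flow sketch for a plug request (illustrative values): the
    // guest asks for 4 blocks at gpa 0x1_4000_0000. state_change_request()
    // above checks the extra 8 MiB still fits under requested_size, verifies
    // all 4 blocks are currently unplugged, flips them in the bitmap, replays
    // map() on every registered ExternalDmaMapping handler and finally bumps
    // plugged_size by 8 MiB before the caller ACKs.
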
    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb.trigger(int_type).map_err(|e| {
            error!("Failed to signal used queue: {:?}", e);
            DeviceError::FailedSignalingUsedQueue(e)
        })
    }

    fn process_queue(&mut self) -> bool {
        let mut request_list = Vec::new();
        let mut used_count = 0;

        for mut desc_chain in self.queue.iter().unwrap() {
            request_list.push((
                desc_chain.head_index(),
                Request::parse(&mut desc_chain),
                desc_chain.memory().clone(),
            ));
        }

        for (head_index, request, memory) in request_list {
            let len = match request {
                Err(e) => {
                    error!("failed to parse VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(&memory, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(&memory, resp_type, resp_state)
                    }
                    _ => {
                        error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue.add_used(head_index, len).unwrap();
            used_count += 1;
        }

        used_count > 0
    }

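    // Queue round-trip sketch (illustrative values): a plug request arrives
    // as a 24-byte read-only descriptor holding
    //
    //     VirtioMemReq { req_type: VIRTIO_MEM_REQ_PLUG, addr: 0x1_4000_0000,
    //                    nb_blocks: 4, ..Default::default() }
    //
    // process_queue() parses it, runs state_change_request(), writes a
    // VirtioMemResp into the chain's write-only status descriptor, and
    // returns the head index to the used ring so the guest can be notified.
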
    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.resize.evt.as_raw_fd(), RESIZE_EVENT)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            RESIZE_EVENT => {
                if let Err(e) = self.resize.evt.read() {
                    error!("Failed to get resize event: {:?}", e);
                    return true;
                } else {
                    let size = self.resize.size();
                    let mut config = self.config.lock().unwrap();
                    let mut signal_error = false;
                    let mut r = config.resize(size);
                    r = match r {
                        Err(e) => Err(e),
                        _ => match self.signal(VirtioInterruptType::Config) {
                            Err(e) => {
                                signal_error = true;
                                Err(Error::ResizeTriggerFail(e))
                            }
                            _ => Ok(()),
                        },
                    };
                    if let Err(e) = self.resize.send(r) {
                        error!("Sending \"resize\" response: {:?}", e);
                        return true;
                    }
                    if signal_error {
                        return true;
                    }
                }
            }
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal(VirtioInterruptType::Queue(0)) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum VirtioMemMappingSource {
    Container,
    Device(u32),
}

#[derive(Versionize)]
pub struct MemState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub config: VirtioMemConfig,
    pub blocks_state: BlocksState,
}

impl VersionMapped for MemState {}

pub struct Mem {
    common: VirtioCommon,
    id: String,
    resize: ResizeSender,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<VirtioMemMappingSource, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
    exit_evt: EventFd,
}

impl Mem {
    // Create a new virtio-mem device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        resize: ResizeSender,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
        exit_evt: EventFd,
        blocks_state: Arc<Mutex<BlocksState>>,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len % VIRTIO_MEM_ALIGN_SIZE != 0 {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned with {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            resize,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state,
            exit_evt,
        })
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(source, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(
        &mut self,
        source: VirtioMemMappingSource,
    ) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&source)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }

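    // Registration sketch (hypothetical `bdf` and `vfio_handler` names): when
    // a VFIO device is hotplugged, the VMM registers its mapping handler so
    // the blocks that are already plugged get DMA-mapped right away:
    //
    //     mem.add_dma_mapping_handler(
    //         VirtioMemMappingSource::Device(bdf),
    //         vfio_handler.clone(),
    //     )?;
    //
    // Subsequent plug/unplug requests then replay map()/unmap() on it.
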
    fn state(&self) -> MemState {
        MemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: *(self.config.lock().unwrap()),
            blocks_state: self.blocks_state.lock().unwrap().clone(),
        }
    }

    fn set_state(&mut self, state: &MemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        *(self.config.lock().unwrap()) = state.config;
        *(self.blocks_state.lock().unwrap()) = state.blocks_state.clone();
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        _mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let mut handler = MemEpollHandler {
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            resize: self.resize.clone(),
            queue: queues.remove(0),
            interrupt_cb,
            queue_evt: queue_evts.remove(0),
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        let unplugged_memory_ranges = self.blocks_state.lock().unwrap().memory_ranges(0, false);
        for range in unplugged_memory_ranges.regions() {
            handler
                .discard_memory_range(range.gpa, range.length)
                .map_err(|e| {
                    error!(
                        "failed discarding memory range [0x{:x}-0x{:x}]: {:?}",
                        range.gpa,
                        range.gpa + range.length - 1,
                        e
                    );
                    ActivateError::BadActivate
                })?;
        }

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();

        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioMem,
            &mut epoll_threads,
            &self.exit_evt,
            move || {
                if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            },
        )?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

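// Migration note (sketch): snapshot() below serializes MemState (features,
// config and the plugged-block bitmap) through versionize, and restore()
// feeds it back, so that activate() can re-discard the still-unplugged
// ranges before the guest resumes on the destination.
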
impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Mem {}
impl Migratable for Mem {}