// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, EpollHelper, EpollHelperError,
    EpollHelperHandler, Queue, VirtioCommon, VirtioDevice, VirtioDeviceType,
    EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::{GuestMemoryMmap, GuestRegionMmap};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccomp::{SeccompAction, SeccompFilter};
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use std::thread;
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryRegion,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 128;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// 128MiB is the standard memory block size in Linux. A virtio-mem region must
// be aligned on this size, and the region size must be a multiple of it.
pub const VIRTIO_MEM_ALIGN_SIZE: u64 = 128 << 20;
// Use 2 MiB alignment so transparent hugepages can be used by KVM.
const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 2 << 20;

// Request processed successfully, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ACK: u16 = 0;

// Request denied - e.g. trying to plug more than requested, applicable for
// - VIRTIO_MEM_REQ_PLUG
const VIRTIO_MEM_RESP_NACK: u16 = 1;

// Request cannot be processed right now, try again later, applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_UNPLUG_ALL
#[allow(unused)]
const VIRTIO_MEM_RESP_BUSY: u16 = 2;

// Error in request (e.g. addresses/alignment), applicable for
// - VIRTIO_MEM_REQ_PLUG
// - VIRTIO_MEM_REQ_UNPLUG
// - VIRTIO_MEM_REQ_STATE
const VIRTIO_MEM_RESP_ERROR: u16 = 3;

// State of memory blocks is "plugged"
const VIRTIO_MEM_STATE_PLUGGED: u16 = 0;
// State of memory blocks is "unplugged"
const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1;
// State of memory blocks is "mixed"
const VIRTIO_MEM_STATE_MIXED: u16 = 2;

// request to plug memory blocks
const VIRTIO_MEM_REQ_PLUG: u16 = 0;
// request to unplug memory blocks
const VIRTIO_MEM_REQ_UNPLUG: u16 = 1;
// request to unplug all blocks and shrink the usable size
const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2;
// request information about the plugged state of memory blocks
const VIRTIO_MEM_REQ_STATE: u16 = 3;

// A resize request is pending on the resize EventFd.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Virtio features
const VIRTIO_MEM_F_ACPI_PXM: u8 = 0;

#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write-only descriptor that protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    // Guest gave us a read-only descriptor that protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    // Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    // Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    // Guest sent us an invalid request.
    InvalidRequest,
    // Failed to write to the EventFd.
    EventFdWriteFail(std::io::Error),
    // Failed to clone the EventFd.
    EventFdTryCloneFail(std::io::Error),
    // Failed to receive on the mpsc channel.
    MpscRecvFail(mpsc::RecvError),
    // Invalid resize argument.
    ResizeError(anyhow::Error),
    // Failed to trigger the resize.
    ResizeTriggerFail(DeviceError),
    // Invalid configuration.
    ValidateError(anyhow::Error),
    // Failed discarding memory range.
    DiscardMemoryRange(std::io::Error),
    // Failed DMA mapping.
    DmaMap(std::io::Error),
    // Failed DMA unmapping.
    DmaUnmap(std::io::Error),
    // Invalid DMA mapping handler.
    InvalidDmaMappingHandler,
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemReq {
    req_type: u16,
    padding: [u16; 3],
    addr: u64,
    nb_blocks: u16,
    padding_1: [u16; 3],
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemReq {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemResp {
    resp_type: u16,
    padding: [u16; 3],
    state: u16,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemResp {}

#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioMemConfig {
    // Block size and alignment. Cannot change.
    block_size: u64,
    // Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change.
    node_id: u16,
    padding: [u8; 6],
    // Start address of the memory region. Cannot change.
    addr: u64,
    // Region size (maximum). Cannot change.
    region_size: u64,
    // Currently usable region size. Can grow up to region_size. Can
    // shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
    // update will be sent).
    usable_region_size: u64,
    // Currently used size. Changes due to plug/unplug requests, but no
    // config updates will be sent.
    plugged_size: u64,
    // Requested size. New plug requests cannot exceed it. Can change.
    requested_size: u64,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioMemConfig {}

impl VirtioMemConfig {
    fn validate(&self) -> result::Result<(), Error> {
        if self.addr % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "addr 0x{:x} is not aligned on block_size 0x{:x}",
                self.addr,
                self.block_size
            )));
        }
        if self.region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.region_size,
                self.block_size
            )));
        }
        if self.usable_region_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.usable_region_size,
                self.block_size
            )));
        }
        if self.plugged_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "plugged_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.plugged_size,
                self.block_size
            )));
        }
        if self.requested_size % self.block_size != 0 {
            return Err(Error::ValidateError(anyhow!(
                "requested_size 0x{:x} is not aligned on block_size 0x{:x}",
                self.requested_size,
                self.block_size
            )));
        }

        Ok(())
    }

    fn resize(&mut self, size: u64) -> result::Result<(), Error> {
        if self.requested_size == size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} and requested_size are identical",
                size
            )));
        } else if size > self.region_size {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is bigger than region_size 0x{:x}",
                size,
                self.region_size
            )));
        } else if size % (self.block_size as u64) != 0 {
            return Err(Error::ResizeError(anyhow!(
                "new size 0x{:x} is not aligned on block_size 0x{:x}",
                size,
                self.block_size
            )));
        }

        self.requested_size = size;

        Ok(())
    }

    fn is_valid_range(&self, addr: u64, size: u64) -> bool {
        // Start address must be aligned on block_size, the size must be
        // greater than 0, and all blocks covered by the request must be
        // in the usable region.
        if addr % self.block_size != 0
            || size == 0
            || (addr < self.addr || addr + size >= self.addr + self.usable_region_size)
        {
            return false;
        }

        true
    }
}

struct Request {
    req: VirtioMemReq,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
    ) -> result::Result<Request, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }
        if avail_desc.len as usize != size_of::<VirtioMemReq>() {
            return Err(Error::InvalidRequest);
        }
        let req: VirtioMemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;

        let status_desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len as usize) < size_of::<VirtioMemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            req,
            status_addr: status_desc.addr,
        })
    }

    fn send_response(&self, mem: &GuestMemoryMmap, resp_type: u16, state: u16) -> u32 {
        let resp = VirtioMemResp {
            resp_type,
            state,
            ..Default::default()
        };
        match mem.write_obj(resp, self.status_addr) {
            Ok(_) => size_of::<VirtioMemResp>() as u32,
            Err(e) => {
                error!("bad guest memory address: {}", e);
                0
            }
        }
    }
}

pub struct ResizeSender {
    size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    evt: EventFd,
}

impl ResizeSender {
    fn size(&self) -> u64 {
        self.size.load(Ordering::Acquire)
    }

    fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
        self.tx.send(r)
    }
}

impl Clone for ResizeSender {
    fn clone(&self) -> Self {
        ResizeSender {
            size: self.size.clone(),
            tx: self.tx.clone(),
            evt: self
                .evt
                .try_clone()
                .expect("Failed cloning EventFd from ResizeSender"),
        }
    }
}

pub struct Resize {
    size: Arc<AtomicU64>,
    tx: mpsc::Sender<Result<(), Error>>,
    rx: mpsc::Receiver<Result<(), Error>>,
    evt: EventFd,
}

impl Resize {
    pub fn new() -> io::Result<Self> {
        let (tx, rx) = mpsc::channel();

        Ok(Resize {
            size: Arc::new(AtomicU64::new(0)),
            tx,
            rx,
            evt: EventFd::new(EFD_NONBLOCK)?,
        })
    }

    pub fn new_resize_sender(&self) -> Result<ResizeSender, Error> {
        Ok(ResizeSender {
            size: self.size.clone(),
            tx: self.tx.clone(),
            evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
        })
    }

    pub fn work(&self, size: u64) -> Result<(), Error> {
        self.size.store(size, Ordering::Release);
        self.evt.write(1).map_err(Error::EventFdWriteFail)?;
        self.rx.recv().map_err(Error::MpscRecvFail)?
    }
}
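
// Tracks the plug state of every block in the virtio-mem region, one
// boolean per block_size chunk (true means the block is plugged).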
struct BlocksState(Vec<bool>);

impl BlocksState {
    fn is_range_state(&self, first_block_index: usize, nb_blocks: u16, plug: bool) -> bool {
        for state in self
            .0
            .iter()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            if *state != plug {
                return false;
            }
        }
        true
    }

    fn set_range(&mut self, first_block_index: usize, nb_blocks: u16, plug: bool) {
        for state in self
            .0
            .iter_mut()
            .skip(first_block_index)
            .take(nb_blocks as usize)
        {
            *state = plug;
        }
    }

    fn inner(&self) -> &Vec<bool> {
        &self.0
    }
}

struct MemEpollHandler {
    host_addr: u64,
    host_fd: Option<RawFd>,
    blocks_state: Arc<Mutex<BlocksState>>,
    config: Arc<Mutex<VirtioMemConfig>>,
    resize: ResizeSender,
    queue: Queue,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
}

impl MemEpollHandler {
    fn discard_memory_range(&self, offset: u64, size: u64) -> Result<(), Error> {
        // Use fallocate if the memory region is backed by a file.
        if let Some(fd) = self.host_fd {
            let res = unsafe {
                libc::fallocate64(
                    fd,
                    libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
                    offset as libc::off64_t,
                    size as libc::off64_t,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Deallocating file space failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        // Only use madvise if the memory region is not allocated with
        // hugepages.
        if !self.hugepages {
            let res = unsafe {
                libc::madvise(
                    (self.host_addr + offset) as *mut libc::c_void,
                    size as libc::size_t,
                    libc::MADV_DONTNEED,
                )
            };
            if res != 0 {
                let err = io::Error::last_os_error();
                error!("Advising kernel about pages range failed: {}", err);
                return Err(Error::DiscardMemoryRange(err));
            }
        }

        Ok(())
    }

    fn state_change_request(&mut self, addr: u64, nb_blocks: u16, plug: bool) -> u16 {
        let mut config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        if plug && (config.plugged_size + size > config.requested_size) {
            return VIRTIO_MEM_RESP_NACK;
        }
        if !config.is_valid_range(addr, size) {
            return VIRTIO_MEM_RESP_ERROR;
        }

        let offset = addr - config.addr;

        let first_block_index = (offset / config.block_size) as usize;
        if !self
            .blocks_state
            .lock()
            .unwrap()
            .is_range_state(first_block_index, nb_blocks, !plug)
        {
            return VIRTIO_MEM_RESP_ERROR;
        }

        if !plug {
            if let Err(e) = self.discard_memory_range(offset, size) {
                error!("failed discarding memory range: {:?}", e);
                return VIRTIO_MEM_RESP_ERROR;
            }
        }

        self.blocks_state
            .lock()
            .unwrap()
            .set_range(first_block_index, nb_blocks, plug);

        let handlers = self.dma_mapping_handlers.lock().unwrap();
        if plug {
            let mut gpa = addr;
            for _ in 0..nb_blocks {
                for (_, handler) in handlers.iter() {
                    if let Err(e) = handler.map(gpa, gpa, config.block_size) {
                        error!(
                            "failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
                            gpa, config.block_size, e
                        );
                        return VIRTIO_MEM_RESP_ERROR;
                    }
                }

                gpa += config.block_size;
            }
            config.plugged_size += size;
        } else {
            for (_, handler) in handlers.iter() {
                if let Err(e) = handler.unmap(addr, size) {
                    error!(
                        "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                        addr, size, e
                    );
                    return VIRTIO_MEM_RESP_ERROR;
                }
            }

            config.plugged_size -= size;
        }

        VIRTIO_MEM_RESP_ACK
    }

    fn unplug_all(&mut self) -> u16 {
        let mut config = self.config.lock().unwrap();
        if let Err(e) = self.discard_memory_range(0, config.region_size) {
            error!("failed discarding memory range: {:?}", e);
            return VIRTIO_MEM_RESP_ERROR;
        }

        // Remaining plugged blocks are unmapped.
        if config.plugged_size > 0 {
            let handlers = self.dma_mapping_handlers.lock().unwrap();
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    for (_, handler) in handlers.iter() {
                        if let Err(e) = handler.unmap(gpa, config.block_size) {
                            error!(
                                "failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
                                gpa, config.block_size, e
                            );
                            return VIRTIO_MEM_RESP_ERROR;
                        }
                    }
                }
            }
        }

        self.blocks_state.lock().unwrap().set_range(
            0,
            (config.region_size / config.block_size) as u16,
            false,
        );

        config.plugged_size = 0;

        VIRTIO_MEM_RESP_ACK
    }

    fn state_request(&self, addr: u64, nb_blocks: u16) -> (u16, u16) {
        let config = self.config.lock().unwrap();
        let size: u64 = nb_blocks as u64 * config.block_size;

        let resp_type = if config.is_valid_range(addr, size) {
            VIRTIO_MEM_RESP_ACK
        } else {
            VIRTIO_MEM_RESP_ERROR
        };

        let offset = addr - config.addr;
        let first_block_index = (offset / config.block_size) as usize;
        let resp_state =
            if self
                .blocks_state
                .lock()
                .unwrap()
                .is_range_state(first_block_index, nb_blocks, true)
            {
                VIRTIO_MEM_STATE_PLUGGED
            } else if self.blocks_state.lock().unwrap().is_range_state(
                first_block_index,
                nb_blocks,
                false,
            ) {
                VIRTIO_MEM_STATE_UNPLUGGED
            } else {
                VIRTIO_MEM_STATE_MIXED
            };

        (resp_type, resp_state)
    }

    fn signal(&self, int_type: &VirtioInterruptType) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(int_type, Some(&self.queue))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }
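
    // Drains the available queue: each request is parsed, the matching
    // plug/unplug/state operation is applied, and a response is written
    // back to the guest. Returns true if any descriptor was consumed so
    // the caller knows to signal the used queue.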
    fn process_queue(&mut self) -> bool {
        let mut request_list = Vec::new();
        let mut used_count = 0;
        let mem = self.mem.memory();
        for avail_desc in self.queue.iter(&mem) {
            request_list.push((avail_desc.index, Request::parse(&avail_desc, &mem)));
        }

        for (desc_index, request) in request_list.iter() {
            let len = match request {
                Err(e) => {
                    error!("failed to parse VirtioMemReq: {:?}", e);
                    0
                }
                Ok(r) => match r.req.req_type {
                    VIRTIO_MEM_REQ_PLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, true);
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG => {
                        let resp_type =
                            self.state_change_request(r.req.addr, r.req.nb_blocks, false);
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_UNPLUG_ALL => {
                        let resp_type = self.unplug_all();
                        r.send_response(&mem, resp_type, 0u16)
                    }
                    VIRTIO_MEM_REQ_STATE => {
                        let (resp_type, resp_state) =
                            self.state_request(r.req.addr, r.req.nb_blocks);
                        r.send_response(&mem, resp_type, resp_state)
                    }
                    _ => {
                        error!("VirtioMemReq unknown request type {:?}", r.req.req_type);
                        0
                    }
                },
            };

            self.queue.add_used(&mem, *desc_index, len);

            used_count += 1;
        }

        used_count > 0
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.resize.evt.as_raw_fd(), RESIZE_EVENT)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for MemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            RESIZE_EVENT => {
                if let Err(e) = self.resize.evt.read() {
                    error!("Failed to get resize event: {:?}", e);
                    return true;
                } else {
                    let size = self.resize.size();
                    let mut config = self.config.lock().unwrap();
                    let mut signal_error = false;
                    let mut r = config.resize(size);
                    r = match r {
                        Err(e) => Err(e),
                        _ => match self.signal(&VirtioInterruptType::Config) {
                            Err(e) => {
                                signal_error = true;
                                Err(Error::ResizeTriggerFail(e))
                            }
                            _ => Ok(()),
                        },
                    };
                    if let Err(e) = self.resize.send(r) {
                        error!("Sending \"resize\" response: {:?}", e);
                        return true;
                    }
                    if signal_error {
                        return true;
                    }
                }
            }
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal(&VirtioInterruptType::Queue) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

// Virtio device for exposing memory hot(un)plug to the guest OS through
// virtio-mem.
pub struct Mem {
    common: VirtioCommon,
    id: String,
    resize: ResizeSender,
    host_addr: u64,
    host_fd: Option<RawFd>,
    config: Arc<Mutex<VirtioMemConfig>>,
    seccomp_action: SeccompAction,
    hugepages: bool,
    dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
    blocks_state: Arc<Mutex<BlocksState>>,
}

impl Mem {
    // Create a new virtio-mem device.
    pub fn new(
        id: String,
        region: &Arc<GuestRegionMmap>,
        resize: ResizeSender,
        seccomp_action: SeccompAction,
        numa_node_id: Option<u16>,
        initial_size: u64,
        hugepages: bool,
    ) -> io::Result<Mem> {
        let region_len = region.len();

        if region_len != region_len / VIRTIO_MEM_ALIGN_SIZE * VIRTIO_MEM_ALIGN_SIZE {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "Virtio-mem size is not aligned with {}",
                    VIRTIO_MEM_ALIGN_SIZE
                ),
            ));
        }

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            addr: region.start_addr().raw_value(),
            region_size: region.len(),
            usable_region_size: region.len(),
            plugged_size: 0,
            requested_size: 0,
            ..Default::default()
        };

        if initial_size != 0 {
            config.resize(initial_size).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Failed to resize virtio-mem configuration to {}: {:?}",
                        initial_size, e
                    ),
                )
            })?;
        }

        if let Some(node_id) = numa_node_id {
            avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM;
            config.node_id = node_id;
        }

        // Make sure the virtio-mem configuration complies with the
        // specification.
        config.validate().map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("Invalid virtio-mem configuration: {:?}", e),
            )
        })?;

        let host_fd = region
            .file_offset()
            .map(|f_offset| f_offset.file().as_raw_fd());

        Ok(Mem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Mem as u32,
                avail_features,
                paused_sync: Some(Arc::new(Barrier::new(2))),
                queue_sizes: QUEUE_SIZES.to_vec(),
                min_queues: 1,
                ..Default::default()
            },
            id,
            resize,
            host_addr: region.as_ptr() as u64,
            host_fd,
            config: Arc::new(Mutex::new(config)),
            seccomp_action,
            hugepages,
            dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
            blocks_state: Arc::new(Mutex::new(BlocksState(vec![
                false;
                (config.region_size / config.block_size) as usize
            ]))),
        })
    }

    pub fn add_dma_mapping_handler(
        &mut self,
        device_id: u32,
        handler: Arc<dyn ExternalDmaMapping>,
    ) -> result::Result<(), Error> {
        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .map(gpa, gpa, config.block_size)
                        .map_err(Error::DmaMap)?;
                }
            }
        }

        self.dma_mapping_handlers
            .lock()
            .unwrap()
            .insert(device_id, handler);

        Ok(())
    }

    pub fn remove_dma_mapping_handler(&mut self, device_id: u32) -> result::Result<(), Error> {
        let handler = self
            .dma_mapping_handlers
            .lock()
            .unwrap()
            .remove(&device_id)
            .ok_or(Error::InvalidDmaMappingHandler)?;

        let config = self.config.lock().unwrap();

        if config.plugged_size > 0 {
            for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
                if *plugged {
                    let gpa = config.addr + (idx as u64 * config.block_size);
                    handler
                        .unmap(gpa, config.block_size)
                        .map_err(Error::DmaUnmap)?;
                }
            }
        }

        Ok(())
    }
}

impl Drop for Mem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Mem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let config = self.config.lock().unwrap();
        let mut handler = MemEpollHandler {
            host_addr: self.host_addr,
            host_fd: self.host_fd,
            blocks_state: Arc::clone(&self.blocks_state),
            config: self.config.clone(),
            resize: self.resize.clone(),
            queue: queues.remove(0),
            mem,
            interrupt_cb,
            queue_evt: queue_evts.remove(0),
            kill_evt,
            pause_evt,
            hugepages: self.hugepages,
            dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
        };

        handler
            .discard_memory_range(0, config.region_size)
            .map_err(|e| {
                error!("failed discarding memory range: {:?}", e);
                ActivateError::BadActivate
            })?;

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();
        // Retrieve seccomp filter for virtio_mem thread
        let virtio_mem_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::VirtioMem)
            .map_err(ActivateError::CreateSeccompFilter)?;
        thread::Builder::new()
            .name(self.id.clone())
            .spawn(move || {
                if let Err(e) = SeccompFilter::apply(virtio_mem_seccomp_filter) {
                    error!("Error applying seccomp filter: {:?}", e);
                } else if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            })
            .map(|thread| epoll_threads.push(thread))
            .map_err(|e| {
                error!("failed to spawn virtio-mem epoll thread: {}", e);
                ActivateError::BadActivate
            })?;
        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Mem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Mem {
    fn id(&self) -> String {
        self.id.clone()
    }
}
impl Transportable for Mem {}
impl Migratable for Mem {}
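
// A minimal sketch of unit tests for the VirtioMemConfig helpers above
// (validate(), resize() and is_valid_range()). The guest address and sizes
// used below are illustrative values only, not taken from a real VM setup.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_virtio_mem_config_helpers() {
        let mut config = VirtioMemConfig {
            block_size: VIRTIO_MEM_DEFAULT_BLOCK_SIZE,
            // Hypothetical, block-aligned start address for the region.
            addr: 0x1_0000_0000,
            region_size: VIRTIO_MEM_ALIGN_SIZE,
            usable_region_size: VIRTIO_MEM_ALIGN_SIZE,
            ..Default::default()
        };

        // All fields are block-aligned, so the configuration is valid.
        assert!(config.validate().is_ok());

        // Growing within region_size and on a block boundary succeeds.
        assert!(config.resize(4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE).is_ok());
        assert_eq!(config.requested_size, 4 * VIRTIO_MEM_DEFAULT_BLOCK_SIZE);

        // Growing beyond region_size is rejected.
        let too_big = config.region_size + VIRTIO_MEM_DEFAULT_BLOCK_SIZE;
        assert!(config.resize(too_big).is_err());

        // A range must be block-aligned, non-empty and fit in the usable region.
        assert!(config.is_valid_range(config.addr, config.block_size));
        assert!(!config.is_valid_range(config.addr + 1, config.block_size));
        assert!(!config.is_valid_range(config.addr, 0));
    }
}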