// Copyright (c) 2020 Ant Financial
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
    VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::GuestMemoryMmap;
use crate::{VirtioInterrupt, VirtioInterruptType};
use libc::EFD_NONBLOCK;
use seccompiler::SeccompAction;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Barrier, Mutex};
use vm_memory::GuestMemory;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// Maximum number of descriptors in each balloon virtqueue.
const QUEUE_SIZE: u16 = 128;
// The device exposes two virtqueues: inflate (index 0) and deflate (index 1).
const NUM_QUEUES: usize = 2;
// Per-queue maximum sizes advertised to the transport.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES];

// Epoll event ids below are allocated right after the ones reserved by
// EpollHelper itself.

// The host requested a new balloon size.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the inflate virtio queue.
const INFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
// New descriptors are pending on the deflate virtio queue.
const DEFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;

// Shift that converts a page frame number from the balloon interface into a
// guest physical address (i.e. balloon pages are 1 << 12 = 4096 bytes).
const VIRTIO_BALLOON_PFN_SHIFT: u64 = 12;

// Feature bit number: deflate balloon on OOM.
const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u64 = 2;

// Errors that the balloon device and its worker thread can report.
#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write only descriptor that protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    // Guest sent us an invalid request.
    InvalidRequest,
    // fallocate() failed.
    FallocateFail(std::io::Error),
    // madvise() failed.
    MadviseFail(std::io::Error),
    // Failed to write to an EventFd.
    EventFdWriteFail(std::io::Error),
    // Failed to try_clone an EventFd.
    EventFdTryCloneFail(std::io::Error),
    // Failed to receive the resize result over the mpsc channel.
    MpscRecvFail(mpsc::RecvError),
    // Resize got an invalid argument.
    ResizeInval(String),
    // process_queue got an event type that is neither inflate nor deflate.
    ProcessQueueWrongEvType(u16),
    // Failed to signal the guest.
    FailedSignal(io::Error),
}

// Device configuration space layout.
// Got from include/uapi/linux/virtio_balloon.h
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioBalloonConfig {
    // Number of pages host wants Guest to give up.
    num_pages: u32,
    // Number of pages we've actually got in balloon.
    actual: u32,
}

// Byte offset and size of the `actual` field inside VirtioBalloonConfig; it
// follows the 4-byte `num_pages` field in the #[repr(C)] layout.
const CONFIG_ACTUAL_OFFSET: u64 = 4;
const CONFIG_ACTUAL_SIZE: usize = 4;

// SAFETY: VirtioBalloonConfig only has data and has no implicit padding.
97 unsafe impl ByteValued for VirtioBalloonConfig {} 98 99 struct VirtioBalloonResizeReceiver { 100 size: Arc<AtomicU64>, 101 tx: mpsc::Sender<Result<(), Error>>, 102 evt: EventFd, 103 } 104 105 impl VirtioBalloonResizeReceiver { 106 fn get_size(&self) -> u64 { 107 self.size.load(Ordering::Acquire) 108 } 109 110 fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> { 111 self.tx.send(r) 112 } 113 } 114 115 struct VirtioBalloonResize { 116 size: Arc<AtomicU64>, 117 tx: mpsc::Sender<Result<(), Error>>, 118 rx: mpsc::Receiver<Result<(), Error>>, 119 evt: EventFd, 120 } 121 122 impl VirtioBalloonResize { 123 pub fn new() -> io::Result<Self> { 124 let (tx, rx) = mpsc::channel(); 125 126 Ok(Self { 127 size: Arc::new(AtomicU64::new(0)), 128 tx, 129 rx, 130 evt: EventFd::new(EFD_NONBLOCK)?, 131 }) 132 } 133 134 pub fn get_receiver(&self) -> Result<VirtioBalloonResizeReceiver, Error> { 135 Ok(VirtioBalloonResizeReceiver { 136 size: self.size.clone(), 137 tx: self.tx.clone(), 138 evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?, 139 }) 140 } 141 142 pub fn work(&self, size: u64) -> Result<(), Error> { 143 self.size.store(size, Ordering::Release); 144 self.evt.write(1).map_err(Error::EventFdWriteFail)?; 145 self.rx.recv().map_err(Error::MpscRecvFail)? 
146 } 147 } 148 149 struct BalloonEpollHandler { 150 config: Arc<Mutex<VirtioBalloonConfig>>, 151 resize_receiver: VirtioBalloonResizeReceiver, 152 queues: Vec<Queue>, 153 mem: GuestMemoryAtomic<GuestMemoryMmap>, 154 interrupt_cb: Arc<dyn VirtioInterrupt>, 155 inflate_queue_evt: EventFd, 156 deflate_queue_evt: EventFd, 157 kill_evt: EventFd, 158 pause_evt: EventFd, 159 } 160 161 impl BalloonEpollHandler { 162 fn signal( 163 &self, 164 int_type: &VirtioInterruptType, 165 queue: Option<&Queue>, 166 ) -> result::Result<(), Error> { 167 self.interrupt_cb.trigger(int_type, queue).map_err(|e| { 168 error!("Failed to signal used queue: {:?}", e); 169 Error::FailedSignal(e) 170 }) 171 } 172 173 fn process_queue(&mut self, ev_type: u16) -> result::Result<(), Error> { 174 let queue_index = match ev_type { 175 INFLATE_QUEUE_EVENT => 0, 176 DEFLATE_QUEUE_EVENT => 1, 177 _ => return Err(Error::ProcessQueueWrongEvType(ev_type)), 178 }; 179 180 let mut used_desc_heads = [0; QUEUE_SIZE as usize]; 181 let mut used_count = 0; 182 let mem = self.mem.memory(); 183 for avail_desc in self.queues[queue_index].iter(&mem) { 184 used_desc_heads[used_count] = avail_desc.index; 185 used_count += 1; 186 187 let data_chunk_size = size_of::<u32>(); 188 189 // The head contains the request type which MUST be readable. 
190 if avail_desc.is_write_only() { 191 error!("The head contains the request type is not right"); 192 return Err(Error::UnexpectedWriteOnlyDescriptor); 193 } 194 if avail_desc.len as usize % data_chunk_size != 0 { 195 error!("the request size {} is not right", avail_desc.len); 196 return Err(Error::InvalidRequest); 197 } 198 199 let mut offset = 0u64; 200 while offset < avail_desc.len as u64 { 201 let addr = avail_desc.addr.checked_add(offset).unwrap(); 202 let pfn: u32 = mem.read_obj(addr).map_err(Error::GuestMemory)?; 203 offset += data_chunk_size as u64; 204 205 let gpa = (pfn as u64) << VIRTIO_BALLOON_PFN_SHIFT; 206 if let Ok(hva) = mem.get_host_address(GuestAddress(gpa)) { 207 let advice = match ev_type { 208 INFLATE_QUEUE_EVENT => { 209 let region = 210 mem.find_region(GuestAddress(gpa)) 211 .ok_or(Error::GuestMemory( 212 GuestMemoryError::InvalidGuestAddress(GuestAddress(gpa)), 213 ))?; 214 if let Some(f_off) = region.file_offset() { 215 let offset = hva as usize - region.as_ptr() as usize; 216 let res = unsafe { 217 libc::fallocate64( 218 f_off.file().as_raw_fd(), 219 libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, 220 (offset as u64 + f_off.start()) as libc::off64_t, 221 (1 << VIRTIO_BALLOON_PFN_SHIFT) as libc::off64_t, 222 ) 223 }; 224 225 if res != 0 { 226 return Err(Error::FallocateFail(io::Error::last_os_error())); 227 } 228 } 229 libc::MADV_DONTNEED 230 } 231 DEFLATE_QUEUE_EVENT => libc::MADV_WILLNEED, 232 _ => return Err(Error::ProcessQueueWrongEvType(ev_type)), 233 }; 234 // Need unsafe to do syscall madvise 235 let res = unsafe { 236 libc::madvise( 237 hva as *mut libc::c_void, 238 (1 << VIRTIO_BALLOON_PFN_SHIFT) as libc::size_t, 239 advice, 240 ) 241 }; 242 if res != 0 { 243 return Err(Error::MadviseFail(io::Error::last_os_error())); 244 } 245 } else { 246 error!("Address 0x{:x} is not available", gpa); 247 return Err(Error::InvalidRequest); 248 } 249 } 250 } 251 252 for &desc_index in &used_desc_heads[..used_count] { 253 
self.queues[queue_index].add_used(&mem, desc_index, 0); 254 } 255 if used_count > 0 { 256 self.signal(&VirtioInterruptType::Queue, Some(&self.queues[queue_index]))?; 257 } 258 259 Ok(()) 260 } 261 262 fn run( 263 &mut self, 264 paused: Arc<AtomicBool>, 265 paused_sync: Arc<Barrier>, 266 ) -> result::Result<(), EpollHelperError> { 267 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 268 helper.add_event(self.resize_receiver.evt.as_raw_fd(), RESIZE_EVENT)?; 269 helper.add_event(self.inflate_queue_evt.as_raw_fd(), INFLATE_QUEUE_EVENT)?; 270 helper.add_event(self.deflate_queue_evt.as_raw_fd(), DEFLATE_QUEUE_EVENT)?; 271 helper.run(paused, paused_sync, self)?; 272 273 Ok(()) 274 } 275 } 276 277 impl EpollHelperHandler for BalloonEpollHandler { 278 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 279 let ev_type = event.data as u16; 280 match ev_type { 281 RESIZE_EVENT => { 282 if let Err(e) = self.resize_receiver.evt.read() { 283 error!("Failed to get resize event: {:?}", e); 284 return true; 285 } 286 let mut signal_error = false; 287 let r = { 288 let mut config = self.config.lock().unwrap(); 289 config.num_pages = 290 (self.resize_receiver.get_size() >> VIRTIO_BALLOON_PFN_SHIFT) as u32; 291 if let Err(e) = self.signal(&VirtioInterruptType::Config, None) { 292 signal_error = true; 293 Err(e) 294 } else { 295 Ok(()) 296 } 297 }; 298 if let Err(e) = &r { 299 // This error will send back to resize caller. 
300 error!("Handle resize event get error: {:?}", e); 301 } 302 if let Err(e) = self.resize_receiver.send(r) { 303 error!("Sending \"resize\" generated error: {:?}", e); 304 return true; 305 } 306 if signal_error { 307 return true; 308 } 309 } 310 INFLATE_QUEUE_EVENT => { 311 if let Err(e) = self.inflate_queue_evt.read() { 312 error!("Failed to get inflate queue event: {:?}", e); 313 return true; 314 } else if let Err(e) = self.process_queue(ev_type) { 315 error!("Failed to signal used inflate queue: {:?}", e); 316 return true; 317 } 318 } 319 DEFLATE_QUEUE_EVENT => { 320 if let Err(e) = self.deflate_queue_evt.read() { 321 error!("Failed to get deflate queue event: {:?}", e); 322 return true; 323 } else if let Err(e) = self.process_queue(ev_type) { 324 error!("Failed to signal used deflate queue: {:?}", e); 325 return true; 326 } 327 } 328 _ => { 329 error!("Unknown event for virtio-balloon"); 330 return true; 331 } 332 } 333 334 false 335 } 336 } 337 338 // Virtio device for exposing entropy to the guest OS through virtio. 339 pub struct Balloon { 340 common: VirtioCommon, 341 id: String, 342 resize: VirtioBalloonResize, 343 config: Arc<Mutex<VirtioBalloonConfig>>, 344 seccomp_action: SeccompAction, 345 exit_evt: EventFd, 346 } 347 348 impl Balloon { 349 // Create a new virtio-balloon. 
350 pub fn new( 351 id: String, 352 size: u64, 353 deflate_on_oom: bool, 354 seccomp_action: SeccompAction, 355 exit_evt: EventFd, 356 ) -> io::Result<Self> { 357 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 358 if deflate_on_oom { 359 avail_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM; 360 } 361 362 let config = VirtioBalloonConfig { 363 num_pages: (size >> VIRTIO_BALLOON_PFN_SHIFT) as u32, 364 ..Default::default() 365 }; 366 367 Ok(Balloon { 368 common: VirtioCommon { 369 device_type: VirtioDeviceType::Balloon as u32, 370 avail_features, 371 paused_sync: Some(Arc::new(Barrier::new(2))), 372 queue_sizes: QUEUE_SIZES.to_vec(), 373 min_queues: NUM_QUEUES as u16, 374 ..Default::default() 375 }, 376 id, 377 resize: VirtioBalloonResize::new()?, 378 config: Arc::new(Mutex::new(config)), 379 seccomp_action, 380 exit_evt, 381 }) 382 } 383 384 pub fn resize(&self, size: u64) -> Result<(), Error> { 385 self.resize.work(size) 386 } 387 388 // Get the actual size of the virtio-balloon. 389 pub fn get_actual(&self) -> u64 { 390 (self.config.lock().unwrap().actual as u64) << VIRTIO_BALLOON_PFN_SHIFT 391 } 392 } 393 394 impl Drop for Balloon { 395 fn drop(&mut self) { 396 if let Some(kill_evt) = self.common.kill_evt.take() { 397 // Ignore the result because there is nothing we can do about it. 
398 let _ = kill_evt.write(1); 399 } 400 } 401 } 402 403 impl VirtioDevice for Balloon { 404 fn device_type(&self) -> u32 { 405 self.common.device_type 406 } 407 408 fn queue_max_sizes(&self) -> &[u16] { 409 &self.common.queue_sizes 410 } 411 412 fn features(&self) -> u64 { 413 self.common.avail_features 414 } 415 416 fn ack_features(&mut self, value: u64) { 417 self.common.ack_features(value) 418 } 419 420 fn read_config(&self, offset: u64, data: &mut [u8]) { 421 self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data); 422 } 423 424 fn write_config(&mut self, offset: u64, data: &[u8]) { 425 // The "actual" field is the only mutable field 426 if offset != CONFIG_ACTUAL_OFFSET || data.len() != CONFIG_ACTUAL_SIZE { 427 error!( 428 "Attempt to write to read-only field: offset {:x} length {}", 429 offset, 430 data.len() 431 ); 432 return; 433 } 434 435 self.write_config_helper(self.config.lock().unwrap().as_mut_slice(), offset, data); 436 } 437 438 fn activate( 439 &mut self, 440 mem: GuestMemoryAtomic<GuestMemoryMmap>, 441 interrupt_cb: Arc<dyn VirtioInterrupt>, 442 queues: Vec<Queue>, 443 mut queue_evts: Vec<EventFd>, 444 ) -> ActivateResult { 445 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 446 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 447 448 let mut handler = BalloonEpollHandler { 449 config: self.config.clone(), 450 resize_receiver: self.resize.get_receiver().map_err(|e| { 451 error!("failed to clone resize EventFd: {:?}", e); 452 ActivateError::BadActivate 453 })?, 454 queues, 455 mem, 456 interrupt_cb, 457 inflate_queue_evt: queue_evts.remove(0), 458 deflate_queue_evt: queue_evts.remove(0), 459 kill_evt, 460 pause_evt, 461 }; 462 463 let paused = self.common.paused.clone(); 464 let paused_sync = self.common.paused_sync.clone(); 465 let mut epoll_threads = Vec::new(); 466 467 spawn_virtio_thread( 468 &self.id, 469 &self.seccomp_action, 470 Thread::VirtioBalloon, 471 &mut epoll_threads, 472 &self.exit_evt, 
473 move || { 474 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 475 error!("Error running worker: {:?}", e); 476 } 477 }, 478 )?; 479 self.common.epoll_threads = Some(epoll_threads); 480 481 event!("virtio-device", "activated", "id", &self.id); 482 Ok(()) 483 } 484 485 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 486 let result = self.common.reset(); 487 event!("virtio-device", "reset", "id", &self.id); 488 result 489 } 490 } 491 492 impl Pausable for Balloon { 493 fn pause(&mut self) -> result::Result<(), MigratableError> { 494 self.common.pause() 495 } 496 497 fn resume(&mut self) -> result::Result<(), MigratableError> { 498 self.common.resume() 499 } 500 } 501 502 impl Snapshottable for Balloon { 503 fn id(&self) -> String { 504 self.id.clone() 505 } 506 } 507 impl Transportable for Balloon {} 508 impl Migratable for Balloon {} 509