1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 2 // 3 // Copyright © 2020 Intel Corporation 4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE-BSD-3-Clause file. 7 8 use super::Error as DeviceError; 9 use super::{ 10 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon, 11 VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1, 12 }; 13 use crate::seccomp_filters::Thread; 14 use crate::thread_helper::spawn_virtio_thread; 15 use crate::GuestMemoryMmap; 16 use crate::{VirtioInterrupt, VirtioInterruptType}; 17 use anyhow::anyhow; 18 use seccompiler::SeccompAction; 19 use std::fs::File; 20 use std::io::{self, Read}; 21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 22 use std::result; 23 use std::sync::atomic::AtomicBool; 24 use std::sync::{Arc, Barrier, Mutex}; 25 use std::time::Instant; 26 use thiserror::Error; 27 use versionize::{VersionMap, Versionize, VersionizeResult}; 28 use versionize_derive::Versionize; 29 use virtio_queue::{Queue, QueueT}; 30 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic}; 31 use vm_migration::VersionMapped; 32 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 33 use vmm_sys_util::eventfd::EventFd; 34 35 const QUEUE_SIZE: u16 = 8; 36 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 37 38 // New descriptors are pending on the virtio queue. 39 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 40 // Timer expired 41 const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2; 42 43 // Number of seconds to check to see if there has been a ping 44 // This needs to match what the driver is using. 45 const WATCHDOG_TIMER_INTERVAL: i64 = 15; 46 47 // Number of seconds since last ping to trigger reboot 48 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5; 49 50 #[derive(Error, Debug)] 51 enum Error { 52 #[error("Error programming timer fd: {0}")] 53 TimerfdSetup(io::Error), 54 #[error("Descriptor chain too short")] 55 DescriptorChainTooShort, 56 #[error("Failed adding used index: {0}")] 57 QueueAddUsed(virtio_queue::Error), 58 #[error("Invalid descriptor")] 59 InvalidDescriptor, 60 #[error("Failed to write to guest memory: {0}")] 61 GuestMemoryWrite(vm_memory::guest_memory::Error), 62 } 63 64 struct WatchdogEpollHandler { 65 mem: GuestMemoryAtomic<GuestMemoryMmap>, 66 queue: Queue, 67 interrupt_cb: Arc<dyn VirtioInterrupt>, 68 queue_evt: EventFd, 69 kill_evt: EventFd, 70 pause_evt: EventFd, 71 timer: File, 72 last_ping_time: Arc<Mutex<Option<Instant>>>, 73 reset_evt: EventFd, 74 } 75 76 impl WatchdogEpollHandler { 77 // The main queue is very simple - the driver "pings" the device by passing it a (write-only) 78 // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver 79 fn process_queue(&mut self) -> result::Result<bool, Error> { 80 let queue = &mut self.queue; 81 let mut used_descs = false; 82 while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) { 83 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 84 85 if !(desc.is_write_only() && desc.len() > 0) { 86 return Err(Error::InvalidDescriptor); 87 } 88 89 desc_chain 90 .memory() 91 .write_obj(1u8, desc.addr()) 92 .map_err(Error::GuestMemoryWrite)?; 93 94 // If this is the first "ping" then setup the timer 95 if self.last_ping_time.lock().unwrap().is_none() { 96 info!( 97 "First ping received. Starting timer (every {} seconds)", 98 WATCHDOG_TIMER_INTERVAL 99 ); 100 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL).map_err(Error::TimerfdSetup)?; 101 } 102 self.last_ping_time.lock().unwrap().replace(Instant::now()); 103 104 queue 105 .add_used(desc_chain.memory(), desc_chain.head_index(), desc.len()) 106 .map_err(Error::QueueAddUsed)?; 107 used_descs = true; 108 } 109 110 Ok(used_descs) 111 } 112 113 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 114 self.interrupt_cb 115 .trigger(VirtioInterruptType::Queue(0)) 116 .map_err(|e| { 117 error!("Failed to signal used queue: {:?}", e); 118 DeviceError::FailedSignalingUsedQueue(e) 119 }) 120 } 121 122 fn run( 123 &mut self, 124 paused: Arc<AtomicBool>, 125 paused_sync: Arc<Barrier>, 126 ) -> result::Result<(), EpollHelperError> { 127 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 128 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 129 helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?; 130 helper.run(paused, paused_sync, self)?; 131 132 Ok(()) 133 } 134 } 135 136 impl EpollHelperHandler for WatchdogEpollHandler { 137 fn handle_event( 138 &mut self, 139 _helper: &mut EpollHelper, 140 event: &epoll::Event, 141 ) -> result::Result<(), EpollHelperError> { 142 let ev_type = event.data as u16; 143 match ev_type { 144 QUEUE_AVAIL_EVENT => { 145 self.queue_evt.read().map_err(|e| { 146 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 147 })?; 148 149 let needs_notification = self.process_queue().map_err(|e| { 150 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 151 })?; 152 if needs_notification { 153 self.signal_used_queue().map_err(|e| { 154 EpollHelperError::HandleEvent(anyhow!( 155 "Failed to signal used queue: {:?}", 156 e 157 )) 158 })?; 159 } 160 } 161 TIMER_EXPIRED_EVENT => { 162 // When reading from the timerfd you get 8 bytes indicating 163 // the number of times this event has elapsed since the last read. 164 let mut buf = vec![0; 8]; 165 self.timer.read_exact(&mut buf).map_err(|e| { 166 EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e)) 167 })?; 168 169 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() { 170 let now = Instant::now(); 171 let gap = now.duration_since(*last_ping_time).as_secs(); 172 if gap > WATCHDOG_TIMEOUT { 173 error!("Watchdog triggered: {} seconds since last ping", gap); 174 self.reset_evt.write(1).ok(); 175 } 176 } 177 } 178 _ => { 179 return Err(EpollHelperError::HandleEvent(anyhow!( 180 "Unexpected event: {}", 181 ev_type 182 ))); 183 } 184 } 185 Ok(()) 186 } 187 } 188 189 /// Virtio device for exposing a watchdog to the guest 190 pub struct Watchdog { 191 common: VirtioCommon, 192 id: String, 193 seccomp_action: SeccompAction, 194 reset_evt: EventFd, 195 last_ping_time: Arc<Mutex<Option<Instant>>>, 196 timer: File, 197 exit_evt: EventFd, 198 } 199 200 #[derive(Versionize)] 201 pub struct WatchdogState { 202 pub avail_features: u64, 203 pub acked_features: u64, 204 pub enabled: bool, 205 } 206 207 impl VersionMapped for WatchdogState {} 208 209 impl Watchdog { 210 /// Create a new virtio watchdog device that will reboot VM if the guest hangs 211 pub fn new( 212 id: String, 213 reset_evt: EventFd, 214 seccomp_action: SeccompAction, 215 exit_evt: EventFd, 216 state: Option<WatchdogState>, 217 ) -> io::Result<Watchdog> { 218 let mut last_ping_time = None; 219 let (avail_features, acked_features) = if let Some(state) = state { 220 info!("Restoring virtio-watchdog {}", id); 221 222 // When restoring enable the watchdog if it was previously enabled. 223 // We reset the timer to ensure that we don't unnecessarily reboot 224 // due to the offline time. 225 if state.enabled { 226 last_ping_time = Some(Instant::now()); 227 } 228 229 (state.avail_features, state.acked_features) 230 } else { 231 (1u64 << VIRTIO_F_VERSION_1, 0) 232 }; 233 234 let timer_fd = timerfd_create().map_err(|e| { 235 error!("Failed to create timer fd {}", e); 236 e 237 })?; 238 // SAFETY: timer_fd is a valid fd 239 let timer = unsafe { File::from_raw_fd(timer_fd) }; 240 241 Ok(Watchdog { 242 common: VirtioCommon { 243 device_type: VirtioDeviceType::Watchdog as u32, 244 queue_sizes: QUEUE_SIZES.to_vec(), 245 paused_sync: Some(Arc::new(Barrier::new(2))), 246 avail_features, 247 acked_features, 248 min_queues: 1, 249 ..Default::default() 250 }, 251 id, 252 seccomp_action, 253 reset_evt, 254 last_ping_time: Arc::new(Mutex::new(last_ping_time)), 255 timer, 256 exit_evt, 257 }) 258 } 259 260 fn state(&self) -> WatchdogState { 261 WatchdogState { 262 avail_features: self.common.avail_features, 263 acked_features: self.common.acked_features, 264 enabled: self.last_ping_time.lock().unwrap().is_some(), 265 } 266 } 267 268 #[cfg(fuzzing)] 269 pub fn wait_for_epoll_threads(&mut self) { 270 self.common.wait_for_epoll_threads(); 271 } 272 } 273 274 impl Drop for Watchdog { 275 fn drop(&mut self) { 276 if let Some(kill_evt) = self.common.kill_evt.take() { 277 // Ignore the result because there is nothing we can do about it. 278 let _ = kill_evt.write(1); 279 } 280 } 281 } 282 283 fn timerfd_create() -> Result<RawFd, io::Error> { 284 // SAFETY: FFI call, trivially safe 285 let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) }; 286 if res < 0 { 287 Err(io::Error::last_os_error()) 288 } else { 289 Ok(res as RawFd) 290 } 291 } 292 293 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> { 294 let periodic = libc::itimerspec { 295 it_interval: libc::timespec { 296 tv_sec: secs, 297 tv_nsec: 0, 298 }, 299 it_value: libc::timespec { 300 tv_sec: secs, 301 tv_nsec: 0, 302 }, 303 }; 304 305 let res = 306 // SAFETY: FFI call with correct arguments 307 unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) }; 308 309 if res < 0 { 310 Err(io::Error::last_os_error()) 311 } else { 312 Ok(()) 313 } 314 } 315 316 impl VirtioDevice for Watchdog { 317 fn device_type(&self) -> u32 { 318 self.common.device_type 319 } 320 321 fn queue_max_sizes(&self) -> &[u16] { 322 &self.common.queue_sizes 323 } 324 325 fn features(&self) -> u64 { 326 self.common.avail_features 327 } 328 329 fn ack_features(&mut self, value: u64) { 330 self.common.ack_features(value) 331 } 332 333 fn activate( 334 &mut self, 335 mem: GuestMemoryAtomic<GuestMemoryMmap>, 336 interrupt_cb: Arc<dyn VirtioInterrupt>, 337 mut queues: Vec<(usize, Queue, EventFd)>, 338 ) -> ActivateResult { 339 self.common.activate(&queues, &interrupt_cb)?; 340 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 341 342 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 343 error!("Failed to clone reset_evt eventfd: {}", e); 344 ActivateError::BadActivate 345 })?; 346 347 let timer = self.timer.try_clone().map_err(|e| { 348 error!("Failed to clone timer fd: {}", e); 349 ActivateError::BadActivate 350 })?; 351 352 let (_, queue, queue_evt) = queues.remove(0); 353 354 let mut handler = WatchdogEpollHandler { 355 mem, 356 queue, 357 interrupt_cb, 358 queue_evt, 359 kill_evt, 360 pause_evt, 361 timer, 362 last_ping_time: self.last_ping_time.clone(), 363 reset_evt, 364 }; 365 366 let paused = self.common.paused.clone(); 367 let paused_sync = self.common.paused_sync.clone(); 368 let mut epoll_threads = Vec::new(); 369 370 spawn_virtio_thread( 371 &self.id, 372 &self.seccomp_action, 373 Thread::VirtioWatchdog, 374 &mut epoll_threads, 375 &self.exit_evt, 376 move || handler.run(paused, paused_sync.unwrap()), 377 )?; 378 379 self.common.epoll_threads = Some(epoll_threads); 380 381 event!("virtio-device", "activated", "id", &self.id); 382 Ok(()) 383 } 384 385 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 386 let result = self.common.reset(); 387 event!("virtio-device", "reset", "id", &self.id); 388 result 389 } 390 } 391 392 impl Pausable for Watchdog { 393 fn pause(&mut self) -> result::Result<(), MigratableError> { 394 info!("Watchdog paused - disabling timer"); 395 timerfd_setup(&self.timer, 0) 396 .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?; 397 self.common.pause() 398 } 399 400 fn resume(&mut self) -> result::Result<(), MigratableError> { 401 // Reset the timer on pause if it was previously used 402 if self.last_ping_time.lock().unwrap().is_some() { 403 info!( 404 "Watchdog resumed - enabling timer (every {} seconds)", 405 WATCHDOG_TIMER_INTERVAL 406 ); 407 self.last_ping_time.lock().unwrap().replace(Instant::now()); 408 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 409 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?; 410 } 411 self.common.resume() 412 } 413 } 414 415 impl Snapshottable for Watchdog { 416 fn id(&self) -> String { 417 self.id.clone() 418 } 419 420 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 421 Snapshot::new_from_versioned_state(&self.id, &self.state()) 422 } 423 } 424 425 impl Transportable for Watchdog {} 426 impl Migratable for Watchdog {} 427