// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//
// Copyright © 2020 Intel Corporation
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
    VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::GuestMemoryMmap;
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::fs::File;
use std::io::{self, Read};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, Mutex};
use std::time::Instant;
use thiserror::Error;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{Queue, QueueT};
use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// Maximum size advertised for the device's single virtqueue.
const QUEUE_SIZE: u16 = 8;
// Per-queue maximum sizes; the watchdog exposes exactly one queue.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Epoll event ids used by this device. They are numbered after
// EPOLL_HELPER_EVENT_LAST (presumably the last id reserved by EpollHelper
// itself - confirm against its definition).

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// The watchdog timer fd has expired.
const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Interval, in seconds, between checks for whether a ping has been received.
// This needs to match what the driver is using.
45 const WATCHDOG_TIMER_INTERVAL: i64 = 15; 46 47 // Number of seconds since last ping to trigger reboot 48 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5; 49 50 #[derive(Error, Debug)] 51 enum Error { 52 #[error("Error programming timer fd: {0}")] 53 TimerfdSetup(io::Error), 54 #[error("Descriptor chain too short")] 55 DescriptorChainTooShort, 56 #[error("Failed adding used index: {0}")] 57 QueueAddUsed(virtio_queue::Error), 58 } 59 60 struct WatchdogEpollHandler { 61 mem: GuestMemoryAtomic<GuestMemoryMmap>, 62 queue: Queue, 63 interrupt_cb: Arc<dyn VirtioInterrupt>, 64 queue_evt: EventFd, 65 kill_evt: EventFd, 66 pause_evt: EventFd, 67 timer: File, 68 last_ping_time: Arc<Mutex<Option<Instant>>>, 69 reset_evt: EventFd, 70 } 71 72 impl WatchdogEpollHandler { 73 // The main queue is very simple - the driver "pings" the device by passing it a (write-only) 74 // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver 75 fn process_queue(&mut self) -> result::Result<bool, Error> { 76 let queue = &mut self.queue; 77 let mut used_descs = false; 78 while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) { 79 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 80 81 let mut len = 0; 82 83 if desc.is_write_only() && desc_chain.memory().write_obj(1u8, desc.addr()).is_ok() { 84 len = desc.len(); 85 // If this is the first "ping" then setup the timer 86 if self.last_ping_time.lock().unwrap().is_none() { 87 info!( 88 "First ping received. 
Starting timer (every {} seconds)", 89 WATCHDOG_TIMER_INTERVAL 90 ); 91 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 92 .map_err(Error::TimerfdSetup)?; 93 } 94 self.last_ping_time.lock().unwrap().replace(Instant::now()); 95 } 96 97 queue 98 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 99 .map_err(Error::QueueAddUsed)?; 100 used_descs = true; 101 } 102 103 Ok(used_descs) 104 } 105 106 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 107 self.interrupt_cb 108 .trigger(VirtioInterruptType::Queue(0)) 109 .map_err(|e| { 110 error!("Failed to signal used queue: {:?}", e); 111 DeviceError::FailedSignalingUsedQueue(e) 112 }) 113 } 114 115 fn run( 116 &mut self, 117 paused: Arc<AtomicBool>, 118 paused_sync: Arc<Barrier>, 119 ) -> result::Result<(), EpollHelperError> { 120 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 121 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 122 helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?; 123 helper.run(paused, paused_sync, self)?; 124 125 Ok(()) 126 } 127 } 128 129 impl EpollHelperHandler for WatchdogEpollHandler { 130 fn handle_event( 131 &mut self, 132 _helper: &mut EpollHelper, 133 event: &epoll::Event, 134 ) -> result::Result<(), EpollHelperError> { 135 let ev_type = event.data as u16; 136 match ev_type { 137 QUEUE_AVAIL_EVENT => { 138 self.queue_evt.read().map_err(|e| { 139 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 140 })?; 141 142 let needs_notification = self.process_queue().map_err(|e| { 143 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 144 })?; 145 if needs_notification { 146 self.signal_used_queue().map_err(|e| { 147 EpollHelperError::HandleEvent(anyhow!( 148 "Failed to signal used queue: {:?}", 149 e 150 )) 151 })?; 152 } 153 } 154 TIMER_EXPIRED_EVENT => { 155 // When reading from the timerfd you get 8 bytes indicating 156 // the number of times this event has elapsed since 
the last read. 157 let mut buf = vec![0; 8]; 158 self.timer.read_exact(&mut buf).map_err(|e| { 159 EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e)) 160 })?; 161 162 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() { 163 let now = Instant::now(); 164 let gap = now.duration_since(*last_ping_time).as_secs(); 165 if gap > WATCHDOG_TIMEOUT { 166 error!("Watchdog triggered: {} seconds since last ping", gap); 167 self.reset_evt.write(1).ok(); 168 } 169 } 170 } 171 _ => { 172 return Err(EpollHelperError::HandleEvent(anyhow!( 173 "Unexpected event: {}", 174 ev_type 175 ))); 176 } 177 } 178 Ok(()) 179 } 180 } 181 182 /// Virtio device for exposing a watchdog to the guest 183 pub struct Watchdog { 184 common: VirtioCommon, 185 id: String, 186 seccomp_action: SeccompAction, 187 reset_evt: EventFd, 188 last_ping_time: Arc<Mutex<Option<Instant>>>, 189 timer: File, 190 exit_evt: EventFd, 191 } 192 193 #[derive(Versionize)] 194 pub struct WatchdogState { 195 pub avail_features: u64, 196 pub acked_features: u64, 197 pub enabled: bool, 198 } 199 200 impl VersionMapped for WatchdogState {} 201 202 impl Watchdog { 203 /// Create a new virtio watchdog device that will reboot VM if the guest hangs 204 pub fn new( 205 id: String, 206 reset_evt: EventFd, 207 seccomp_action: SeccompAction, 208 exit_evt: EventFd, 209 ) -> io::Result<Watchdog> { 210 let avail_features = 1u64 << VIRTIO_F_VERSION_1; 211 let timer_fd = timerfd_create().map_err(|e| { 212 error!("Failed to create timer fd {}", e); 213 e 214 })?; 215 let timer = unsafe { File::from_raw_fd(timer_fd) }; 216 Ok(Watchdog { 217 common: VirtioCommon { 218 device_type: VirtioDeviceType::Watchdog as u32, 219 queue_sizes: QUEUE_SIZES.to_vec(), 220 paused_sync: Some(Arc::new(Barrier::new(2))), 221 avail_features, 222 min_queues: 1, 223 ..Default::default() 224 }, 225 id, 226 seccomp_action, 227 reset_evt, 228 last_ping_time: Arc::new(Mutex::new(None)), 229 timer, 230 exit_evt, 231 }) 
232 } 233 234 fn state(&self) -> WatchdogState { 235 WatchdogState { 236 avail_features: self.common.avail_features, 237 acked_features: self.common.acked_features, 238 enabled: self.last_ping_time.lock().unwrap().is_some(), 239 } 240 } 241 242 fn set_state(&mut self, state: &WatchdogState) { 243 self.common.avail_features = state.avail_features; 244 self.common.acked_features = state.acked_features; 245 // When restoring enable the watchdog if it was previously enabled. We reset the timer 246 // to ensure that we don't unnecessarily reboot due to the offline time. 247 if state.enabled { 248 self.last_ping_time.lock().unwrap().replace(Instant::now()); 249 } 250 } 251 252 #[cfg(fuzzing)] 253 pub fn wait_for_epoll_threads(&mut self) { 254 self.common.wait_for_epoll_threads(); 255 } 256 } 257 258 impl Drop for Watchdog { 259 fn drop(&mut self) { 260 if let Some(kill_evt) = self.common.kill_evt.take() { 261 // Ignore the result because there is nothing we can do about it. 262 let _ = kill_evt.write(1); 263 } 264 } 265 } 266 267 fn timerfd_create() -> Result<RawFd, io::Error> { 268 let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) }; 269 if res < 0 { 270 Err(io::Error::last_os_error()) 271 } else { 272 Ok(res as RawFd) 273 } 274 } 275 276 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> { 277 let periodic = libc::itimerspec { 278 it_interval: libc::timespec { 279 tv_sec: secs, 280 tv_nsec: 0, 281 }, 282 it_value: libc::timespec { 283 tv_sec: secs, 284 tv_nsec: 0, 285 }, 286 }; 287 288 let res = 289 unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) }; 290 291 if res < 0 { 292 Err(io::Error::last_os_error()) 293 } else { 294 Ok(()) 295 } 296 } 297 298 impl VirtioDevice for Watchdog { 299 fn device_type(&self) -> u32 { 300 self.common.device_type 301 } 302 303 fn queue_max_sizes(&self) -> &[u16] { 304 &self.common.queue_sizes 305 } 306 307 fn features(&self) -> u64 { 308 self.common.avail_features 309 } 
310 311 fn ack_features(&mut self, value: u64) { 312 self.common.ack_features(value) 313 } 314 315 fn activate( 316 &mut self, 317 mem: GuestMemoryAtomic<GuestMemoryMmap>, 318 interrupt_cb: Arc<dyn VirtioInterrupt>, 319 mut queues: Vec<(usize, Queue, EventFd)>, 320 ) -> ActivateResult { 321 self.common.activate(&queues, &interrupt_cb)?; 322 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 323 324 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 325 error!("Failed to clone reset_evt eventfd: {}", e); 326 ActivateError::BadActivate 327 })?; 328 329 let timer = self.timer.try_clone().map_err(|e| { 330 error!("Failed to clone timer fd: {}", e); 331 ActivateError::BadActivate 332 })?; 333 334 let (_, queue, queue_evt) = queues.remove(0); 335 336 let mut handler = WatchdogEpollHandler { 337 mem, 338 queue, 339 interrupt_cb, 340 queue_evt, 341 kill_evt, 342 pause_evt, 343 timer, 344 last_ping_time: self.last_ping_time.clone(), 345 reset_evt, 346 }; 347 348 let paused = self.common.paused.clone(); 349 let paused_sync = self.common.paused_sync.clone(); 350 let mut epoll_threads = Vec::new(); 351 352 spawn_virtio_thread( 353 &self.id, 354 &self.seccomp_action, 355 Thread::VirtioWatchdog, 356 &mut epoll_threads, 357 &self.exit_evt, 358 move || handler.run(paused, paused_sync.unwrap()), 359 )?; 360 361 self.common.epoll_threads = Some(epoll_threads); 362 363 event!("virtio-device", "activated", "id", &self.id); 364 Ok(()) 365 } 366 367 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 368 let result = self.common.reset(); 369 event!("virtio-device", "reset", "id", &self.id); 370 result 371 } 372 } 373 374 impl Pausable for Watchdog { 375 fn pause(&mut self) -> result::Result<(), MigratableError> { 376 info!("Watchdog paused - disabling timer"); 377 timerfd_setup(&self.timer, 0) 378 .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?; 379 self.common.pause() 380 } 381 382 fn resume(&mut self) -> result::Result<(), 
MigratableError> { 383 // Reset the timer on pause if it was previously used 384 if self.last_ping_time.lock().unwrap().is_some() { 385 info!( 386 "Watchdog resumed - enabling timer (every {} seconds)", 387 WATCHDOG_TIMER_INTERVAL 388 ); 389 self.last_ping_time.lock().unwrap().replace(Instant::now()); 390 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 391 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?; 392 } 393 self.common.resume() 394 } 395 } 396 397 impl Snapshottable for Watchdog { 398 fn id(&self) -> String { 399 self.id.clone() 400 } 401 402 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 403 Snapshot::new_from_versioned_state(&self.id, &self.state()) 404 } 405 406 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 407 self.set_state(&snapshot.to_versioned_state(&self.id)?); 408 Ok(()) 409 } 410 } 411 412 impl Transportable for Watchdog {} 413 impl Migratable for Watchdog {} 414