// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//
// Copyright © 2020 Intel Corporation
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
    VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::GuestMemoryMmap;
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use std::fs::File;
use std::io::{self, Read};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, Mutex};
use std::time::Instant;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// Size of the device's single virtqueue; also bounds the scratch array of
// used descriptors built while processing the queue.
const QUEUE_SIZE: u16 = 8;
// Per-queue sizes handed to `VirtioCommon` (exactly one queue).
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Epoll event: new descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// Epoll event: the watchdog timer fd expired.
const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Interval, in seconds, at which the device checks whether there has been a ping.
// This needs to match what the driver is using.
43 const WATCHDOG_TIMER_INTERVAL: i64 = 15; 44 45 // Number of seconds since last ping to trigger reboot 46 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5; 47 48 struct WatchdogEpollHandler { 49 queues: Vec<Queue>, 50 mem: GuestMemoryAtomic<GuestMemoryMmap>, 51 interrupt_cb: Arc<dyn VirtioInterrupt>, 52 queue_evt: EventFd, 53 kill_evt: EventFd, 54 pause_evt: EventFd, 55 timer: File, 56 last_ping_time: Arc<Mutex<Option<Instant>>>, 57 reset_evt: EventFd, 58 } 59 60 impl WatchdogEpollHandler { 61 // The main queue is very simple - the driver "pings" the device by passing it a (write-only) 62 // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver 63 fn process_queue(&mut self) -> bool { 64 let queue = &mut self.queues[0]; 65 let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize]; 66 let mut used_count = 0; 67 let mem = self.mem.memory(); 68 for avail_desc in queue.iter(&mem) { 69 let mut len = 0; 70 71 if avail_desc.is_write_only() && mem.write_obj(1u8, avail_desc.addr).is_ok() { 72 len = avail_desc.len; 73 // If this is the first "ping" then setup the timer 74 if self.last_ping_time.lock().unwrap().is_none() { 75 info!( 76 "First ping received. 
Starting timer (every {} seconds)", 77 WATCHDOG_TIMER_INTERVAL 78 ); 79 if let Err(e) = timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) { 80 error!("Error programming timer fd: {:?}", e); 81 } 82 } 83 self.last_ping_time.lock().unwrap().replace(Instant::now()); 84 } 85 86 used_desc_heads[used_count] = (avail_desc.index, len); 87 used_count += 1; 88 } 89 90 for &(desc_index, len) in &used_desc_heads[..used_count] { 91 queue.add_used(&mem, desc_index, len); 92 } 93 used_count > 0 94 } 95 96 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 97 self.interrupt_cb 98 .trigger(&VirtioInterruptType::Queue, Some(&self.queues[0])) 99 .map_err(|e| { 100 error!("Failed to signal used queue: {:?}", e); 101 DeviceError::FailedSignalingUsedQueue(e) 102 }) 103 } 104 105 fn run( 106 &mut self, 107 paused: Arc<AtomicBool>, 108 paused_sync: Arc<Barrier>, 109 ) -> result::Result<(), EpollHelperError> { 110 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 111 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 112 helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?; 113 helper.run(paused, paused_sync, self)?; 114 115 Ok(()) 116 } 117 } 118 119 impl EpollHelperHandler for WatchdogEpollHandler { 120 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 121 let ev_type = event.data as u16; 122 match ev_type { 123 QUEUE_AVAIL_EVENT => { 124 if let Err(e) = self.queue_evt.read() { 125 error!("Failed to get queue event: {:?}", e); 126 return true; 127 } else if self.process_queue() { 128 if let Err(e) = self.signal_used_queue() { 129 error!("Failed to signal used queue: {:?}", e); 130 return true; 131 } 132 } 133 } 134 TIMER_EXPIRED_EVENT => { 135 // When reading from the timerfd you get 8 bytes indicating 136 // the number of times this event has elapsed since the last read. 
137 let mut buf = vec![0; 8]; 138 if let Err(e) = self.timer.read_exact(&mut buf) { 139 error!("Error reading from timer fd: {:}", e); 140 return true; 141 } 142 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() { 143 let now = Instant::now(); 144 let gap = now.duration_since(*last_ping_time).as_secs(); 145 if gap > WATCHDOG_TIMEOUT { 146 error!("Watchdog triggered: {} seconds since last ping", gap); 147 self.reset_evt.write(1).ok(); 148 } 149 } 150 return false; 151 } 152 _ => { 153 error!("Unexpected event: {}", ev_type); 154 return true; 155 } 156 } 157 false 158 } 159 } 160 161 /// Virtio device for exposing a watchdog to the guest 162 pub struct Watchdog { 163 common: VirtioCommon, 164 id: String, 165 seccomp_action: SeccompAction, 166 reset_evt: EventFd, 167 last_ping_time: Arc<Mutex<Option<Instant>>>, 168 timer: File, 169 exit_evt: EventFd, 170 } 171 172 #[derive(Versionize)] 173 pub struct WatchdogState { 174 pub avail_features: u64, 175 pub acked_features: u64, 176 pub enabled: bool, 177 } 178 179 impl VersionMapped for WatchdogState {} 180 181 impl Watchdog { 182 /// Create a new virtio watchdog device that will reboot VM if the guest hangs 183 pub fn new( 184 id: String, 185 reset_evt: EventFd, 186 seccomp_action: SeccompAction, 187 exit_evt: EventFd, 188 ) -> io::Result<Watchdog> { 189 let avail_features = 1u64 << VIRTIO_F_VERSION_1; 190 let timer_fd = timerfd_create().map_err(|e| { 191 error!("Failed to create timer fd {}", e); 192 e 193 })?; 194 let timer = unsafe { File::from_raw_fd(timer_fd) }; 195 Ok(Watchdog { 196 common: VirtioCommon { 197 device_type: VirtioDeviceType::Watchdog as u32, 198 queue_sizes: QUEUE_SIZES.to_vec(), 199 paused_sync: Some(Arc::new(Barrier::new(2))), 200 avail_features, 201 min_queues: 1, 202 ..Default::default() 203 }, 204 id, 205 seccomp_action, 206 reset_evt, 207 last_ping_time: Arc::new(Mutex::new(None)), 208 timer, 209 exit_evt, 210 }) 211 } 212 213 fn state(&self) -> WatchdogState { 214 
WatchdogState { 215 avail_features: self.common.avail_features, 216 acked_features: self.common.acked_features, 217 enabled: self.last_ping_time.lock().unwrap().is_some(), 218 } 219 } 220 221 fn set_state(&mut self, state: &WatchdogState) { 222 self.common.avail_features = state.avail_features; 223 self.common.acked_features = state.acked_features; 224 // When restoring enable the watchdog if it was previously enabled. We reset the timer 225 // to ensure that we don't unnecessarily reboot due to the offline time. 226 if state.enabled { 227 self.last_ping_time.lock().unwrap().replace(Instant::now()); 228 } 229 } 230 } 231 232 impl Drop for Watchdog { 233 fn drop(&mut self) { 234 if let Some(kill_evt) = self.common.kill_evt.take() { 235 // Ignore the result because there is nothing we can do about it. 236 let _ = kill_evt.write(1); 237 } 238 } 239 } 240 241 fn timerfd_create() -> Result<RawFd, io::Error> { 242 let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) }; 243 if res < 0 { 244 Err(io::Error::last_os_error()) 245 } else { 246 Ok(res as RawFd) 247 } 248 } 249 250 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> { 251 let periodic = libc::itimerspec { 252 it_interval: libc::timespec { 253 tv_sec: secs, 254 tv_nsec: 0, 255 }, 256 it_value: libc::timespec { 257 tv_sec: secs, 258 tv_nsec: 0, 259 }, 260 }; 261 262 let res = 263 unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) }; 264 265 if res < 0 { 266 Err(io::Error::last_os_error()) 267 } else { 268 Ok(()) 269 } 270 } 271 272 impl VirtioDevice for Watchdog { 273 fn device_type(&self) -> u32 { 274 self.common.device_type 275 } 276 277 fn queue_max_sizes(&self) -> &[u16] { 278 &self.common.queue_sizes 279 } 280 281 fn features(&self) -> u64 { 282 self.common.avail_features 283 } 284 285 fn ack_features(&mut self, value: u64) { 286 self.common.ack_features(value) 287 } 288 289 fn activate( 290 &mut self, 291 mem: GuestMemoryAtomic<GuestMemoryMmap>, 
292 interrupt_cb: Arc<dyn VirtioInterrupt>, 293 queues: Vec<Queue>, 294 mut queue_evts: Vec<EventFd>, 295 ) -> ActivateResult { 296 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 297 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 298 299 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 300 error!("Failed to clone reset_evt eventfd: {}", e); 301 ActivateError::BadActivate 302 })?; 303 304 let timer = self.timer.try_clone().map_err(|e| { 305 error!("Failed to clone timer fd: {}", e); 306 ActivateError::BadActivate 307 })?; 308 309 let mut handler = WatchdogEpollHandler { 310 queues, 311 mem, 312 interrupt_cb, 313 queue_evt: queue_evts.remove(0), 314 kill_evt, 315 pause_evt, 316 timer, 317 last_ping_time: self.last_ping_time.clone(), 318 reset_evt, 319 }; 320 321 let paused = self.common.paused.clone(); 322 let paused_sync = self.common.paused_sync.clone(); 323 let mut epoll_threads = Vec::new(); 324 325 spawn_virtio_thread( 326 &self.id, 327 &self.seccomp_action, 328 Thread::VirtioWatchdog, 329 &mut epoll_threads, 330 &self.exit_evt, 331 move || { 332 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 333 error!("Error running worker: {:?}", e); 334 } 335 }, 336 )?; 337 338 self.common.epoll_threads = Some(epoll_threads); 339 340 event!("virtio-device", "activated", "id", &self.id); 341 Ok(()) 342 } 343 344 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 345 let result = self.common.reset(); 346 event!("virtio-device", "reset", "id", &self.id); 347 result 348 } 349 } 350 351 impl Pausable for Watchdog { 352 fn pause(&mut self) -> result::Result<(), MigratableError> { 353 info!("Watchdog paused - disabling timer"); 354 timerfd_setup(&self.timer, 0) 355 .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?; 356 self.common.pause() 357 } 358 359 fn resume(&mut self) -> result::Result<(), MigratableError> { 360 // Reset the timer on pause if it was previously used 361 if 
self.last_ping_time.lock().unwrap().is_some() { 362 info!( 363 "Watchdog resumed - enabling timer (every {} seconds)", 364 WATCHDOG_TIMER_INTERVAL 365 ); 366 self.last_ping_time.lock().unwrap().replace(Instant::now()); 367 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 368 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?; 369 } 370 self.common.resume() 371 } 372 } 373 374 impl Snapshottable for Watchdog { 375 fn id(&self) -> String { 376 self.id.clone() 377 } 378 379 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 380 Snapshot::new_from_versioned_state(&self.id, &self.state()) 381 } 382 383 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 384 self.set_state(&snapshot.to_versioned_state(&self.id)?); 385 Ok(()) 386 } 387 } 388 389 impl Transportable for Watchdog {} 390 impl Migratable for Watchdog {} 391