1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 2 // 3 // Copyright © 2020 Intel Corporation 4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE-BSD-3-Clause file. 7 8 use super::Error as DeviceError; 9 use super::{ 10 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon, 11 VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1, 12 }; 13 use crate::seccomp_filters::Thread; 14 use crate::thread_helper::spawn_virtio_thread; 15 use crate::GuestMemoryMmap; 16 use crate::{VirtioInterrupt, VirtioInterruptType}; 17 use anyhow::anyhow; 18 use seccompiler::SeccompAction; 19 use std::fs::File; 20 use std::io::{self, Read}; 21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 22 use std::result; 23 use std::sync::atomic::AtomicBool; 24 use std::sync::{Arc, Barrier, Mutex}; 25 use std::time::Instant; 26 use versionize::{VersionMap, Versionize, VersionizeResult}; 27 use versionize_derive::Versionize; 28 use virtio_queue::Queue; 29 use vm_memory::{Bytes, GuestMemoryAtomic}; 30 use vm_migration::VersionMapped; 31 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 32 use vmm_sys_util::eventfd::EventFd; 33 34 const QUEUE_SIZE: u16 = 8; 35 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 36 37 // New descriptors are pending on the virtio queue. 38 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 39 // Timer expired 40 const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2; 41 42 // Number of seconds to check to see if there has been a ping 43 // This needs to match what the driver is using. 44 const WATCHDOG_TIMER_INTERVAL: i64 = 15; 45 46 // Number of seconds since last ping to trigger reboot 47 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5; 48 49 struct WatchdogEpollHandler { 50 queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>, 51 interrupt_cb: Arc<dyn VirtioInterrupt>, 52 queue_evt: EventFd, 53 kill_evt: EventFd, 54 pause_evt: EventFd, 55 timer: File, 56 last_ping_time: Arc<Mutex<Option<Instant>>>, 57 reset_evt: EventFd, 58 } 59 60 impl WatchdogEpollHandler { 61 // The main queue is very simple - the driver "pings" the device by passing it a (write-only) 62 // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver 63 fn process_queue(&mut self) -> bool { 64 let queue = &mut self.queues[0]; 65 let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize]; 66 let mut used_count = 0; 67 for mut desc_chain in queue.iter().unwrap() { 68 let desc = desc_chain.next().unwrap(); 69 70 let mut len = 0; 71 72 if desc.is_write_only() && desc_chain.memory().write_obj(1u8, desc.addr()).is_ok() { 73 len = desc.len(); 74 // If this is the first "ping" then setup the timer 75 if self.last_ping_time.lock().unwrap().is_none() { 76 info!( 77 "First ping received. Starting timer (every {} seconds)", 78 WATCHDOG_TIMER_INTERVAL 79 ); 80 if let Err(e) = timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) { 81 error!("Error programming timer fd: {:?}", e); 82 } 83 } 84 self.last_ping_time.lock().unwrap().replace(Instant::now()); 85 } 86 87 used_desc_heads[used_count] = (desc_chain.head_index(), len); 88 used_count += 1; 89 } 90 91 for &(desc_index, len) in &used_desc_heads[..used_count] { 92 queue.add_used(desc_index, len).unwrap(); 93 } 94 used_count > 0 95 } 96 97 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 98 self.interrupt_cb 99 .trigger(VirtioInterruptType::Queue(0)) 100 .map_err(|e| { 101 error!("Failed to signal used queue: {:?}", e); 102 DeviceError::FailedSignalingUsedQueue(e) 103 }) 104 } 105 106 fn run( 107 &mut self, 108 paused: Arc<AtomicBool>, 109 paused_sync: Arc<Barrier>, 110 ) -> result::Result<(), EpollHelperError> { 111 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 112 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 113 helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?; 114 helper.run(paused, paused_sync, self)?; 115 116 Ok(()) 117 } 118 } 119 120 impl EpollHelperHandler for WatchdogEpollHandler { 121 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 122 let ev_type = event.data as u16; 123 match ev_type { 124 QUEUE_AVAIL_EVENT => { 125 if let Err(e) = self.queue_evt.read() { 126 error!("Failed to get queue event: {:?}", e); 127 return true; 128 } else if self.process_queue() { 129 if let Err(e) = self.signal_used_queue() { 130 error!("Failed to signal used queue: {:?}", e); 131 return true; 132 } 133 } 134 } 135 TIMER_EXPIRED_EVENT => { 136 // When reading from the timerfd you get 8 bytes indicating 137 // the number of times this event has elapsed since the last read. 138 let mut buf = vec![0; 8]; 139 if let Err(e) = self.timer.read_exact(&mut buf) { 140 error!("Error reading from timer fd: {:}", e); 141 return true; 142 } 143 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() { 144 let now = Instant::now(); 145 let gap = now.duration_since(*last_ping_time).as_secs(); 146 if gap > WATCHDOG_TIMEOUT { 147 error!("Watchdog triggered: {} seconds since last ping", gap); 148 self.reset_evt.write(1).ok(); 149 } 150 } 151 return false; 152 } 153 _ => { 154 error!("Unexpected event: {}", ev_type); 155 return true; 156 } 157 } 158 false 159 } 160 } 161 162 /// Virtio device for exposing a watchdog to the guest 163 pub struct Watchdog { 164 common: VirtioCommon, 165 id: String, 166 seccomp_action: SeccompAction, 167 reset_evt: EventFd, 168 last_ping_time: Arc<Mutex<Option<Instant>>>, 169 timer: File, 170 exit_evt: EventFd, 171 } 172 173 #[derive(Versionize)] 174 pub struct WatchdogState { 175 pub avail_features: u64, 176 pub acked_features: u64, 177 pub enabled: bool, 178 } 179 180 impl VersionMapped for WatchdogState {} 181 182 impl Watchdog { 183 /// Create a new virtio watchdog device that will reboot VM if the guest hangs 184 pub fn new( 185 id: String, 186 reset_evt: EventFd, 187 seccomp_action: SeccompAction, 188 exit_evt: EventFd, 189 ) -> io::Result<Watchdog> { 190 let avail_features = 1u64 << VIRTIO_F_VERSION_1; 191 let timer_fd = timerfd_create().map_err(|e| { 192 error!("Failed to create timer fd {}", e); 193 e 194 })?; 195 let timer = unsafe { File::from_raw_fd(timer_fd) }; 196 Ok(Watchdog { 197 common: VirtioCommon { 198 device_type: VirtioDeviceType::Watchdog as u32, 199 queue_sizes: QUEUE_SIZES.to_vec(), 200 paused_sync: Some(Arc::new(Barrier::new(2))), 201 avail_features, 202 min_queues: 1, 203 ..Default::default() 204 }, 205 id, 206 seccomp_action, 207 reset_evt, 208 last_ping_time: Arc::new(Mutex::new(None)), 209 timer, 210 exit_evt, 211 }) 212 } 213 214 fn state(&self) -> WatchdogState { 215 WatchdogState { 216 avail_features: self.common.avail_features, 217 acked_features: self.common.acked_features, 218 enabled: self.last_ping_time.lock().unwrap().is_some(), 219 } 220 } 221 222 fn set_state(&mut self, state: &WatchdogState) { 223 self.common.avail_features = state.avail_features; 224 self.common.acked_features = state.acked_features; 225 // When restoring enable the watchdog if it was previously enabled. We reset the timer 226 // to ensure that we don't unnecessarily reboot due to the offline time. 227 if state.enabled { 228 self.last_ping_time.lock().unwrap().replace(Instant::now()); 229 } 230 } 231 } 232 233 impl Drop for Watchdog { 234 fn drop(&mut self) { 235 if let Some(kill_evt) = self.common.kill_evt.take() { 236 // Ignore the result because there is nothing we can do about it. 237 let _ = kill_evt.write(1); 238 } 239 } 240 } 241 242 fn timerfd_create() -> Result<RawFd, io::Error> { 243 let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) }; 244 if res < 0 { 245 Err(io::Error::last_os_error()) 246 } else { 247 Ok(res as RawFd) 248 } 249 } 250 251 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> { 252 let periodic = libc::itimerspec { 253 it_interval: libc::timespec { 254 tv_sec: secs, 255 tv_nsec: 0, 256 }, 257 it_value: libc::timespec { 258 tv_sec: secs, 259 tv_nsec: 0, 260 }, 261 }; 262 263 let res = 264 unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) }; 265 266 if res < 0 { 267 Err(io::Error::last_os_error()) 268 } else { 269 Ok(()) 270 } 271 } 272 273 impl VirtioDevice for Watchdog { 274 fn device_type(&self) -> u32 { 275 self.common.device_type 276 } 277 278 fn queue_max_sizes(&self) -> &[u16] { 279 &self.common.queue_sizes 280 } 281 282 fn features(&self) -> u64 { 283 self.common.avail_features 284 } 285 286 fn ack_features(&mut self, value: u64) { 287 self.common.ack_features(value) 288 } 289 290 fn activate( 291 &mut self, 292 _mem: GuestMemoryAtomic<GuestMemoryMmap>, 293 interrupt_cb: Arc<dyn VirtioInterrupt>, 294 queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>, 295 mut queue_evts: Vec<EventFd>, 296 ) -> ActivateResult { 297 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 298 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 299 300 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 301 error!("Failed to clone reset_evt eventfd: {}", e); 302 ActivateError::BadActivate 303 })?; 304 305 let timer = self.timer.try_clone().map_err(|e| { 306 error!("Failed to clone timer fd: {}", e); 307 ActivateError::BadActivate 308 })?; 309 310 let mut handler = WatchdogEpollHandler { 311 queues, 312 interrupt_cb, 313 queue_evt: queue_evts.remove(0), 314 kill_evt, 315 pause_evt, 316 timer, 317 last_ping_time: self.last_ping_time.clone(), 318 reset_evt, 319 }; 320 321 let paused = self.common.paused.clone(); 322 let paused_sync = self.common.paused_sync.clone(); 323 let mut epoll_threads = Vec::new(); 324 325 spawn_virtio_thread( 326 &self.id, 327 &self.seccomp_action, 328 Thread::VirtioWatchdog, 329 &mut epoll_threads, 330 &self.exit_evt, 331 move || { 332 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 333 error!("Error running worker: {:?}", e); 334 } 335 }, 336 )?; 337 338 self.common.epoll_threads = Some(epoll_threads); 339 340 event!("virtio-device", "activated", "id", &self.id); 341 Ok(()) 342 } 343 344 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 345 let result = self.common.reset(); 346 event!("virtio-device", "reset", "id", &self.id); 347 result 348 } 349 } 350 351 impl Pausable for Watchdog { 352 fn pause(&mut self) -> result::Result<(), MigratableError> { 353 info!("Watchdog paused - disabling timer"); 354 timerfd_setup(&self.timer, 0) 355 .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?; 356 self.common.pause() 357 } 358 359 fn resume(&mut self) -> result::Result<(), MigratableError> { 360 // Reset the timer on pause if it was previously used 361 if self.last_ping_time.lock().unwrap().is_some() { 362 info!( 363 "Watchdog resumed - enabling timer (every {} seconds)", 364 WATCHDOG_TIMER_INTERVAL 365 ); 366 self.last_ping_time.lock().unwrap().replace(Instant::now()); 367 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 368 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?; 369 } 370 self.common.resume() 371 } 372 } 373 374 impl Snapshottable for Watchdog { 375 fn id(&self) -> String { 376 self.id.clone() 377 } 378 379 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 380 Snapshot::new_from_versioned_state(&self.id, &self.state()) 381 } 382 383 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 384 self.set_state(&snapshot.to_versioned_state(&self.id)?); 385 Ok(()) 386 } 387 } 388 389 impl Transportable for Watchdog {} 390 impl Migratable for Watchdog {} 391