1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 2 // 3 // Copyright © 2020 Intel Corporation 4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE-BSD-3-Clause file. 7 8 use std::fs::File; 9 use std::io::{self, Read}; 10 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 11 use std::result; 12 use std::sync::atomic::AtomicBool; 13 use std::sync::{Arc, Barrier, Mutex}; 14 use std::time::Instant; 15 16 use anyhow::anyhow; 17 use seccompiler::SeccompAction; 18 use serde::{Deserialize, Serialize}; 19 use thiserror::Error; 20 use virtio_queue::{Queue, QueueT}; 21 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic}; 22 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 23 use vmm_sys_util::eventfd::EventFd; 24 25 use super::{ 26 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 27 Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, 28 VIRTIO_F_VERSION_1, 29 }; 30 use crate::seccomp_filters::Thread; 31 use crate::thread_helper::spawn_virtio_thread; 32 use crate::{GuestMemoryMmap, VirtioInterrupt, VirtioInterruptType}; 33 34 const QUEUE_SIZE: u16 = 8; 35 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 36 37 // New descriptors are pending on the virtio queue. 38 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 39 // Timer expired 40 const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2; 41 42 // Number of seconds to check to see if there has been a ping 43 // This needs to match what the driver is using. 44 const WATCHDOG_TIMER_INTERVAL: i64 = 15; 45 46 // Number of seconds since last ping to trigger reboot 47 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5; 48 49 #[derive(Error, Debug)] 50 enum Error { 51 #[error("Error programming timer fd")] 52 TimerfdSetup(#[source] io::Error), 53 #[error("Descriptor chain too short")] 54 DescriptorChainTooShort, 55 #[error("Failed adding used index")] 56 QueueAddUsed(#[source] virtio_queue::Error), 57 #[error("Invalid descriptor")] 58 InvalidDescriptor, 59 #[error("Failed to write to guest memory")] 60 GuestMemoryWrite(#[source] vm_memory::guest_memory::Error), 61 } 62 63 struct WatchdogEpollHandler { 64 mem: GuestMemoryAtomic<GuestMemoryMmap>, 65 queue: Queue, 66 interrupt_cb: Arc<dyn VirtioInterrupt>, 67 queue_evt: EventFd, 68 kill_evt: EventFd, 69 pause_evt: EventFd, 70 timer: File, 71 last_ping_time: Arc<Mutex<Option<Instant>>>, 72 reset_evt: EventFd, 73 } 74 75 impl WatchdogEpollHandler { 76 // The main queue is very simple - the driver "pings" the device by passing it a (write-only) 77 // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver 78 fn process_queue(&mut self) -> result::Result<bool, Error> { 79 let queue = &mut self.queue; 80 let mut used_descs = false; 81 while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) { 82 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 83 84 if !(desc.is_write_only() && desc.len() > 0) { 85 return Err(Error::InvalidDescriptor); 86 } 87 88 desc_chain 89 .memory() 90 .write_obj(1u8, desc.addr()) 91 .map_err(Error::GuestMemoryWrite)?; 92 93 // If this is the first "ping" then setup the timer 94 if self.last_ping_time.lock().unwrap().is_none() { 95 info!( 96 "First ping received. Starting timer (every {} seconds)", 97 WATCHDOG_TIMER_INTERVAL 98 ); 99 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL).map_err(Error::TimerfdSetup)?; 100 } 101 self.last_ping_time.lock().unwrap().replace(Instant::now()); 102 103 queue 104 .add_used(desc_chain.memory(), desc_chain.head_index(), desc.len()) 105 .map_err(Error::QueueAddUsed)?; 106 used_descs = true; 107 } 108 109 Ok(used_descs) 110 } 111 112 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 113 self.interrupt_cb 114 .trigger(VirtioInterruptType::Queue(0)) 115 .map_err(|e| { 116 error!("Failed to signal used queue: {:?}", e); 117 DeviceError::FailedSignalingUsedQueue(e) 118 }) 119 } 120 121 fn run( 122 &mut self, 123 paused: Arc<AtomicBool>, 124 paused_sync: Arc<Barrier>, 125 ) -> result::Result<(), EpollHelperError> { 126 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 127 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 128 helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?; 129 helper.run(paused, paused_sync, self)?; 130 131 Ok(()) 132 } 133 } 134 135 impl EpollHelperHandler for WatchdogEpollHandler { 136 fn handle_event( 137 &mut self, 138 _helper: &mut EpollHelper, 139 event: &epoll::Event, 140 ) -> result::Result<(), EpollHelperError> { 141 let ev_type = event.data as u16; 142 match ev_type { 143 QUEUE_AVAIL_EVENT => { 144 self.queue_evt.read().map_err(|e| { 145 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 146 })?; 147 148 let needs_notification = self.process_queue().map_err(|e| { 149 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 150 })?; 151 if needs_notification { 152 self.signal_used_queue().map_err(|e| { 153 EpollHelperError::HandleEvent(anyhow!( 154 "Failed to signal used queue: {:?}", 155 e 156 )) 157 })?; 158 } 159 } 160 TIMER_EXPIRED_EVENT => { 161 // When reading from the timerfd you get 8 bytes indicating 162 // the number of times this event has elapsed since the last read. 163 let mut buf = vec![0; 8]; 164 self.timer.read_exact(&mut buf).map_err(|e| { 165 EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e)) 166 })?; 167 168 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() { 169 let now = Instant::now(); 170 let gap = now.duration_since(*last_ping_time).as_secs(); 171 if gap > WATCHDOG_TIMEOUT { 172 error!("Watchdog triggered: {} seconds since last ping", gap); 173 self.reset_evt.write(1).ok(); 174 } 175 } 176 } 177 _ => { 178 return Err(EpollHelperError::HandleEvent(anyhow!( 179 "Unexpected event: {}", 180 ev_type 181 ))); 182 } 183 } 184 Ok(()) 185 } 186 } 187 188 /// Virtio device for exposing a watchdog to the guest 189 pub struct Watchdog { 190 common: VirtioCommon, 191 id: String, 192 seccomp_action: SeccompAction, 193 reset_evt: EventFd, 194 last_ping_time: Arc<Mutex<Option<Instant>>>, 195 timer: File, 196 exit_evt: EventFd, 197 } 198 199 #[derive(Serialize, Deserialize)] 200 pub struct WatchdogState { 201 pub avail_features: u64, 202 pub acked_features: u64, 203 pub enabled: bool, 204 } 205 206 impl Watchdog { 207 /// Create a new virtio watchdog device that will reboot VM if the guest hangs 208 pub fn new( 209 id: String, 210 reset_evt: EventFd, 211 seccomp_action: SeccompAction, 212 exit_evt: EventFd, 213 state: Option<WatchdogState>, 214 ) -> io::Result<Watchdog> { 215 let mut last_ping_time = None; 216 let (avail_features, acked_features, paused) = if let Some(state) = state { 217 info!("Restoring virtio-watchdog {}", id); 218 219 // When restoring enable the watchdog if it was previously enabled. 220 // We reset the timer to ensure that we don't unnecessarily reboot 221 // due to the offline time. 222 if state.enabled { 223 last_ping_time = Some(Instant::now()); 224 } 225 226 (state.avail_features, state.acked_features, true) 227 } else { 228 (1u64 << VIRTIO_F_VERSION_1, 0, false) 229 }; 230 231 let timer_fd = timerfd_create().map_err(|e| { 232 error!("Failed to create timer fd {}", e); 233 e 234 })?; 235 // SAFETY: timer_fd is a valid fd 236 let timer = unsafe { File::from_raw_fd(timer_fd) }; 237 238 Ok(Watchdog { 239 common: VirtioCommon { 240 device_type: VirtioDeviceType::Watchdog as u32, 241 queue_sizes: QUEUE_SIZES.to_vec(), 242 paused_sync: Some(Arc::new(Barrier::new(2))), 243 avail_features, 244 acked_features, 245 min_queues: 1, 246 paused: Arc::new(AtomicBool::new(paused)), 247 ..Default::default() 248 }, 249 id, 250 seccomp_action, 251 reset_evt, 252 last_ping_time: Arc::new(Mutex::new(last_ping_time)), 253 timer, 254 exit_evt, 255 }) 256 } 257 258 fn state(&self) -> WatchdogState { 259 WatchdogState { 260 avail_features: self.common.avail_features, 261 acked_features: self.common.acked_features, 262 enabled: self.last_ping_time.lock().unwrap().is_some(), 263 } 264 } 265 266 #[cfg(fuzzing)] 267 pub fn wait_for_epoll_threads(&mut self) { 268 self.common.wait_for_epoll_threads(); 269 } 270 } 271 272 impl Drop for Watchdog { 273 fn drop(&mut self) { 274 if let Some(kill_evt) = self.common.kill_evt.take() { 275 // Ignore the result because there is nothing we can do about it. 276 let _ = kill_evt.write(1); 277 } 278 self.common.wait_for_epoll_threads(); 279 } 280 } 281 282 fn timerfd_create() -> Result<RawFd, io::Error> { 283 // SAFETY: FFI call, trivially safe 284 let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) }; 285 if res < 0 { 286 Err(io::Error::last_os_error()) 287 } else { 288 Ok(res as RawFd) 289 } 290 } 291 292 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> { 293 let periodic = libc::itimerspec { 294 it_interval: libc::timespec { 295 tv_sec: secs, 296 tv_nsec: 0, 297 }, 298 it_value: libc::timespec { 299 tv_sec: secs, 300 tv_nsec: 0, 301 }, 302 }; 303 304 let res = 305 // SAFETY: FFI call with correct arguments 306 unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) }; 307 308 if res < 0 { 309 Err(io::Error::last_os_error()) 310 } else { 311 Ok(()) 312 } 313 } 314 315 impl VirtioDevice for Watchdog { 316 fn device_type(&self) -> u32 { 317 self.common.device_type 318 } 319 320 fn queue_max_sizes(&self) -> &[u16] { 321 &self.common.queue_sizes 322 } 323 324 fn features(&self) -> u64 { 325 self.common.avail_features 326 } 327 328 fn ack_features(&mut self, value: u64) { 329 self.common.ack_features(value) 330 } 331 332 fn activate( 333 &mut self, 334 mem: GuestMemoryAtomic<GuestMemoryMmap>, 335 interrupt_cb: Arc<dyn VirtioInterrupt>, 336 mut queues: Vec<(usize, Queue, EventFd)>, 337 ) -> ActivateResult { 338 self.common.activate(&queues, &interrupt_cb)?; 339 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 340 341 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 342 error!("Failed to clone reset_evt eventfd: {}", e); 343 ActivateError::BadActivate 344 })?; 345 346 let timer = self.timer.try_clone().map_err(|e| { 347 error!("Failed to clone timer fd: {}", e); 348 ActivateError::BadActivate 349 })?; 350 351 let (_, queue, queue_evt) = queues.remove(0); 352 353 let mut handler = WatchdogEpollHandler { 354 mem, 355 queue, 356 interrupt_cb, 357 queue_evt, 358 kill_evt, 359 pause_evt, 360 timer, 361 last_ping_time: self.last_ping_time.clone(), 362 reset_evt, 363 }; 364 365 let paused = self.common.paused.clone(); 366 let paused_sync = self.common.paused_sync.clone(); 367 let mut epoll_threads = Vec::new(); 368 369 spawn_virtio_thread( 370 &self.id, 371 &self.seccomp_action, 372 Thread::VirtioWatchdog, 373 &mut epoll_threads, 374 &self.exit_evt, 375 move || handler.run(paused, paused_sync.unwrap()), 376 )?; 377 378 self.common.epoll_threads = Some(epoll_threads); 379 380 event!("virtio-device", "activated", "id", &self.id); 381 Ok(()) 382 } 383 384 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 385 let result = self.common.reset(); 386 event!("virtio-device", "reset", "id", &self.id); 387 result 388 } 389 } 390 391 impl Pausable for Watchdog { 392 fn pause(&mut self) -> result::Result<(), MigratableError> { 393 info!("Watchdog paused - disabling timer"); 394 timerfd_setup(&self.timer, 0) 395 .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?; 396 self.common.pause() 397 } 398 399 fn resume(&mut self) -> result::Result<(), MigratableError> { 400 // Reset the timer on pause if it was previously used 401 if self.last_ping_time.lock().unwrap().is_some() { 402 info!( 403 "Watchdog resumed - enabling timer (every {} seconds)", 404 WATCHDOG_TIMER_INTERVAL 405 ); 406 self.last_ping_time.lock().unwrap().replace(Instant::now()); 407 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) 408 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?; 409 } 410 self.common.resume() 411 } 412 } 413 414 impl Snapshottable for Watchdog { 415 fn id(&self) -> String { 416 self.id.clone() 417 } 418 419 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 420 Snapshot::new_from_state(&self.state()) 421 } 422 } 423 424 impl Transportable for Watchdog {} 425 impl Migratable for Watchdog {} 426