xref: /cloud-hypervisor/virtio-devices/src/watchdog.rs (revision eea9bcea38e0c5649f444c829f3a4f9c22aa486c)
1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
2 //
3 // Copyright © 2020 Intel Corporation
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-3-Clause file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
11     VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
12 };
13 use crate::seccomp_filters::Thread;
14 use crate::thread_helper::spawn_virtio_thread;
15 use crate::GuestMemoryMmap;
16 use crate::{VirtioInterrupt, VirtioInterruptType};
17 use anyhow::anyhow;
18 use seccompiler::SeccompAction;
19 use std::fs::File;
20 use std::io::{self, Read};
21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
22 use std::result;
23 use std::sync::atomic::AtomicBool;
24 use std::sync::{Arc, Barrier, Mutex};
25 use std::time::Instant;
26 use thiserror::Error;
27 use versionize::{VersionMap, Versionize, VersionizeResult};
28 use versionize_derive::Versionize;
29 use virtio_queue::{Queue, QueueT};
30 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
31 use vm_migration::VersionMapped;
32 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
33 use vmm_sys_util::eventfd::EventFd;
34 
// Size of the single virtqueue exposed by the device.
const QUEUE_SIZE: u16 = 8;
// Per-queue sizes reported through `queue_max_sizes()`: one queue of QUEUE_SIZE entries.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// The watchdog timer fd has expired.
const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Period, in seconds, of the timer used to check whether there has been a ping.
// This needs to match what the driver is using.
const WATCHDOG_TIMER_INTERVAL: i64 = 15;

// Number of seconds since the last ping after which a reboot is triggered.
const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5;
49 
/// Errors raised while processing the watchdog virtqueue.
#[derive(Error, Debug)]
enum Error {
    /// Arming the timer fd (done on the first ping) failed.
    #[error("Error programming timer fd: {0}")]
    TimerfdSetup(io::Error),
    /// A descriptor chain popped from the queue contained no descriptor.
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    /// Returning a descriptor chain to the used ring failed.
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}
59 
/// Worker-side state of the watchdog device: everything the epoll loop needs to
/// service the virtqueue and the timer.
struct WatchdogEpollHandler {
    // Guest memory handle, used to pop descriptor chains from the queue.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // The device's single virtqueue.
    queue: Queue,
    // Used to raise the queue interrupt after descriptors have been returned.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Registered with the epoll helper as QUEUE_AVAIL_EVENT.
    queue_evt: EventFd,
    // Handed to `EpollHelper::new` together with `pause_evt`.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Timer fd, registered with the epoll helper as TIMER_EXPIRED_EVENT.
    timer: File,
    // Time of the most recent ping; `None` until the first ping is received.
    // Shared with the owning `Watchdog`.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Written to when the watchdog triggers.
    reset_evt: EventFd,
}
71 
72 impl WatchdogEpollHandler {
73     // The main queue is very simple - the driver "pings" the device by passing it a (write-only)
74     // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver
75     fn process_queue(&mut self) -> result::Result<bool, Error> {
76         let queue = &mut self.queue;
77         let mut used_descs = false;
78         while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) {
79             let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
80 
81             let mut len = 0;
82 
83             if desc.is_write_only() && desc_chain.memory().write_obj(1u8, desc.addr()).is_ok() {
84                 len = desc.len();
85                 // If this is the first "ping" then setup the timer
86                 if self.last_ping_time.lock().unwrap().is_none() {
87                     info!(
88                         "First ping received. Starting timer (every {} seconds)",
89                         WATCHDOG_TIMER_INTERVAL
90                     );
91                     timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
92                         .map_err(Error::TimerfdSetup)?;
93                 }
94                 self.last_ping_time.lock().unwrap().replace(Instant::now());
95             }
96 
97             queue
98                 .add_used(desc_chain.memory(), desc_chain.head_index(), len)
99                 .map_err(Error::QueueAddUsed)?;
100             used_descs = true;
101         }
102 
103         Ok(used_descs)
104     }
105 
106     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
107         self.interrupt_cb
108             .trigger(VirtioInterruptType::Queue(0))
109             .map_err(|e| {
110                 error!("Failed to signal used queue: {:?}", e);
111                 DeviceError::FailedSignalingUsedQueue(e)
112             })
113     }
114 
115     fn run(
116         &mut self,
117         paused: Arc<AtomicBool>,
118         paused_sync: Arc<Barrier>,
119     ) -> result::Result<(), EpollHelperError> {
120         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
121         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
122         helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?;
123         helper.run(paused, paused_sync, self)?;
124 
125         Ok(())
126     }
127 }
128 
129 impl EpollHelperHandler for WatchdogEpollHandler {
130     fn handle_event(
131         &mut self,
132         _helper: &mut EpollHelper,
133         event: &epoll::Event,
134     ) -> result::Result<(), EpollHelperError> {
135         let ev_type = event.data as u16;
136         match ev_type {
137             QUEUE_AVAIL_EVENT => {
138                 self.queue_evt.read().map_err(|e| {
139                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
140                 })?;
141 
142                 let needs_notification = self.process_queue().map_err(|e| {
143                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
144                 })?;
145                 if needs_notification {
146                     self.signal_used_queue().map_err(|e| {
147                         EpollHelperError::HandleEvent(anyhow!(
148                             "Failed to signal used queue: {:?}",
149                             e
150                         ))
151                     })?;
152                 }
153             }
154             TIMER_EXPIRED_EVENT => {
155                 // When reading from the timerfd you get 8 bytes indicating
156                 // the number of times this event has elapsed since the last read.
157                 let mut buf = vec![0; 8];
158                 self.timer.read_exact(&mut buf).map_err(|e| {
159                     EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e))
160                 })?;
161 
162                 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() {
163                     let now = Instant::now();
164                     let gap = now.duration_since(*last_ping_time).as_secs();
165                     if gap > WATCHDOG_TIMEOUT {
166                         error!("Watchdog triggered: {} seconds since last ping", gap);
167                         self.reset_evt.write(1).ok();
168                     }
169                 }
170             }
171             _ => {
172                 return Err(EpollHelperError::HandleEvent(anyhow!(
173                     "Unexpected event: {}",
174                     ev_type
175                 )));
176             }
177         }
178         Ok(())
179     }
180 }
181 
/// Virtio device for exposing a watchdog to the guest
pub struct Watchdog {
    // Common virtio device state (features, queue sizes, pause/kill plumbing).
    common: VirtioCommon,
    // Device identifier, used for the worker thread, events and snapshots.
    id: String,
    // Passed to `spawn_virtio_thread` when the worker thread is started.
    seccomp_action: SeccompAction,
    // Cloned into the worker on activation; written to when the watchdog triggers.
    reset_evt: EventFd,
    // Time of the most recent ping; `None` until the guest has pinged the device.
    // Shared with the worker thread.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Timer fd; a clone of it is handed to the worker on activation.
    timer: File,
    // Passed to `spawn_virtio_thread` when the worker thread is started.
    exit_evt: EventFd,
}
192 
/// State of the watchdog device that is saved in and restored from snapshots.
#[derive(Versionize)]
pub struct WatchdogState {
    /// Features offered by the device.
    pub avail_features: u64,
    /// Features acknowledged by the driver.
    pub acked_features: u64,
    /// Whether the guest had already pinged the device when the state was captured.
    pub enabled: bool,
}

// Marker impl with no overrides: the trait's provided behaviour is used as-is.
impl VersionMapped for WatchdogState {}
201 
202 impl Watchdog {
203     /// Create a new virtio watchdog device that will reboot VM if the guest hangs
204     pub fn new(
205         id: String,
206         reset_evt: EventFd,
207         seccomp_action: SeccompAction,
208         exit_evt: EventFd,
209     ) -> io::Result<Watchdog> {
210         let avail_features = 1u64 << VIRTIO_F_VERSION_1;
211         let timer_fd = timerfd_create().map_err(|e| {
212             error!("Failed to create timer fd {}", e);
213             e
214         })?;
215         let timer = unsafe { File::from_raw_fd(timer_fd) };
216         Ok(Watchdog {
217             common: VirtioCommon {
218                 device_type: VirtioDeviceType::Watchdog as u32,
219                 queue_sizes: QUEUE_SIZES.to_vec(),
220                 paused_sync: Some(Arc::new(Barrier::new(2))),
221                 avail_features,
222                 min_queues: 1,
223                 ..Default::default()
224             },
225             id,
226             seccomp_action,
227             reset_evt,
228             last_ping_time: Arc::new(Mutex::new(None)),
229             timer,
230             exit_evt,
231         })
232     }
233 
234     fn state(&self) -> WatchdogState {
235         WatchdogState {
236             avail_features: self.common.avail_features,
237             acked_features: self.common.acked_features,
238             enabled: self.last_ping_time.lock().unwrap().is_some(),
239         }
240     }
241 
242     fn set_state(&mut self, state: &WatchdogState) {
243         self.common.avail_features = state.avail_features;
244         self.common.acked_features = state.acked_features;
245         // When restoring enable the watchdog if it was previously enabled. We reset the timer
246         // to ensure that we don't unnecessarily reboot due to the offline time.
247         if state.enabled {
248             self.last_ping_time.lock().unwrap().replace(Instant::now());
249         }
250     }
251 
252     #[cfg(fuzzing)]
253     pub fn wait_for_epoll_threads(&mut self) {
254         self.common.wait_for_epoll_threads();
255     }
256 }
257 
258 impl Drop for Watchdog {
259     fn drop(&mut self) {
260         if let Some(kill_evt) = self.common.kill_evt.take() {
261             // Ignore the result because there is nothing we can do about it.
262             let _ = kill_evt.write(1);
263         }
264     }
265 }
266 
267 fn timerfd_create() -> Result<RawFd, io::Error> {
268     let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) };
269     if res < 0 {
270         Err(io::Error::last_os_error())
271     } else {
272         Ok(res as RawFd)
273     }
274 }
275 
276 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> {
277     let periodic = libc::itimerspec {
278         it_interval: libc::timespec {
279             tv_sec: secs,
280             tv_nsec: 0,
281         },
282         it_value: libc::timespec {
283             tv_sec: secs,
284             tv_nsec: 0,
285         },
286     };
287 
288     let res =
289         unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) };
290 
291     if res < 0 {
292         Err(io::Error::last_os_error())
293     } else {
294         Ok(())
295     }
296 }
297 
298 impl VirtioDevice for Watchdog {
299     fn device_type(&self) -> u32 {
300         self.common.device_type
301     }
302 
303     fn queue_max_sizes(&self) -> &[u16] {
304         &self.common.queue_sizes
305     }
306 
307     fn features(&self) -> u64 {
308         self.common.avail_features
309     }
310 
311     fn ack_features(&mut self, value: u64) {
312         self.common.ack_features(value)
313     }
314 
315     fn activate(
316         &mut self,
317         mem: GuestMemoryAtomic<GuestMemoryMmap>,
318         interrupt_cb: Arc<dyn VirtioInterrupt>,
319         mut queues: Vec<(usize, Queue, EventFd)>,
320     ) -> ActivateResult {
321         self.common.activate(&queues, &interrupt_cb)?;
322         let (kill_evt, pause_evt) = self.common.dup_eventfds();
323 
324         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
325             error!("Failed to clone reset_evt eventfd: {}", e);
326             ActivateError::BadActivate
327         })?;
328 
329         let timer = self.timer.try_clone().map_err(|e| {
330             error!("Failed to clone timer fd: {}", e);
331             ActivateError::BadActivate
332         })?;
333 
334         let (_, queue, queue_evt) = queues.remove(0);
335 
336         let mut handler = WatchdogEpollHandler {
337             mem,
338             queue,
339             interrupt_cb,
340             queue_evt,
341             kill_evt,
342             pause_evt,
343             timer,
344             last_ping_time: self.last_ping_time.clone(),
345             reset_evt,
346         };
347 
348         let paused = self.common.paused.clone();
349         let paused_sync = self.common.paused_sync.clone();
350         let mut epoll_threads = Vec::new();
351 
352         spawn_virtio_thread(
353             &self.id,
354             &self.seccomp_action,
355             Thread::VirtioWatchdog,
356             &mut epoll_threads,
357             &self.exit_evt,
358             move || handler.run(paused, paused_sync.unwrap()),
359         )?;
360 
361         self.common.epoll_threads = Some(epoll_threads);
362 
363         event!("virtio-device", "activated", "id", &self.id);
364         Ok(())
365     }
366 
367     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
368         let result = self.common.reset();
369         event!("virtio-device", "reset", "id", &self.id);
370         result
371     }
372 }
373 
374 impl Pausable for Watchdog {
375     fn pause(&mut self) -> result::Result<(), MigratableError> {
376         info!("Watchdog paused - disabling timer");
377         timerfd_setup(&self.timer, 0)
378             .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?;
379         self.common.pause()
380     }
381 
382     fn resume(&mut self) -> result::Result<(), MigratableError> {
383         // Reset the timer on pause if it was previously used
384         if self.last_ping_time.lock().unwrap().is_some() {
385             info!(
386                 "Watchdog resumed - enabling timer (every {} seconds)",
387                 WATCHDOG_TIMER_INTERVAL
388             );
389             self.last_ping_time.lock().unwrap().replace(Instant::now());
390             timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
391                 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?;
392         }
393         self.common.resume()
394     }
395 }
396 
397 impl Snapshottable for Watchdog {
398     fn id(&self) -> String {
399         self.id.clone()
400     }
401 
402     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
403         Snapshot::new_from_versioned_state(&self.id, &self.state())
404     }
405 
406     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
407         self.set_state(&snapshot.to_versioned_state(&self.id)?);
408         Ok(())
409     }
410 }
411 
// Empty impls: no trait items are overridden for the watchdog.
impl Transportable for Watchdog {}
impl Migratable for Watchdog {}
414