xref: /cloud-hypervisor/virtio-devices/src/watchdog.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
2 //
3 // Copyright © 2020 Intel Corporation
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-3-Clause file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
11     VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
12 };
13 use crate::seccomp_filters::Thread;
14 use crate::thread_helper::spawn_virtio_thread;
15 use crate::GuestMemoryMmap;
16 use crate::{VirtioInterrupt, VirtioInterruptType};
17 use anyhow::anyhow;
18 use seccompiler::SeccompAction;
19 use std::fs::File;
20 use std::io::{self, Read};
21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
22 use std::result;
23 use std::sync::atomic::AtomicBool;
24 use std::sync::{Arc, Barrier, Mutex};
25 use std::time::Instant;
26 use thiserror::Error;
27 use versionize::{VersionMap, Versionize, VersionizeResult};
28 use versionize_derive::Versionize;
29 use virtio_queue::{Queue, QueueT};
30 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
31 use vm_migration::VersionMapped;
32 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
33 use vmm_sys_util::eventfd::EventFd;
34 
35 const QUEUE_SIZE: u16 = 8;
36 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
37 
38 // New descriptors are pending on the virtio queue.
39 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
40 // Timer expired
41 const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
42 
43 // Number of seconds to check to see if there has been a ping
44 // This needs to match what the driver is using.
45 const WATCHDOG_TIMER_INTERVAL: i64 = 15;
46 
47 // Number of seconds since last ping to trigger reboot
48 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5;
49 
/// Errors that can occur while the worker thread processes the ping queue.
#[derive(Error, Debug)]
enum Error {
    /// Arming the timer fd on the first ping failed.
    #[error("Error programming timer fd: {0}")]
    TimerfdSetup(io::Error),
    /// A popped descriptor chain contained no descriptor at all.
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    /// Returning the descriptor to the used ring failed.
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
    /// The ping descriptor was not write-only or had a zero length.
    #[error("Invalid descriptor")]
    InvalidDescriptor,
    /// Writing the ping acknowledgement into guest memory failed.
    #[error("Failed to write to guest memory: {0}")]
    GuestMemoryWrite(vm_memory::guest_memory::Error),
}
63 
/// State owned by the watchdog worker thread, which services the ping queue
/// and the periodic timer.
struct WatchdogEpollHandler {
    // Guest memory used to pop descriptor chains from the queue.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // The single ping virtqueue.
    queue: Queue,
    // Used to raise the queue interrupt after descriptors have been returned.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Signalled when new descriptors are available on the queue.
    queue_evt: EventFd,
    // Passed to the `EpollHelper` together with `pause_evt` when the thread starts.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Timer fd polled for `TIMER_EXPIRED_EVENT`; armed on the first ping.
    timer: File,
    // Time of the most recent ping, shared with the owning `Watchdog`.
    // `None` means no ping has been seen yet.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Written when the guest has not pinged for longer than `WATCHDOG_TIMEOUT`.
    reset_evt: EventFd,
}
75 
76 impl WatchdogEpollHandler {
77     // The main queue is very simple - the driver "pings" the device by passing it a (write-only)
78     // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver
79     fn process_queue(&mut self) -> result::Result<bool, Error> {
80         let queue = &mut self.queue;
81         let mut used_descs = false;
82         while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) {
83             let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
84 
85             if !(desc.is_write_only() && desc.len() > 0) {
86                 return Err(Error::InvalidDescriptor);
87             }
88 
89             desc_chain
90                 .memory()
91                 .write_obj(1u8, desc.addr())
92                 .map_err(Error::GuestMemoryWrite)?;
93 
94             // If this is the first "ping" then setup the timer
95             if self.last_ping_time.lock().unwrap().is_none() {
96                 info!(
97                     "First ping received. Starting timer (every {} seconds)",
98                     WATCHDOG_TIMER_INTERVAL
99                 );
100                 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL).map_err(Error::TimerfdSetup)?;
101             }
102             self.last_ping_time.lock().unwrap().replace(Instant::now());
103 
104             queue
105                 .add_used(desc_chain.memory(), desc_chain.head_index(), desc.len())
106                 .map_err(Error::QueueAddUsed)?;
107             used_descs = true;
108         }
109 
110         Ok(used_descs)
111     }
112 
113     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
114         self.interrupt_cb
115             .trigger(VirtioInterruptType::Queue(0))
116             .map_err(|e| {
117                 error!("Failed to signal used queue: {:?}", e);
118                 DeviceError::FailedSignalingUsedQueue(e)
119             })
120     }
121 
122     fn run(
123         &mut self,
124         paused: Arc<AtomicBool>,
125         paused_sync: Arc<Barrier>,
126     ) -> result::Result<(), EpollHelperError> {
127         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
128         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
129         helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?;
130         helper.run(paused, paused_sync, self)?;
131 
132         Ok(())
133     }
134 }
135 
136 impl EpollHelperHandler for WatchdogEpollHandler {
137     fn handle_event(
138         &mut self,
139         _helper: &mut EpollHelper,
140         event: &epoll::Event,
141     ) -> result::Result<(), EpollHelperError> {
142         let ev_type = event.data as u16;
143         match ev_type {
144             QUEUE_AVAIL_EVENT => {
145                 self.queue_evt.read().map_err(|e| {
146                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
147                 })?;
148 
149                 let needs_notification = self.process_queue().map_err(|e| {
150                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
151                 })?;
152                 if needs_notification {
153                     self.signal_used_queue().map_err(|e| {
154                         EpollHelperError::HandleEvent(anyhow!(
155                             "Failed to signal used queue: {:?}",
156                             e
157                         ))
158                     })?;
159                 }
160             }
161             TIMER_EXPIRED_EVENT => {
162                 // When reading from the timerfd you get 8 bytes indicating
163                 // the number of times this event has elapsed since the last read.
164                 let mut buf = vec![0; 8];
165                 self.timer.read_exact(&mut buf).map_err(|e| {
166                     EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e))
167                 })?;
168 
169                 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() {
170                     let now = Instant::now();
171                     let gap = now.duration_since(*last_ping_time).as_secs();
172                     if gap > WATCHDOG_TIMEOUT {
173                         error!("Watchdog triggered: {} seconds since last ping", gap);
174                         self.reset_evt.write(1).ok();
175                     }
176                 }
177             }
178             _ => {
179                 return Err(EpollHelperError::HandleEvent(anyhow!(
180                     "Unexpected event: {}",
181                     ev_type
182                 )));
183             }
184         }
185         Ok(())
186     }
187 }
188 
/// Virtio device for exposing a watchdog to the guest
pub struct Watchdog {
    // Shared virtio device state (features, queue sizes, pause/kill plumbing).
    common: VirtioCommon,
    // Device identifier, used for snapshots, events and the worker thread.
    id: String,
    // Seccomp action handed over when the worker thread is spawned.
    seccomp_action: SeccompAction,
    // Cloned into the worker thread, which writes to it when the watchdog expires.
    reset_evt: EventFd,
    // Time of the most recent ping, shared with the worker thread.
    // `None` until the first ping (or until an enabled state is restored).
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Timer fd; a clone of it is polled by the worker thread.
    timer: File,
    // Passed to `spawn_virtio_thread` when the worker thread is started.
    exit_evt: EventFd,
}
199 
/// Snapshot of the watchdog device, as produced by `Watchdog::state` and
/// consumed by `Watchdog::new` on restore.
#[derive(Versionize)]
pub struct WatchdogState {
    /// Feature bits offered by the device.
    pub avail_features: u64,
    /// Feature bits acknowledged by the driver.
    pub acked_features: u64,
    /// Whether a ping had already been recorded when the state was captured.
    pub enabled: bool,
}

// No methods are overridden here.
impl VersionMapped for WatchdogState {}
208 
209 impl Watchdog {
210     /// Create a new virtio watchdog device that will reboot VM if the guest hangs
211     pub fn new(
212         id: String,
213         reset_evt: EventFd,
214         seccomp_action: SeccompAction,
215         exit_evt: EventFd,
216         state: Option<WatchdogState>,
217     ) -> io::Result<Watchdog> {
218         let mut last_ping_time = None;
219         let (avail_features, acked_features) = if let Some(state) = state {
220             info!("Restoring virtio-watchdog {}", id);
221 
222             // When restoring enable the watchdog if it was previously enabled.
223             // We reset the timer to ensure that we don't unnecessarily reboot
224             // due to the offline time.
225             if state.enabled {
226                 last_ping_time = Some(Instant::now());
227             }
228 
229             (state.avail_features, state.acked_features)
230         } else {
231             (1u64 << VIRTIO_F_VERSION_1, 0)
232         };
233 
234         let timer_fd = timerfd_create().map_err(|e| {
235             error!("Failed to create timer fd {}", e);
236             e
237         })?;
238         // SAFETY: timer_fd is a valid fd
239         let timer = unsafe { File::from_raw_fd(timer_fd) };
240 
241         Ok(Watchdog {
242             common: VirtioCommon {
243                 device_type: VirtioDeviceType::Watchdog as u32,
244                 queue_sizes: QUEUE_SIZES.to_vec(),
245                 paused_sync: Some(Arc::new(Barrier::new(2))),
246                 avail_features,
247                 acked_features,
248                 min_queues: 1,
249                 ..Default::default()
250             },
251             id,
252             seccomp_action,
253             reset_evt,
254             last_ping_time: Arc::new(Mutex::new(last_ping_time)),
255             timer,
256             exit_evt,
257         })
258     }
259 
260     fn state(&self) -> WatchdogState {
261         WatchdogState {
262             avail_features: self.common.avail_features,
263             acked_features: self.common.acked_features,
264             enabled: self.last_ping_time.lock().unwrap().is_some(),
265         }
266     }
267 
268     #[cfg(fuzzing)]
269     pub fn wait_for_epoll_threads(&mut self) {
270         self.common.wait_for_epoll_threads();
271     }
272 }
273 
274 impl Drop for Watchdog {
275     fn drop(&mut self) {
276         if let Some(kill_evt) = self.common.kill_evt.take() {
277             // Ignore the result because there is nothing we can do about it.
278             let _ = kill_evt.write(1);
279         }
280     }
281 }
282 
283 fn timerfd_create() -> Result<RawFd, io::Error> {
284     // SAFETY: FFI call, trivially safe
285     let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) };
286     if res < 0 {
287         Err(io::Error::last_os_error())
288     } else {
289         Ok(res as RawFd)
290     }
291 }
292 
293 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> {
294     let periodic = libc::itimerspec {
295         it_interval: libc::timespec {
296             tv_sec: secs,
297             tv_nsec: 0,
298         },
299         it_value: libc::timespec {
300             tv_sec: secs,
301             tv_nsec: 0,
302         },
303     };
304 
305     let res =
306         // SAFETY: FFI call with correct arguments
307         unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) };
308 
309     if res < 0 {
310         Err(io::Error::last_os_error())
311     } else {
312         Ok(())
313     }
314 }
315 
316 impl VirtioDevice for Watchdog {
317     fn device_type(&self) -> u32 {
318         self.common.device_type
319     }
320 
321     fn queue_max_sizes(&self) -> &[u16] {
322         &self.common.queue_sizes
323     }
324 
325     fn features(&self) -> u64 {
326         self.common.avail_features
327     }
328 
329     fn ack_features(&mut self, value: u64) {
330         self.common.ack_features(value)
331     }
332 
333     fn activate(
334         &mut self,
335         mem: GuestMemoryAtomic<GuestMemoryMmap>,
336         interrupt_cb: Arc<dyn VirtioInterrupt>,
337         mut queues: Vec<(usize, Queue, EventFd)>,
338     ) -> ActivateResult {
339         self.common.activate(&queues, &interrupt_cb)?;
340         let (kill_evt, pause_evt) = self.common.dup_eventfds();
341 
342         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
343             error!("Failed to clone reset_evt eventfd: {}", e);
344             ActivateError::BadActivate
345         })?;
346 
347         let timer = self.timer.try_clone().map_err(|e| {
348             error!("Failed to clone timer fd: {}", e);
349             ActivateError::BadActivate
350         })?;
351 
352         let (_, queue, queue_evt) = queues.remove(0);
353 
354         let mut handler = WatchdogEpollHandler {
355             mem,
356             queue,
357             interrupt_cb,
358             queue_evt,
359             kill_evt,
360             pause_evt,
361             timer,
362             last_ping_time: self.last_ping_time.clone(),
363             reset_evt,
364         };
365 
366         let paused = self.common.paused.clone();
367         let paused_sync = self.common.paused_sync.clone();
368         let mut epoll_threads = Vec::new();
369 
370         spawn_virtio_thread(
371             &self.id,
372             &self.seccomp_action,
373             Thread::VirtioWatchdog,
374             &mut epoll_threads,
375             &self.exit_evt,
376             move || handler.run(paused, paused_sync.unwrap()),
377         )?;
378 
379         self.common.epoll_threads = Some(epoll_threads);
380 
381         event!("virtio-device", "activated", "id", &self.id);
382         Ok(())
383     }
384 
385     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
386         let result = self.common.reset();
387         event!("virtio-device", "reset", "id", &self.id);
388         result
389     }
390 }
391 
392 impl Pausable for Watchdog {
393     fn pause(&mut self) -> result::Result<(), MigratableError> {
394         info!("Watchdog paused - disabling timer");
395         timerfd_setup(&self.timer, 0)
396             .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?;
397         self.common.pause()
398     }
399 
400     fn resume(&mut self) -> result::Result<(), MigratableError> {
401         // Reset the timer on pause if it was previously used
402         if self.last_ping_time.lock().unwrap().is_some() {
403             info!(
404                 "Watchdog resumed - enabling timer (every {} seconds)",
405                 WATCHDOG_TIMER_INTERVAL
406             );
407             self.last_ping_time.lock().unwrap().replace(Instant::now());
408             timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
409                 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?;
410         }
411         self.common.resume()
412     }
413 }
414 
impl Snapshottable for Watchdog {
    /// Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }

    /// Builds a snapshot, keyed by the device id, from the current
    /// `WatchdogState` (feature bits and whether a ping has been recorded).
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id, &self.state())
    }
}
424 
// No methods are overridden for either trait.
impl Transportable for Watchdog {}
impl Migratable for Watchdog {}
427