xref: /cloud-hypervisor/virtio-devices/src/watchdog.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
2 //
3 // Copyright © 2020 Intel Corporation
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-3-Clause file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, VirtioCommon,
11     VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
12 };
13 use crate::seccomp_filters::Thread;
14 use crate::thread_helper::spawn_virtio_thread;
15 use crate::GuestMemoryMmap;
16 use crate::{VirtioInterrupt, VirtioInterruptType};
17 use anyhow::anyhow;
18 use seccompiler::SeccompAction;
19 use std::fs::File;
20 use std::io::{self, Read};
21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
22 use std::result;
23 use std::sync::atomic::AtomicBool;
24 use std::sync::{Arc, Barrier, Mutex};
25 use std::time::Instant;
26 use versionize::{VersionMap, Versionize, VersionizeResult};
27 use versionize_derive::Versionize;
28 use virtio_queue::Queue;
29 use vm_memory::{Bytes, GuestMemoryAtomic};
30 use vm_migration::VersionMapped;
31 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
32 use vmm_sys_util::eventfd::EventFd;
33 
// Maximum number of descriptors in the watchdog's virtqueue.
const QUEUE_SIZE: u16 = 8;
// Maximum size of each queue exposed by the device; there is a single entry, so a single queue.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Epoll event identifier: new descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// Epoll event identifier: the periodic watchdog timer expired.
const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Period, in seconds, at which the device checks whether a ping has been received.
// This needs to match what the driver is using.
const WATCHDOG_TIMER_INTERVAL: i64 = 15;

// Number of seconds without a ping after which the reset event is signalled.
// This is the check interval plus a 5 second margin.
const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5;
48 
// State owned by the watchdog worker thread: it services the ping queue and the
// periodic timer from a single epoll loop.
struct WatchdogEpollHandler {
    // Virtqueues handed over at activation; only index 0 is used.
    queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
    // Used to raise the queue interrupt after descriptors are returned to the driver.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Signalled when new descriptors are available on the queue.
    queue_evt: EventFd,
    // Passed to the epoll helper together with `pause_evt` when the loop is set up.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Clone of the device's timer fd; becomes readable every time the timer expires.
    timer: File,
    // Time of the most recent ping, shared with the `Watchdog` device.
    // `None` until the first ping has been received.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Written when the guest has not pinged for longer than `WATCHDOG_TIMEOUT`.
    reset_evt: EventFd,
}
59 
60 impl WatchdogEpollHandler {
61     // The main queue is very simple - the driver "pings" the device by passing it a (write-only)
62     // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver
63     fn process_queue(&mut self) -> bool {
64         let queue = &mut self.queues[0];
65         let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
66         let mut used_count = 0;
67         for mut desc_chain in queue.iter().unwrap() {
68             let desc = desc_chain.next().unwrap();
69 
70             let mut len = 0;
71 
72             if desc.is_write_only() && desc_chain.memory().write_obj(1u8, desc.addr()).is_ok() {
73                 len = desc.len();
74                 // If this is the first "ping" then setup the timer
75                 if self.last_ping_time.lock().unwrap().is_none() {
76                     info!(
77                         "First ping received. Starting timer (every {} seconds)",
78                         WATCHDOG_TIMER_INTERVAL
79                     );
80                     if let Err(e) = timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) {
81                         error!("Error programming timer fd: {:?}", e);
82                     }
83                 }
84                 self.last_ping_time.lock().unwrap().replace(Instant::now());
85             }
86 
87             used_desc_heads[used_count] = (desc_chain.head_index(), len);
88             used_count += 1;
89         }
90 
91         for &(desc_index, len) in &used_desc_heads[..used_count] {
92             queue.add_used(desc_index, len).unwrap();
93         }
94         used_count > 0
95     }
96 
97     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
98         self.interrupt_cb
99             .trigger(VirtioInterruptType::Queue(0))
100             .map_err(|e| {
101                 error!("Failed to signal used queue: {:?}", e);
102                 DeviceError::FailedSignalingUsedQueue(e)
103             })
104     }
105 
106     fn run(
107         &mut self,
108         paused: Arc<AtomicBool>,
109         paused_sync: Arc<Barrier>,
110     ) -> result::Result<(), EpollHelperError> {
111         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
112         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
113         helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?;
114         helper.run(paused, paused_sync, self)?;
115 
116         Ok(())
117     }
118 }
119 
120 impl EpollHelperHandler for WatchdogEpollHandler {
121     fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
122         let ev_type = event.data as u16;
123         match ev_type {
124             QUEUE_AVAIL_EVENT => {
125                 if let Err(e) = self.queue_evt.read() {
126                     error!("Failed to get queue event: {:?}", e);
127                     return true;
128                 } else if self.process_queue() {
129                     if let Err(e) = self.signal_used_queue() {
130                         error!("Failed to signal used queue: {:?}", e);
131                         return true;
132                     }
133                 }
134             }
135             TIMER_EXPIRED_EVENT => {
136                 // When reading from the timerfd you get 8 bytes indicating
137                 // the number of times this event has elapsed since the last read.
138                 let mut buf = vec![0; 8];
139                 if let Err(e) = self.timer.read_exact(&mut buf) {
140                     error!("Error reading from timer fd: {:}", e);
141                     return true;
142                 }
143                 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() {
144                     let now = Instant::now();
145                     let gap = now.duration_since(*last_ping_time).as_secs();
146                     if gap > WATCHDOG_TIMEOUT {
147                         error!("Watchdog triggered: {} seconds since last ping", gap);
148                         self.reset_evt.write(1).ok();
149                     }
150                 }
151                 return false;
152             }
153             _ => {
154                 error!("Unexpected event: {}", ev_type);
155                 return true;
156             }
157         }
158         false
159     }
160 }
161 
/// Virtio device for exposing a watchdog to the guest
pub struct Watchdog {
    // Common virtio device state (device type, features, queue sizes, pause state, ...).
    common: VirtioCommon,
    // Device identifier; used when spawning the worker thread, in emitted events and
    // as the snapshot id.
    id: String,
    // Seccomp action passed on when the worker thread is spawned.
    seccomp_action: SeccompAction,
    // Event written by the worker when the guest stops pinging for too long.
    reset_evt: EventFd,
    // Time of the most recent ping, shared with the worker; `None` until the first ping.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Timer fd (wrapped in a `File`) driving the periodic ping check.
    timer: File,
    // Event passed on when the worker thread is spawned.
    exit_evt: EventFd,
}
172 
/// Snapshot of the watchdog device state used for snapshot/restore.
#[derive(Versionize)]
pub struct WatchdogState {
    /// Features offered by the device.
    pub avail_features: u64,
    /// Features acknowledged by the driver.
    pub acked_features: u64,
    /// Whether a ping had already been received when the state was captured.
    pub enabled: bool,
}

// No methods are overridden: the trait's default implementation is used.
impl VersionMapped for WatchdogState {}
181 
182 impl Watchdog {
183     /// Create a new virtio watchdog device that will reboot VM if the guest hangs
184     pub fn new(
185         id: String,
186         reset_evt: EventFd,
187         seccomp_action: SeccompAction,
188         exit_evt: EventFd,
189     ) -> io::Result<Watchdog> {
190         let avail_features = 1u64 << VIRTIO_F_VERSION_1;
191         let timer_fd = timerfd_create().map_err(|e| {
192             error!("Failed to create timer fd {}", e);
193             e
194         })?;
195         let timer = unsafe { File::from_raw_fd(timer_fd) };
196         Ok(Watchdog {
197             common: VirtioCommon {
198                 device_type: VirtioDeviceType::Watchdog as u32,
199                 queue_sizes: QUEUE_SIZES.to_vec(),
200                 paused_sync: Some(Arc::new(Barrier::new(2))),
201                 avail_features,
202                 min_queues: 1,
203                 ..Default::default()
204             },
205             id,
206             seccomp_action,
207             reset_evt,
208             last_ping_time: Arc::new(Mutex::new(None)),
209             timer,
210             exit_evt,
211         })
212     }
213 
214     fn state(&self) -> WatchdogState {
215         WatchdogState {
216             avail_features: self.common.avail_features,
217             acked_features: self.common.acked_features,
218             enabled: self.last_ping_time.lock().unwrap().is_some(),
219         }
220     }
221 
222     fn set_state(&mut self, state: &WatchdogState) {
223         self.common.avail_features = state.avail_features;
224         self.common.acked_features = state.acked_features;
225         // When restoring enable the watchdog if it was previously enabled. We reset the timer
226         // to ensure that we don't unnecessarily reboot due to the offline time.
227         if state.enabled {
228             self.last_ping_time.lock().unwrap().replace(Instant::now());
229         }
230     }
231 }
232 
233 impl Drop for Watchdog {
234     fn drop(&mut self) {
235         if let Some(kill_evt) = self.common.kill_evt.take() {
236             // Ignore the result because there is nothing we can do about it.
237             let _ = kill_evt.write(1);
238         }
239     }
240 }
241 
242 fn timerfd_create() -> Result<RawFd, io::Error> {
243     let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) };
244     if res < 0 {
245         Err(io::Error::last_os_error())
246     } else {
247         Ok(res as RawFd)
248     }
249 }
250 
251 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> {
252     let periodic = libc::itimerspec {
253         it_interval: libc::timespec {
254             tv_sec: secs,
255             tv_nsec: 0,
256         },
257         it_value: libc::timespec {
258             tv_sec: secs,
259             tv_nsec: 0,
260         },
261     };
262 
263     let res =
264         unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) };
265 
266     if res < 0 {
267         Err(io::Error::last_os_error())
268     } else {
269         Ok(())
270     }
271 }
272 
273 impl VirtioDevice for Watchdog {
274     fn device_type(&self) -> u32 {
275         self.common.device_type
276     }
277 
278     fn queue_max_sizes(&self) -> &[u16] {
279         &self.common.queue_sizes
280     }
281 
282     fn features(&self) -> u64 {
283         self.common.avail_features
284     }
285 
286     fn ack_features(&mut self, value: u64) {
287         self.common.ack_features(value)
288     }
289 
290     fn activate(
291         &mut self,
292         _mem: GuestMemoryAtomic<GuestMemoryMmap>,
293         interrupt_cb: Arc<dyn VirtioInterrupt>,
294         queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
295         mut queue_evts: Vec<EventFd>,
296     ) -> ActivateResult {
297         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
298         let (kill_evt, pause_evt) = self.common.dup_eventfds();
299 
300         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
301             error!("Failed to clone reset_evt eventfd: {}", e);
302             ActivateError::BadActivate
303         })?;
304 
305         let timer = self.timer.try_clone().map_err(|e| {
306             error!("Failed to clone timer fd: {}", e);
307             ActivateError::BadActivate
308         })?;
309 
310         let mut handler = WatchdogEpollHandler {
311             queues,
312             interrupt_cb,
313             queue_evt: queue_evts.remove(0),
314             kill_evt,
315             pause_evt,
316             timer,
317             last_ping_time: self.last_ping_time.clone(),
318             reset_evt,
319         };
320 
321         let paused = self.common.paused.clone();
322         let paused_sync = self.common.paused_sync.clone();
323         let mut epoll_threads = Vec::new();
324 
325         spawn_virtio_thread(
326             &self.id,
327             &self.seccomp_action,
328             Thread::VirtioWatchdog,
329             &mut epoll_threads,
330             &self.exit_evt,
331             move || {
332                 if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
333                     error!("Error running worker: {:?}", e);
334                 }
335             },
336         )?;
337 
338         self.common.epoll_threads = Some(epoll_threads);
339 
340         event!("virtio-device", "activated", "id", &self.id);
341         Ok(())
342     }
343 
344     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
345         let result = self.common.reset();
346         event!("virtio-device", "reset", "id", &self.id);
347         result
348     }
349 }
350 
351 impl Pausable for Watchdog {
352     fn pause(&mut self) -> result::Result<(), MigratableError> {
353         info!("Watchdog paused - disabling timer");
354         timerfd_setup(&self.timer, 0)
355             .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?;
356         self.common.pause()
357     }
358 
359     fn resume(&mut self) -> result::Result<(), MigratableError> {
360         // Reset the timer on pause if it was previously used
361         if self.last_ping_time.lock().unwrap().is_some() {
362             info!(
363                 "Watchdog resumed - enabling timer (every {} seconds)",
364                 WATCHDOG_TIMER_INTERVAL
365             );
366             self.last_ping_time.lock().unwrap().replace(Instant::now());
367             timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
368                 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?;
369         }
370         self.common.resume()
371     }
372 }
373 
374 impl Snapshottable for Watchdog {
375     fn id(&self) -> String {
376         self.id.clone()
377     }
378 
379     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
380         Snapshot::new_from_versioned_state(&self.id, &self.state())
381     }
382 
383     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
384         self.set_state(&snapshot.to_versioned_state(&self.id)?);
385         Ok(())
386     }
387 }
388 
// No methods are overridden for either trait: the default implementations are used.
impl Transportable for Watchdog {}
impl Migratable for Watchdog {}
391