xref: /cloud-hypervisor/virtio-devices/src/watchdog.rs (revision f67b3f79ea19c9a66e04074cbbf5d292f6529e43)
1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
2 //
3 // Copyright © 2020 Intel Corporation
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-3-Clause file.
7 
8 use super::Error as DeviceError;
9 use super::{
10     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
11     VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
12 };
13 use crate::seccomp_filters::Thread;
14 use crate::thread_helper::spawn_virtio_thread;
15 use crate::GuestMemoryMmap;
16 use crate::{VirtioInterrupt, VirtioInterruptType};
17 use anyhow::anyhow;
18 use seccompiler::SeccompAction;
19 use std::fs::File;
20 use std::io::{self, Read};
21 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
22 use std::result;
23 use std::sync::atomic::AtomicBool;
24 use std::sync::{Arc, Barrier, Mutex};
25 use std::time::Instant;
26 use versionize::{VersionMap, Versionize, VersionizeResult};
27 use versionize_derive::Versionize;
28 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
29 use vm_migration::VersionMapped;
30 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
31 use vmm_sys_util::eventfd::EventFd;
32 
33 const QUEUE_SIZE: u16 = 8;
34 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
35 
36 // New descriptors are pending on the virtio queue.
37 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
38 // Timer expired
39 const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
40 
41 // Number of seconds to check to see if there has been a ping
42 // This needs to match what the driver is using.
43 const WATCHDOG_TIMER_INTERVAL: i64 = 15;
44 
45 // Number of seconds since last ping to trigger reboot
46 const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5;
47 
48 struct WatchdogEpollHandler {
49     queues: Vec<Queue>,
50     mem: GuestMemoryAtomic<GuestMemoryMmap>,
51     interrupt_cb: Arc<dyn VirtioInterrupt>,
52     queue_evt: EventFd,
53     kill_evt: EventFd,
54     pause_evt: EventFd,
55     timer: File,
56     last_ping_time: Arc<Mutex<Option<Instant>>>,
57     reset_evt: EventFd,
58 }
59 
60 impl WatchdogEpollHandler {
61     // The main queue is very simple - the driver "pings" the device by passing it a (write-only)
62     // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver
63     fn process_queue(&mut self) -> bool {
64         let queue = &mut self.queues[0];
65         let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
66         let mut used_count = 0;
67         let mem = self.mem.memory();
68         for avail_desc in queue.iter(&mem) {
69             let mut len = 0;
70 
71             if avail_desc.is_write_only() && mem.write_obj(1u8, avail_desc.addr).is_ok() {
72                 len = avail_desc.len;
73                 // If this is the first "ping" then setup the timer
74                 if self.last_ping_time.lock().unwrap().is_none() {
75                     info!(
76                         "First ping received. Starting timer (every {} seconds)",
77                         WATCHDOG_TIMER_INTERVAL
78                     );
79                     if let Err(e) = timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL) {
80                         error!("Error programming timer fd: {:?}", e);
81                     }
82                 }
83                 self.last_ping_time.lock().unwrap().replace(Instant::now());
84             }
85 
86             used_desc_heads[used_count] = (avail_desc.index, len);
87             used_count += 1;
88         }
89 
90         for &(desc_index, len) in &used_desc_heads[..used_count] {
91             queue.add_used(&mem, desc_index, len);
92         }
93         used_count > 0
94     }
95 
96     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
97         self.interrupt_cb
98             .trigger(&VirtioInterruptType::Queue, Some(&self.queues[0]))
99             .map_err(|e| {
100                 error!("Failed to signal used queue: {:?}", e);
101                 DeviceError::FailedSignalingUsedQueue(e)
102             })
103     }
104 
105     fn run(
106         &mut self,
107         paused: Arc<AtomicBool>,
108         paused_sync: Arc<Barrier>,
109     ) -> result::Result<(), EpollHelperError> {
110         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
111         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
112         helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?;
113         helper.run(paused, paused_sync, self)?;
114 
115         Ok(())
116     }
117 }
118 
119 impl EpollHelperHandler for WatchdogEpollHandler {
120     fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
121         let ev_type = event.data as u16;
122         match ev_type {
123             QUEUE_AVAIL_EVENT => {
124                 if let Err(e) = self.queue_evt.read() {
125                     error!("Failed to get queue event: {:?}", e);
126                     return true;
127                 } else if self.process_queue() {
128                     if let Err(e) = self.signal_used_queue() {
129                         error!("Failed to signal used queue: {:?}", e);
130                         return true;
131                     }
132                 }
133             }
134             TIMER_EXPIRED_EVENT => {
135                 // When reading from the timerfd you get 8 bytes indicating
136                 // the number of times this event has elapsed since the last read.
137                 let mut buf = vec![0; 8];
138                 if let Err(e) = self.timer.read_exact(&mut buf) {
139                     error!("Error reading from timer fd: {:}", e);
140                     return true;
141                 }
142                 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() {
143                     let now = Instant::now();
144                     let gap = now.duration_since(*last_ping_time).as_secs();
145                     if gap > WATCHDOG_TIMEOUT {
146                         error!("Watchdog triggered: {} seconds since last ping", gap);
147                         self.reset_evt.write(1).ok();
148                     }
149                 }
150                 return false;
151             }
152             _ => {
153                 error!("Unexpected event: {}", ev_type);
154                 return true;
155             }
156         }
157         false
158     }
159 }
160 
161 /// Virtio device for exposing a watchdog to the guest
162 pub struct Watchdog {
163     common: VirtioCommon,
164     id: String,
165     seccomp_action: SeccompAction,
166     reset_evt: EventFd,
167     last_ping_time: Arc<Mutex<Option<Instant>>>,
168     timer: File,
169     exit_evt: EventFd,
170 }
171 
172 #[derive(Versionize)]
173 pub struct WatchdogState {
174     pub avail_features: u64,
175     pub acked_features: u64,
176     pub enabled: bool,
177 }
178 
179 impl VersionMapped for WatchdogState {}
180 
181 impl Watchdog {
182     /// Create a new virtio watchdog device that will reboot VM if the guest hangs
183     pub fn new(
184         id: String,
185         reset_evt: EventFd,
186         seccomp_action: SeccompAction,
187         exit_evt: EventFd,
188     ) -> io::Result<Watchdog> {
189         let avail_features = 1u64 << VIRTIO_F_VERSION_1;
190         let timer_fd = timerfd_create().map_err(|e| {
191             error!("Failed to create timer fd {}", e);
192             e
193         })?;
194         let timer = unsafe { File::from_raw_fd(timer_fd) };
195         Ok(Watchdog {
196             common: VirtioCommon {
197                 device_type: VirtioDeviceType::Watchdog as u32,
198                 queue_sizes: QUEUE_SIZES.to_vec(),
199                 paused_sync: Some(Arc::new(Barrier::new(2))),
200                 avail_features,
201                 min_queues: 1,
202                 ..Default::default()
203             },
204             id,
205             seccomp_action,
206             reset_evt,
207             last_ping_time: Arc::new(Mutex::new(None)),
208             timer,
209             exit_evt,
210         })
211     }
212 
213     fn state(&self) -> WatchdogState {
214         WatchdogState {
215             avail_features: self.common.avail_features,
216             acked_features: self.common.acked_features,
217             enabled: self.last_ping_time.lock().unwrap().is_some(),
218         }
219     }
220 
221     fn set_state(&mut self, state: &WatchdogState) {
222         self.common.avail_features = state.avail_features;
223         self.common.acked_features = state.acked_features;
224         // When restoring enable the watchdog if it was previously enabled. We reset the timer
225         // to ensure that we don't unnecessarily reboot due to the offline time.
226         if state.enabled {
227             self.last_ping_time.lock().unwrap().replace(Instant::now());
228         }
229     }
230 }
231 
232 impl Drop for Watchdog {
233     fn drop(&mut self) {
234         if let Some(kill_evt) = self.common.kill_evt.take() {
235             // Ignore the result because there is nothing we can do about it.
236             let _ = kill_evt.write(1);
237         }
238     }
239 }
240 
241 fn timerfd_create() -> Result<RawFd, io::Error> {
242     let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) };
243     if res < 0 {
244         Err(io::Error::last_os_error())
245     } else {
246         Ok(res as RawFd)
247     }
248 }
249 
250 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> {
251     let periodic = libc::itimerspec {
252         it_interval: libc::timespec {
253             tv_sec: secs,
254             tv_nsec: 0,
255         },
256         it_value: libc::timespec {
257             tv_sec: secs,
258             tv_nsec: 0,
259         },
260     };
261 
262     let res =
263         unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) };
264 
265     if res < 0 {
266         Err(io::Error::last_os_error())
267     } else {
268         Ok(())
269     }
270 }
271 
272 impl VirtioDevice for Watchdog {
273     fn device_type(&self) -> u32 {
274         self.common.device_type
275     }
276 
277     fn queue_max_sizes(&self) -> &[u16] {
278         &self.common.queue_sizes
279     }
280 
281     fn features(&self) -> u64 {
282         self.common.avail_features
283     }
284 
285     fn ack_features(&mut self, value: u64) {
286         self.common.ack_features(value)
287     }
288 
289     fn activate(
290         &mut self,
291         mem: GuestMemoryAtomic<GuestMemoryMmap>,
292         interrupt_cb: Arc<dyn VirtioInterrupt>,
293         queues: Vec<Queue>,
294         mut queue_evts: Vec<EventFd>,
295     ) -> ActivateResult {
296         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
297         let (kill_evt, pause_evt) = self.common.dup_eventfds();
298 
299         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
300             error!("Failed to clone reset_evt eventfd: {}", e);
301             ActivateError::BadActivate
302         })?;
303 
304         let timer = self.timer.try_clone().map_err(|e| {
305             error!("Failed to clone timer fd: {}", e);
306             ActivateError::BadActivate
307         })?;
308 
309         let mut handler = WatchdogEpollHandler {
310             queues,
311             mem,
312             interrupt_cb,
313             queue_evt: queue_evts.remove(0),
314             kill_evt,
315             pause_evt,
316             timer,
317             last_ping_time: self.last_ping_time.clone(),
318             reset_evt,
319         };
320 
321         let paused = self.common.paused.clone();
322         let paused_sync = self.common.paused_sync.clone();
323         let mut epoll_threads = Vec::new();
324 
325         spawn_virtio_thread(
326             &self.id,
327             &self.seccomp_action,
328             Thread::VirtioWatchdog,
329             &mut epoll_threads,
330             &self.exit_evt,
331             move || {
332                 if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
333                     error!("Error running worker: {:?}", e);
334                 }
335             },
336         )?;
337 
338         self.common.epoll_threads = Some(epoll_threads);
339 
340         event!("virtio-device", "activated", "id", &self.id);
341         Ok(())
342     }
343 
344     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
345         let result = self.common.reset();
346         event!("virtio-device", "reset", "id", &self.id);
347         result
348     }
349 }
350 
351 impl Pausable for Watchdog {
352     fn pause(&mut self) -> result::Result<(), MigratableError> {
353         info!("Watchdog paused - disabling timer");
354         timerfd_setup(&self.timer, 0)
355             .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?;
356         self.common.pause()
357     }
358 
359     fn resume(&mut self) -> result::Result<(), MigratableError> {
360         // Reset the timer on pause if it was previously used
361         if self.last_ping_time.lock().unwrap().is_some() {
362             info!(
363                 "Watchdog resumed - enabling timer (every {} seconds)",
364                 WATCHDOG_TIMER_INTERVAL
365             );
366             self.last_ping_time.lock().unwrap().replace(Instant::now());
367             timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
368                 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?;
369         }
370         self.common.resume()
371     }
372 }
373 
374 impl Snapshottable for Watchdog {
375     fn id(&self) -> String {
376         self.id.clone()
377     }
378 
379     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
380         Snapshot::new_from_versioned_state(&self.id, &self.state())
381     }
382 
383     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
384         self.set_state(&snapshot.to_versioned_state(&self.id)?);
385         Ok(())
386     }
387 }
388 
389 impl Transportable for Watchdog {}
390 impl Migratable for Watchdog {}
391