xref: /cloud-hypervisor/virtio-devices/src/watchdog.rs (revision eb0b14f70ed5ed44b76579145fd2a741c0100ae4)
1 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
2 //
3 // Copyright © 2020 Intel Corporation
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-3-Clause file.
7 
8 use std::fs::File;
9 use std::io::{self, Read};
10 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
11 use std::result;
12 use std::sync::atomic::AtomicBool;
13 use std::sync::{Arc, Barrier, Mutex};
14 use std::time::Instant;
15 
16 use anyhow::anyhow;
17 use seccompiler::SeccompAction;
18 use serde::{Deserialize, Serialize};
19 use thiserror::Error;
20 use virtio_queue::{Queue, QueueT};
21 use vm_memory::{Bytes, GuestAddressSpace, GuestMemoryAtomic};
22 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
23 use vmm_sys_util::eventfd::EventFd;
24 
25 use super::{
26     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
27     Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
28     VIRTIO_F_VERSION_1,
29 };
30 use crate::seccomp_filters::Thread;
31 use crate::thread_helper::spawn_virtio_thread;
32 use crate::{GuestMemoryMmap, VirtioInterrupt, VirtioInterruptType};
33 
// Maximum number of descriptors in the device's virtqueue.
const QUEUE_SIZE: u16 = 8;
// The device advertises exactly one queue, of size `QUEUE_SIZE`.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Epoll event: new descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// Epoll event: the periodic watchdog timer expired.
const TIMER_EXPIRED_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

// Period, in seconds, of the timer that checks whether a ping has been received.
// This needs to match what the driver is using.
const WATCHDOG_TIMER_INTERVAL: i64 = 15;

// Number of seconds since the last ping after which a reboot is triggered.
// The check is strict: the gap has to exceed this value (timer interval plus 5 seconds of slack).
const WATCHDOG_TIMEOUT: u64 = WATCHDOG_TIMER_INTERVAL as u64 + 5;
48 
/// Errors raised while servicing the watchdog virtqueue.
#[derive(Error, Debug)]
enum Error {
    /// Arming the timer fd on the first ping failed.
    #[error("Error programming timer fd: {0}")]
    TimerfdSetup(#[source] io::Error),
    /// A descriptor chain popped from the queue contained no descriptor at all.
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    /// Returning a descriptor to the used ring failed.
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(#[source] virtio_queue::Error),
    /// The ping descriptor was not write-only or had a length of zero.
    #[error("Invalid descriptor")]
    InvalidDescriptor,
    /// Writing the ping acknowledgement byte into guest memory failed.
    #[error("Failed to write to guest memory: {0}")]
    GuestMemoryWrite(#[source] vm_memory::guest_memory::Error),
}
62 
/// State owned by the watchdog worker thread: it services the ping queue and the periodic
/// timer through an epoll loop.
struct WatchdogEpollHandler {
    // Guest memory used to pop descriptor chains from the queue.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // The single virtqueue on which the driver sends its pings.
    queue: Queue,
    // Callback used to raise the queue interrupt once descriptors have been returned.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Registered with epoll as `QUEUE_AVAIL_EVENT`.
    queue_evt: EventFd,
    // Handed to `EpollHelper::new` together with `pause_evt`.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Timer fd registered with epoll as `TIMER_EXPIRED_EVENT`.
    timer: File,
    // Time of the most recent ping; `None` until the first ping. Shared with `Watchdog`.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Written when the watchdog timeout is exceeded.
    reset_evt: EventFd,
}
74 
75 impl WatchdogEpollHandler {
76     // The main queue is very simple - the driver "pings" the device by passing it a (write-only)
77     // descriptor. In response the device writes a 1 into the descriptor and returns it to the driver
78     fn process_queue(&mut self) -> result::Result<bool, Error> {
79         let queue = &mut self.queue;
80         let mut used_descs = false;
81         while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) {
82             let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
83 
84             if !(desc.is_write_only() && desc.len() > 0) {
85                 return Err(Error::InvalidDescriptor);
86             }
87 
88             desc_chain
89                 .memory()
90                 .write_obj(1u8, desc.addr())
91                 .map_err(Error::GuestMemoryWrite)?;
92 
93             // If this is the first "ping" then setup the timer
94             if self.last_ping_time.lock().unwrap().is_none() {
95                 info!(
96                     "First ping received. Starting timer (every {} seconds)",
97                     WATCHDOG_TIMER_INTERVAL
98                 );
99                 timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL).map_err(Error::TimerfdSetup)?;
100             }
101             self.last_ping_time.lock().unwrap().replace(Instant::now());
102 
103             queue
104                 .add_used(desc_chain.memory(), desc_chain.head_index(), desc.len())
105                 .map_err(Error::QueueAddUsed)?;
106             used_descs = true;
107         }
108 
109         Ok(used_descs)
110     }
111 
112     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
113         self.interrupt_cb
114             .trigger(VirtioInterruptType::Queue(0))
115             .map_err(|e| {
116                 error!("Failed to signal used queue: {:?}", e);
117                 DeviceError::FailedSignalingUsedQueue(e)
118             })
119     }
120 
121     fn run(
122         &mut self,
123         paused: Arc<AtomicBool>,
124         paused_sync: Arc<Barrier>,
125     ) -> result::Result<(), EpollHelperError> {
126         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
127         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
128         helper.add_event(self.timer.as_raw_fd(), TIMER_EXPIRED_EVENT)?;
129         helper.run(paused, paused_sync, self)?;
130 
131         Ok(())
132     }
133 }
134 
135 impl EpollHelperHandler for WatchdogEpollHandler {
136     fn handle_event(
137         &mut self,
138         _helper: &mut EpollHelper,
139         event: &epoll::Event,
140     ) -> result::Result<(), EpollHelperError> {
141         let ev_type = event.data as u16;
142         match ev_type {
143             QUEUE_AVAIL_EVENT => {
144                 self.queue_evt.read().map_err(|e| {
145                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
146                 })?;
147 
148                 let needs_notification = self.process_queue().map_err(|e| {
149                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
150                 })?;
151                 if needs_notification {
152                     self.signal_used_queue().map_err(|e| {
153                         EpollHelperError::HandleEvent(anyhow!(
154                             "Failed to signal used queue: {:?}",
155                             e
156                         ))
157                     })?;
158                 }
159             }
160             TIMER_EXPIRED_EVENT => {
161                 // When reading from the timerfd you get 8 bytes indicating
162                 // the number of times this event has elapsed since the last read.
163                 let mut buf = vec![0; 8];
164                 self.timer.read_exact(&mut buf).map_err(|e| {
165                     EpollHelperError::HandleEvent(anyhow!("Error reading from timer fd: {:}", e))
166                 })?;
167 
168                 if let Some(last_ping_time) = self.last_ping_time.lock().unwrap().as_ref() {
169                     let now = Instant::now();
170                     let gap = now.duration_since(*last_ping_time).as_secs();
171                     if gap > WATCHDOG_TIMEOUT {
172                         error!("Watchdog triggered: {} seconds since last ping", gap);
173                         self.reset_evt.write(1).ok();
174                     }
175                 }
176             }
177             _ => {
178                 return Err(EpollHelperError::HandleEvent(anyhow!(
179                     "Unexpected event: {}",
180                     ev_type
181                 )));
182             }
183         }
184         Ok(())
185     }
186 }
187 
/// Virtio device for exposing a watchdog to the guest
pub struct Watchdog {
    // Generic virtio device state (features, queue sizes, pause/kill plumbing, worker threads).
    common: VirtioCommon,
    // Device identifier, used for logging, events and as the snapshot id.
    id: String,
    // Passed to `spawn_virtio_thread` when the worker thread is started.
    seccomp_action: SeccompAction,
    // Cloned into the worker, which writes it when the watchdog timeout is exceeded.
    reset_evt: EventFd,
    // Time of the most recent ping; `None` means the guest has not enabled the watchdog yet.
    // Shared with the worker thread.
    last_ping_time: Arc<Mutex<Option<Instant>>>,
    // Timer fd; the worker receives a clone of it on activation.
    timer: File,
    // Passed to `spawn_virtio_thread` when the worker thread is started.
    exit_evt: EventFd,
}
198 
/// Serializable snapshot of a `Watchdog`, used to save and restore the device.
#[derive(Serialize, Deserialize)]
pub struct WatchdogState {
    /// Feature bits offered by the device.
    pub avail_features: u64,
    /// Feature bits acknowledged by the driver.
    pub acked_features: u64,
    /// Whether a ping had already been received (i.e. the watchdog was running) at snapshot time.
    pub enabled: bool,
}
205 
206 impl Watchdog {
207     /// Create a new virtio watchdog device that will reboot VM if the guest hangs
208     pub fn new(
209         id: String,
210         reset_evt: EventFd,
211         seccomp_action: SeccompAction,
212         exit_evt: EventFd,
213         state: Option<WatchdogState>,
214     ) -> io::Result<Watchdog> {
215         let mut last_ping_time = None;
216         let (avail_features, acked_features, paused) = if let Some(state) = state {
217             info!("Restoring virtio-watchdog {}", id);
218 
219             // When restoring enable the watchdog if it was previously enabled.
220             // We reset the timer to ensure that we don't unnecessarily reboot
221             // due to the offline time.
222             if state.enabled {
223                 last_ping_time = Some(Instant::now());
224             }
225 
226             (state.avail_features, state.acked_features, true)
227         } else {
228             (1u64 << VIRTIO_F_VERSION_1, 0, false)
229         };
230 
231         let timer_fd = timerfd_create().map_err(|e| {
232             error!("Failed to create timer fd {}", e);
233             e
234         })?;
235         // SAFETY: timer_fd is a valid fd
236         let timer = unsafe { File::from_raw_fd(timer_fd) };
237 
238         Ok(Watchdog {
239             common: VirtioCommon {
240                 device_type: VirtioDeviceType::Watchdog as u32,
241                 queue_sizes: QUEUE_SIZES.to_vec(),
242                 paused_sync: Some(Arc::new(Barrier::new(2))),
243                 avail_features,
244                 acked_features,
245                 min_queues: 1,
246                 paused: Arc::new(AtomicBool::new(paused)),
247                 ..Default::default()
248             },
249             id,
250             seccomp_action,
251             reset_evt,
252             last_ping_time: Arc::new(Mutex::new(last_ping_time)),
253             timer,
254             exit_evt,
255         })
256     }
257 
258     fn state(&self) -> WatchdogState {
259         WatchdogState {
260             avail_features: self.common.avail_features,
261             acked_features: self.common.acked_features,
262             enabled: self.last_ping_time.lock().unwrap().is_some(),
263         }
264     }
265 
266     #[cfg(fuzzing)]
267     pub fn wait_for_epoll_threads(&mut self) {
268         self.common.wait_for_epoll_threads();
269     }
270 }
271 
272 impl Drop for Watchdog {
273     fn drop(&mut self) {
274         if let Some(kill_evt) = self.common.kill_evt.take() {
275             // Ignore the result because there is nothing we can do about it.
276             let _ = kill_evt.write(1);
277         }
278         self.common.wait_for_epoll_threads();
279     }
280 }
281 
282 fn timerfd_create() -> Result<RawFd, io::Error> {
283     // SAFETY: FFI call, trivially safe
284     let res = unsafe { libc::timerfd_create(libc::CLOCK_MONOTONIC, 0) };
285     if res < 0 {
286         Err(io::Error::last_os_error())
287     } else {
288         Ok(res as RawFd)
289     }
290 }
291 
292 fn timerfd_setup(timer: &File, secs: i64) -> Result<(), io::Error> {
293     let periodic = libc::itimerspec {
294         it_interval: libc::timespec {
295             tv_sec: secs,
296             tv_nsec: 0,
297         },
298         it_value: libc::timespec {
299             tv_sec: secs,
300             tv_nsec: 0,
301         },
302     };
303 
304     let res =
305         // SAFETY: FFI call with correct arguments
306         unsafe { libc::timerfd_settime(timer.as_raw_fd(), 0, &periodic, std::ptr::null_mut()) };
307 
308     if res < 0 {
309         Err(io::Error::last_os_error())
310     } else {
311         Ok(())
312     }
313 }
314 
315 impl VirtioDevice for Watchdog {
316     fn device_type(&self) -> u32 {
317         self.common.device_type
318     }
319 
320     fn queue_max_sizes(&self) -> &[u16] {
321         &self.common.queue_sizes
322     }
323 
324     fn features(&self) -> u64 {
325         self.common.avail_features
326     }
327 
328     fn ack_features(&mut self, value: u64) {
329         self.common.ack_features(value)
330     }
331 
332     fn activate(
333         &mut self,
334         mem: GuestMemoryAtomic<GuestMemoryMmap>,
335         interrupt_cb: Arc<dyn VirtioInterrupt>,
336         mut queues: Vec<(usize, Queue, EventFd)>,
337     ) -> ActivateResult {
338         self.common.activate(&queues, &interrupt_cb)?;
339         let (kill_evt, pause_evt) = self.common.dup_eventfds();
340 
341         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
342             error!("Failed to clone reset_evt eventfd: {}", e);
343             ActivateError::BadActivate
344         })?;
345 
346         let timer = self.timer.try_clone().map_err(|e| {
347             error!("Failed to clone timer fd: {}", e);
348             ActivateError::BadActivate
349         })?;
350 
351         let (_, queue, queue_evt) = queues.remove(0);
352 
353         let mut handler = WatchdogEpollHandler {
354             mem,
355             queue,
356             interrupt_cb,
357             queue_evt,
358             kill_evt,
359             pause_evt,
360             timer,
361             last_ping_time: self.last_ping_time.clone(),
362             reset_evt,
363         };
364 
365         let paused = self.common.paused.clone();
366         let paused_sync = self.common.paused_sync.clone();
367         let mut epoll_threads = Vec::new();
368 
369         spawn_virtio_thread(
370             &self.id,
371             &self.seccomp_action,
372             Thread::VirtioWatchdog,
373             &mut epoll_threads,
374             &self.exit_evt,
375             move || handler.run(paused, paused_sync.unwrap()),
376         )?;
377 
378         self.common.epoll_threads = Some(epoll_threads);
379 
380         event!("virtio-device", "activated", "id", &self.id);
381         Ok(())
382     }
383 
384     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
385         let result = self.common.reset();
386         event!("virtio-device", "reset", "id", &self.id);
387         result
388     }
389 }
390 
391 impl Pausable for Watchdog {
392     fn pause(&mut self) -> result::Result<(), MigratableError> {
393         info!("Watchdog paused - disabling timer");
394         timerfd_setup(&self.timer, 0)
395             .map_err(|e| MigratableError::Pause(anyhow!("Error clearing timer: {:?}", e)))?;
396         self.common.pause()
397     }
398 
399     fn resume(&mut self) -> result::Result<(), MigratableError> {
400         // Reset the timer on pause if it was previously used
401         if self.last_ping_time.lock().unwrap().is_some() {
402             info!(
403                 "Watchdog resumed - enabling timer (every {} seconds)",
404                 WATCHDOG_TIMER_INTERVAL
405             );
406             self.last_ping_time.lock().unwrap().replace(Instant::now());
407             timerfd_setup(&self.timer, WATCHDOG_TIMER_INTERVAL)
408                 .map_err(|e| MigratableError::Resume(anyhow!("Error setting timer: {:?}", e)))?;
409         }
410         self.common.resume()
411     }
412 }
413 
414 impl Snapshottable for Watchdog {
415     fn id(&self) -> String {
416         self.id.clone()
417     }
418 
419     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
420         Snapshot::new_from_state(&self.state())
421     }
422 }
423 
// Neither impl below overrides any trait method for the watchdog.
impl Transportable for Watchdog {}
impl Migratable for Watchdog {}
426