xref: /cloud-hypervisor/virtio-devices/src/epoll_helper.rs (revision 8e2973fe7cc5a0e2c212fc327014ba6efb77b8c8)
17d9dc401SRob Bradford // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
27d9dc401SRob Bradford //
37d9dc401SRob Bradford // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
47d9dc401SRob Bradford // Use of this source code is governed by a BSD-style license that can be
57d9dc401SRob Bradford // found in the LICENSE-BSD-3-Clause file.
67d9dc401SRob Bradford //
77d9dc401SRob Bradford // Copyright © 2020 Intel Corporation
87d9dc401SRob Bradford //
97d9dc401SRob Bradford // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
107d9dc401SRob Bradford 
117d9dc401SRob Bradford use std::fs::File;
127d9dc401SRob Bradford use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
137d9dc401SRob Bradford use std::sync::atomic::{AtomicBool, Ordering};
14aa57762cSSebastien Boeuf use std::sync::{Arc, Barrier};
157d9dc401SRob Bradford use std::thread;
1688a9f799SRob Bradford 
17b1752994SBo Chen use thiserror::Error;
187d9dc401SRob Bradford use vmm_sys_util::eventfd::EventFd;
197d9dc401SRob Bradford 
207d9dc401SRob Bradford pub struct EpollHelper {
217d9dc401SRob Bradford     pause_evt: EventFd,
227d9dc401SRob Bradford     epoll_file: File,
237d9dc401SRob Bradford }
247d9dc401SRob Bradford 
25b1752994SBo Chen #[derive(Error, Debug)]
267d9dc401SRob Bradford pub enum EpollHelperError {
27*8e2973feSPhilipp Schuster     #[error("Failed to create Fd")]
2828e0a954SPhilipp Schuster     CreateFd(#[source] std::io::Error),
29*8e2973feSPhilipp Schuster     #[error("Failed to epoll_ctl")]
3028e0a954SPhilipp Schuster     Ctl(#[source] std::io::Error),
31*8e2973feSPhilipp Schuster     #[error("IO error")]
3228e0a954SPhilipp Schuster     IoError(#[source] std::io::Error),
33*8e2973feSPhilipp Schuster     #[error("Failed to epoll_wait")]
3428e0a954SPhilipp Schuster     Wait(#[source] std::io::Error),
35*8e2973feSPhilipp Schuster     #[error("Failed to get virtio-queue index")]
3628e0a954SPhilipp Schuster     QueueRingIndex(#[source] virtio_queue::Error),
37*8e2973feSPhilipp Schuster     #[error("Failed to handle virtio device events")]
3828e0a954SPhilipp Schuster     HandleEvent(#[source] anyhow::Error),
39*8e2973feSPhilipp Schuster     #[error("Failed to handle timeout")]
4028e0a954SPhilipp Schuster     HandleTimeout(#[source] anyhow::Error),
417d9dc401SRob Bradford }
427d9dc401SRob Bradford 
/// Event id under which `EpollHelper::new` registers the pause `EventFd`.
pub const EPOLL_HELPER_EVENT_PAUSE: u16 = 0;
/// Event id under which `EpollHelper::new` registers the kill `EventFd`.
pub const EPOLL_HELPER_EVENT_KILL: u16 = 1;
/// NOTE(review): presumably the highest id reserved for the helper itself,
/// with handler-specific ids expected to start above it. It is not used in
/// this file, so confirm against the callers.
pub const EPOLL_HELPER_EVENT_LAST: u16 = 15;
467d9dc401SRob Bradford 
477d9dc401SRob Bradford pub trait EpollHelperHandler {
489d2e835dSSebastien Boeuf     // Handle one event at a time. The EpollHelper iterates over a list of
499d2e835dSSebastien Boeuf     // events that have been returned by epoll_wait(). For each event, the
509d2e835dSSebastien Boeuf     // current method is invoked to let the implementation decide how to process
519d2e835dSSebastien Boeuf     // the incoming event.
handle_event( &mut self, helper: &mut EpollHelper, event: &epoll::Event, ) -> Result<(), EpollHelperError>52b1752994SBo Chen     fn handle_event(
53b1752994SBo Chen         &mut self,
54b1752994SBo Chen         helper: &mut EpollHelper,
55b1752994SBo Chen         event: &epoll::Event,
56b1752994SBo Chen     ) -> Result<(), EpollHelperError>;
579d2e835dSSebastien Boeuf 
589d2e835dSSebastien Boeuf     // This method is only invoked if the EpollHelper was configured to call
599d2e835dSSebastien Boeuf     // epoll_wait() with a valid timeout (different from -1), meaning the call
609d2e835dSSebastien Boeuf     // won't block forever. When the timeout is reached, and if no even has been
619d2e835dSSebastien Boeuf     // triggered, this function will be called to let the implementation decide
629d2e835dSSebastien Boeuf     // how to interpret such situation. By default, it provides a no-op
639d2e835dSSebastien Boeuf     // implementation.
handle_timeout(&mut self, _helper: &mut EpollHelper) -> Result<(), EpollHelperError>649d2e835dSSebastien Boeuf     fn handle_timeout(&mut self, _helper: &mut EpollHelper) -> Result<(), EpollHelperError> {
659d2e835dSSebastien Boeuf         Ok(())
669d2e835dSSebastien Boeuf     }
679d2e835dSSebastien Boeuf 
689d2e835dSSebastien Boeuf     // In some situations, it might be useful to know the full list of events
699d2e835dSSebastien Boeuf     // triggered while waiting on epoll_wait(). And having this list provided
709d2e835dSSebastien Boeuf     // prior to the iterations over each event might help make some informed
719d2e835dSSebastien Boeuf     // decisions. This function should not replace handle_event(), otherwise it
729d2e835dSSebastien Boeuf     // would completely defeat the purpose of having the loop being factorized
739d2e835dSSebastien Boeuf     // through the EpollHelper structure.
event_list( &mut self, _helper: &mut EpollHelper, _events: &[epoll::Event], ) -> Result<(), EpollHelperError>749d2e835dSSebastien Boeuf     fn event_list(
759d2e835dSSebastien Boeuf         &mut self,
769d2e835dSSebastien Boeuf         _helper: &mut EpollHelper,
779d2e835dSSebastien Boeuf         _events: &[epoll::Event],
789d2e835dSSebastien Boeuf     ) -> Result<(), EpollHelperError> {
799d2e835dSSebastien Boeuf         Ok(())
809d2e835dSSebastien Boeuf     }
817d9dc401SRob Bradford }
827d9dc401SRob Bradford 
837d9dc401SRob Bradford impl EpollHelper {
new( kill_evt: &EventFd, pause_evt: &EventFd, ) -> std::result::Result<Self, EpollHelperError>847d9dc401SRob Bradford     pub fn new(
857d9dc401SRob Bradford         kill_evt: &EventFd,
867d9dc401SRob Bradford         pause_evt: &EventFd,
877d9dc401SRob Bradford     ) -> std::result::Result<Self, EpollHelperError> {
887d9dc401SRob Bradford         // Create the epoll file descriptor
897d9dc401SRob Bradford         let epoll_fd = epoll::create(true).map_err(EpollHelperError::CreateFd)?;
907d9dc401SRob Bradford         // Use 'File' to enforce closing on 'epoll_fd'
91c45d24dfSWei Liu         // SAFETY: epoll_fd is a valid fd
927d9dc401SRob Bradford         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
937d9dc401SRob Bradford 
947d9dc401SRob Bradford         let mut helper = Self {
957d9dc401SRob Bradford             pause_evt: pause_evt.try_clone().unwrap(),
967d9dc401SRob Bradford             epoll_file,
977d9dc401SRob Bradford         };
987d9dc401SRob Bradford 
997d9dc401SRob Bradford         helper.add_event(kill_evt.as_raw_fd(), EPOLL_HELPER_EVENT_KILL)?;
1007d9dc401SRob Bradford         helper.add_event(pause_evt.as_raw_fd(), EPOLL_HELPER_EVENT_PAUSE)?;
1017d9dc401SRob Bradford         Ok(helper)
1027d9dc401SRob Bradford     }
1037d9dc401SRob Bradford 
add_event(&mut self, fd: RawFd, id: u16) -> std::result::Result<(), EpollHelperError>1047d9dc401SRob Bradford     pub fn add_event(&mut self, fd: RawFd, id: u16) -> std::result::Result<(), EpollHelperError> {
1056bce7f79SSebastien Boeuf         self.add_event_custom(fd, id, epoll::Events::EPOLLIN)
1066bce7f79SSebastien Boeuf     }
1076bce7f79SSebastien Boeuf 
add_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1086bce7f79SSebastien Boeuf     pub fn add_event_custom(
1096bce7f79SSebastien Boeuf         &mut self,
1106bce7f79SSebastien Boeuf         fd: RawFd,
1116bce7f79SSebastien Boeuf         id: u16,
1126bce7f79SSebastien Boeuf         evts: epoll::Events,
1136bce7f79SSebastien Boeuf     ) -> std::result::Result<(), EpollHelperError> {
1147d9dc401SRob Bradford         epoll::ctl(
1157d9dc401SRob Bradford             self.epoll_file.as_raw_fd(),
1167d9dc401SRob Bradford             epoll::ControlOptions::EPOLL_CTL_ADD,
1177d9dc401SRob Bradford             fd,
1186bce7f79SSebastien Boeuf             epoll::Event::new(evts, id.into()),
1196bce7f79SSebastien Boeuf         )
1206bce7f79SSebastien Boeuf         .map_err(EpollHelperError::Ctl)
1216bce7f79SSebastien Boeuf     }
1226bce7f79SSebastien Boeuf 
mod_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1239d2e835dSSebastien Boeuf     pub fn mod_event_custom(
1249d2e835dSSebastien Boeuf         &mut self,
1259d2e835dSSebastien Boeuf         fd: RawFd,
1269d2e835dSSebastien Boeuf         id: u16,
1279d2e835dSSebastien Boeuf         evts: epoll::Events,
1289d2e835dSSebastien Boeuf     ) -> std::result::Result<(), EpollHelperError> {
1299d2e835dSSebastien Boeuf         epoll::ctl(
1309d2e835dSSebastien Boeuf             self.epoll_file.as_raw_fd(),
1319d2e835dSSebastien Boeuf             epoll::ControlOptions::EPOLL_CTL_MOD,
1329d2e835dSSebastien Boeuf             fd,
1339d2e835dSSebastien Boeuf             epoll::Event::new(evts, id.into()),
1349d2e835dSSebastien Boeuf         )
1359d2e835dSSebastien Boeuf         .map_err(EpollHelperError::Ctl)
1369d2e835dSSebastien Boeuf     }
1379d2e835dSSebastien Boeuf 
del_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1386bce7f79SSebastien Boeuf     pub fn del_event_custom(
1396bce7f79SSebastien Boeuf         &mut self,
1406bce7f79SSebastien Boeuf         fd: RawFd,
1416bce7f79SSebastien Boeuf         id: u16,
1426bce7f79SSebastien Boeuf         evts: epoll::Events,
1436bce7f79SSebastien Boeuf     ) -> std::result::Result<(), EpollHelperError> {
1446bce7f79SSebastien Boeuf         epoll::ctl(
1456bce7f79SSebastien Boeuf             self.epoll_file.as_raw_fd(),
1466bce7f79SSebastien Boeuf             epoll::ControlOptions::EPOLL_CTL_DEL,
1476bce7f79SSebastien Boeuf             fd,
1486bce7f79SSebastien Boeuf             epoll::Event::new(evts, id.into()),
1497d9dc401SRob Bradford         )
1507d9dc401SRob Bradford         .map_err(EpollHelperError::Ctl)
1517d9dc401SRob Bradford     }
1527d9dc401SRob Bradford 
run( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, ) -> std::result::Result<(), EpollHelperError>1537d9dc401SRob Bradford     pub fn run(
1547d9dc401SRob Bradford         &mut self,
1557d9dc401SRob Bradford         paused: Arc<AtomicBool>,
156aa57762cSSebastien Boeuf         paused_sync: Arc<Barrier>,
1577d9dc401SRob Bradford         handler: &mut dyn EpollHelperHandler,
1587d9dc401SRob Bradford     ) -> std::result::Result<(), EpollHelperError> {
1599d2e835dSSebastien Boeuf         self.run_with_timeout(paused, paused_sync, handler, -1, false)
1609d2e835dSSebastien Boeuf     }
1619d2e835dSSebastien Boeuf 
162cfafc85bSBo Chen     #[cfg(not(fuzzing))]
run_with_timeout( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, timeout: i32, enable_event_list: bool, ) -> std::result::Result<(), EpollHelperError>1639d2e835dSSebastien Boeuf     pub fn run_with_timeout(
1649d2e835dSSebastien Boeuf         &mut self,
1659d2e835dSSebastien Boeuf         paused: Arc<AtomicBool>,
1669d2e835dSSebastien Boeuf         paused_sync: Arc<Barrier>,
1679d2e835dSSebastien Boeuf         handler: &mut dyn EpollHelperHandler,
1689d2e835dSSebastien Boeuf         timeout: i32,
1699d2e835dSSebastien Boeuf         enable_event_list: bool,
1709d2e835dSSebastien Boeuf     ) -> std::result::Result<(), EpollHelperError> {
1717d9dc401SRob Bradford         const EPOLL_EVENTS_LEN: usize = 100;
1727d9dc401SRob Bradford         let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
1737d9dc401SRob Bradford 
1747d9dc401SRob Bradford         // Before jumping into the epoll loop, check if the device is expected
1757d9dc401SRob Bradford         // to be in a paused state. This is helpful for the restore code path
1767d9dc401SRob Bradford         // as the device thread should not start processing anything before the
1777d9dc401SRob Bradford         // device has been resumed.
1787d9dc401SRob Bradford         while paused.load(Ordering::SeqCst) {
1797d9dc401SRob Bradford             thread::park();
1807d9dc401SRob Bradford         }
1817d9dc401SRob Bradford 
1827d9dc401SRob Bradford         loop {
1839d2e835dSSebastien Boeuf             let num_events =
1849d2e835dSSebastien Boeuf                 match epoll::wait(self.epoll_file.as_raw_fd(), timeout, &mut events[..]) {
1857d9dc401SRob Bradford                     Ok(res) => res,
1867d9dc401SRob Bradford                     Err(e) => {
1877d9dc401SRob Bradford                         if e.kind() == std::io::ErrorKind::Interrupted {
1887d9dc401SRob Bradford                             // It's well defined from the epoll_wait() syscall
1897d9dc401SRob Bradford                             // documentation that the epoll loop can be interrupted
1907d9dc401SRob Bradford                             // before any of the requested events occurred or the
1917d9dc401SRob Bradford                             // timeout expired. In both those cases, epoll_wait()
1927d9dc401SRob Bradford                             // returns an error of type EINTR, but this should not
1937d9dc401SRob Bradford                             // be considered as a regular error. Instead it is more
1947d9dc401SRob Bradford                             // appropriate to retry, by calling into epoll_wait().
1957d9dc401SRob Bradford                             continue;
1967d9dc401SRob Bradford                         }
1977d9dc401SRob Bradford                         return Err(EpollHelperError::Wait(e));
1987d9dc401SRob Bradford                     }
1997d9dc401SRob Bradford                 };
2007d9dc401SRob Bradford 
2019d2e835dSSebastien Boeuf             if num_events == 0 {
2029d2e835dSSebastien Boeuf                 // This case happens when the timeout is reached before any of
2039d2e835dSSebastien Boeuf                 // the registered events is triggered.
2049d2e835dSSebastien Boeuf                 handler.handle_timeout(self)?;
2059d2e835dSSebastien Boeuf                 continue;
2069d2e835dSSebastien Boeuf             }
2079d2e835dSSebastien Boeuf 
2089d2e835dSSebastien Boeuf             if enable_event_list {
2099d2e835dSSebastien Boeuf                 handler.event_list(self, &events[..num_events])?;
2109d2e835dSSebastien Boeuf             }
2119d2e835dSSebastien Boeuf 
2127d9dc401SRob Bradford             for event in events.iter().take(num_events) {
2137d9dc401SRob Bradford                 let ev_type = event.data as u16;
2147d9dc401SRob Bradford 
2157d9dc401SRob Bradford                 match ev_type {
2167d9dc401SRob Bradford                     EPOLL_HELPER_EVENT_KILL => {
217e475b12cSRob Bradford                         info!("KILL_EVENT received, stopping epoll loop");
2187d9dc401SRob Bradford                         return Ok(());
2197d9dc401SRob Bradford                     }
2207d9dc401SRob Bradford                     EPOLL_HELPER_EVENT_PAUSE => {
221e475b12cSRob Bradford                         info!("PAUSE_EVENT received, pausing epoll loop");
222aa57762cSSebastien Boeuf 
223aa57762cSSebastien Boeuf                         // Acknowledge the pause is effective by using the
224aa57762cSSebastien Boeuf                         // paused_sync barrier.
225aa57762cSSebastien Boeuf                         paused_sync.wait();
226aa57762cSSebastien Boeuf 
2277d9dc401SRob Bradford                         // We loop here to handle spurious park() returns.
2287d9dc401SRob Bradford                         // Until we have not resumed, the paused boolean will
2297d9dc401SRob Bradford                         // be true.
2307d9dc401SRob Bradford                         while paused.load(Ordering::SeqCst) {
2317d9dc401SRob Bradford                             thread::park();
2327d9dc401SRob Bradford                         }
2337d9dc401SRob Bradford 
2347d9dc401SRob Bradford                         // Drain pause event after the device has been resumed.
2357d9dc401SRob Bradford                         // This ensures the pause event has been seen by each
23690758094SZiye Yang                         // thread related to this virtio device.
2377d9dc401SRob Bradford                         let _ = self.pause_evt.read();
2387d9dc401SRob Bradford                     }
23901e7bd72SSebastien Boeuf                     _ => {
240b1752994SBo Chen                         handler.handle_event(self, event)?;
2417d9dc401SRob Bradford                     }
2427d9dc401SRob Bradford                 }
2437d9dc401SRob Bradford             }
2447d9dc401SRob Bradford         }
2457d9dc401SRob Bradford     }
246a9924df2SBo Chen 
247a9924df2SBo Chen     #[cfg(fuzzing)]
248a9924df2SBo Chen     // Require to have a 'queue_evt' being kicked before calling
249a9924df2SBo Chen     // and return when no epoll events are active
run_with_timeout( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, _timeout: i32, _enable_event_list: bool, ) -> std::result::Result<(), EpollHelperError>250cfafc85bSBo Chen     pub fn run_with_timeout(
251a9924df2SBo Chen         &mut self,
252a9924df2SBo Chen         paused: Arc<AtomicBool>,
253a9924df2SBo Chen         paused_sync: Arc<Barrier>,
254a9924df2SBo Chen         handler: &mut dyn EpollHelperHandler,
255cfafc85bSBo Chen         _timeout: i32,
256cfafc85bSBo Chen         _enable_event_list: bool,
257a9924df2SBo Chen     ) -> std::result::Result<(), EpollHelperError> {
258a9924df2SBo Chen         const EPOLL_EVENTS_LEN: usize = 100;
259a9924df2SBo Chen         let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
260a9924df2SBo Chen 
261a9924df2SBo Chen         loop {
262a9924df2SBo Chen             let num_events = match epoll::wait(self.epoll_file.as_raw_fd(), 0, &mut events[..]) {
263a9924df2SBo Chen                 Ok(res) => res,
264a9924df2SBo Chen                 Err(e) => {
265a9924df2SBo Chen                     if e.kind() == std::io::ErrorKind::Interrupted {
266a9924df2SBo Chen                         // It's well defined from the epoll_wait() syscall
267a9924df2SBo Chen                         // documentation that the epoll loop can be interrupted
268a9924df2SBo Chen                         // before any of the requested events occurred or the
269a9924df2SBo Chen                         // timeout expired. In both those cases, epoll_wait()
270a9924df2SBo Chen                         // returns an error of type EINTR, but this should not
271a9924df2SBo Chen                         // be considered as a regular error. Instead it is more
272a9924df2SBo Chen                         // appropriate to retry, by calling into epoll_wait().
273a9924df2SBo Chen                         continue;
274a9924df2SBo Chen                     }
275a9924df2SBo Chen                     return Err(EpollHelperError::Wait(e));
276a9924df2SBo Chen                 }
277a9924df2SBo Chen             };
278a9924df2SBo Chen 
279a9924df2SBo Chen             // Return when no epoll events are active
280a9924df2SBo Chen             if num_events == 0 {
281a9924df2SBo Chen                 return Ok(());
282a9924df2SBo Chen             }
283a9924df2SBo Chen 
284a9924df2SBo Chen             for event in events.iter().take(num_events) {
285a9924df2SBo Chen                 let ev_type = event.data as u16;
286a9924df2SBo Chen 
287a9924df2SBo Chen                 match ev_type {
288a9924df2SBo Chen                     EPOLL_HELPER_EVENT_KILL => {
289a9924df2SBo Chen                         info!("KILL_EVENT received, stopping epoll loop");
290a9924df2SBo Chen                         return Ok(());
291a9924df2SBo Chen                     }
292a9924df2SBo Chen                     EPOLL_HELPER_EVENT_PAUSE => {
293a9924df2SBo Chen                         info!("PAUSE_EVENT received, pausing epoll loop");
294a9924df2SBo Chen 
295a9924df2SBo Chen                         // Acknowledge the pause is effective by using the
296a9924df2SBo Chen                         // paused_sync barrier.
297a9924df2SBo Chen                         paused_sync.wait();
298a9924df2SBo Chen 
299a9924df2SBo Chen                         // We loop here to handle spurious park() returns.
300a9924df2SBo Chen                         // Until we have not resumed, the paused boolean will
301a9924df2SBo Chen                         // be true.
302a9924df2SBo Chen                         while paused.load(Ordering::SeqCst) {
303a9924df2SBo Chen                             thread::park();
304a9924df2SBo Chen                         }
305a9924df2SBo Chen 
306a9924df2SBo Chen                         // Drain pause event after the device has been resumed.
307a9924df2SBo Chen                         // This ensures the pause event has been seen by each
308a9924df2SBo Chen                         // thread related to this virtio device.
309a9924df2SBo Chen                         let _ = self.pause_evt.read();
310a9924df2SBo Chen                     }
311a9924df2SBo Chen                     _ => {
312a9924df2SBo Chen                         handler.handle_event(self, event)?;
313a9924df2SBo Chen                     }
314a9924df2SBo Chen                 }
315a9924df2SBo Chen             }
316a9924df2SBo Chen         }
317a9924df2SBo Chen     }
3187d9dc401SRob Bradford }
319d66fa942SRob Bradford 
320d66fa942SRob Bradford impl AsRawFd for EpollHelper {
as_raw_fd(&self) -> RawFd321d66fa942SRob Bradford     fn as_raw_fd(&self) -> RawFd {
322d66fa942SRob Bradford         self.epoll_file.as_raw_fd()
323d66fa942SRob Bradford     }
324d66fa942SRob Bradford }
325