17d9dc401SRob Bradford // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 27d9dc401SRob Bradford // 37d9dc401SRob Bradford // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 47d9dc401SRob Bradford // Use of this source code is governed by a BSD-style license that can be 57d9dc401SRob Bradford // found in the LICENSE-BSD-3-Clause file. 67d9dc401SRob Bradford // 77d9dc401SRob Bradford // Copyright © 2020 Intel Corporation 87d9dc401SRob Bradford // 97d9dc401SRob Bradford // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 107d9dc401SRob Bradford 117d9dc401SRob Bradford use std::fs::File; 127d9dc401SRob Bradford use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 137d9dc401SRob Bradford use std::sync::atomic::{AtomicBool, Ordering}; 14aa57762cSSebastien Boeuf use std::sync::{Arc, Barrier}; 157d9dc401SRob Bradford use std::thread; 1688a9f799SRob Bradford 17b1752994SBo Chen use thiserror::Error; 187d9dc401SRob Bradford use vmm_sys_util::eventfd::EventFd; 197d9dc401SRob Bradford 207d9dc401SRob Bradford pub struct EpollHelper { 217d9dc401SRob Bradford pause_evt: EventFd, 227d9dc401SRob Bradford epoll_file: File, 237d9dc401SRob Bradford } 247d9dc401SRob Bradford 25b1752994SBo Chen #[derive(Error, Debug)] 267d9dc401SRob Bradford pub enum EpollHelperError { 27*8e2973feSPhilipp Schuster #[error("Failed to create Fd")] 2828e0a954SPhilipp Schuster CreateFd(#[source] std::io::Error), 29*8e2973feSPhilipp Schuster #[error("Failed to epoll_ctl")] 3028e0a954SPhilipp Schuster Ctl(#[source] std::io::Error), 31*8e2973feSPhilipp Schuster #[error("IO error")] 3228e0a954SPhilipp Schuster IoError(#[source] std::io::Error), 33*8e2973feSPhilipp Schuster #[error("Failed to epoll_wait")] 3428e0a954SPhilipp Schuster Wait(#[source] std::io::Error), 35*8e2973feSPhilipp Schuster #[error("Failed to get virtio-queue index")] 3628e0a954SPhilipp Schuster QueueRingIndex(#[source] virtio_queue::Error), 37*8e2973feSPhilipp Schuster #[error("Failed to handle virtio device events")] 3828e0a954SPhilipp Schuster HandleEvent(#[source] anyhow::Error), 39*8e2973feSPhilipp Schuster #[error("Failed to handle timeout")] 4028e0a954SPhilipp Schuster HandleTimeout(#[source] anyhow::Error), 417d9dc401SRob Bradford } 427d9dc401SRob Bradford 437d9dc401SRob Bradford pub const EPOLL_HELPER_EVENT_PAUSE: u16 = 0; 447d9dc401SRob Bradford pub const EPOLL_HELPER_EVENT_KILL: u16 = 1; 451dcf1b11SRob Bradford pub const EPOLL_HELPER_EVENT_LAST: u16 = 15; 467d9dc401SRob Bradford 477d9dc401SRob Bradford pub trait EpollHelperHandler { 489d2e835dSSebastien Boeuf // Handle one event at a time. The EpollHelper iterates over a list of 499d2e835dSSebastien Boeuf // events that have been returned by epoll_wait(). For each event, the 509d2e835dSSebastien Boeuf // current method is invoked to let the implementation decide how to process 519d2e835dSSebastien Boeuf // the incoming event. handle_event( &mut self, helper: &mut EpollHelper, event: &epoll::Event, ) -> Result<(), EpollHelperError>52b1752994SBo Chen fn handle_event( 53b1752994SBo Chen &mut self, 54b1752994SBo Chen helper: &mut EpollHelper, 55b1752994SBo Chen event: &epoll::Event, 56b1752994SBo Chen ) -> Result<(), EpollHelperError>; 579d2e835dSSebastien Boeuf 589d2e835dSSebastien Boeuf // This method is only invoked if the EpollHelper was configured to call 599d2e835dSSebastien Boeuf // epoll_wait() with a valid timeout (different from -1), meaning the call 609d2e835dSSebastien Boeuf // won't block forever. When the timeout is reached, and if no even has been 619d2e835dSSebastien Boeuf // triggered, this function will be called to let the implementation decide 629d2e835dSSebastien Boeuf // how to interpret such situation. By default, it provides a no-op 639d2e835dSSebastien Boeuf // implementation. handle_timeout(&mut self, _helper: &mut EpollHelper) -> Result<(), EpollHelperError>649d2e835dSSebastien Boeuf fn handle_timeout(&mut self, _helper: &mut EpollHelper) -> Result<(), EpollHelperError> { 659d2e835dSSebastien Boeuf Ok(()) 669d2e835dSSebastien Boeuf } 679d2e835dSSebastien Boeuf 689d2e835dSSebastien Boeuf // In some situations, it might be useful to know the full list of events 699d2e835dSSebastien Boeuf // triggered while waiting on epoll_wait(). And having this list provided 709d2e835dSSebastien Boeuf // prior to the iterations over each event might help make some informed 719d2e835dSSebastien Boeuf // decisions. This function should not replace handle_event(), otherwise it 729d2e835dSSebastien Boeuf // would completely defeat the purpose of having the loop being factorized 739d2e835dSSebastien Boeuf // through the EpollHelper structure. event_list( &mut self, _helper: &mut EpollHelper, _events: &[epoll::Event], ) -> Result<(), EpollHelperError>749d2e835dSSebastien Boeuf fn event_list( 759d2e835dSSebastien Boeuf &mut self, 769d2e835dSSebastien Boeuf _helper: &mut EpollHelper, 779d2e835dSSebastien Boeuf _events: &[epoll::Event], 789d2e835dSSebastien Boeuf ) -> Result<(), EpollHelperError> { 799d2e835dSSebastien Boeuf Ok(()) 809d2e835dSSebastien Boeuf } 817d9dc401SRob Bradford } 827d9dc401SRob Bradford 837d9dc401SRob Bradford impl EpollHelper { new( kill_evt: &EventFd, pause_evt: &EventFd, ) -> std::result::Result<Self, EpollHelperError>847d9dc401SRob Bradford pub fn new( 857d9dc401SRob Bradford kill_evt: &EventFd, 867d9dc401SRob Bradford pause_evt: &EventFd, 877d9dc401SRob Bradford ) -> std::result::Result<Self, EpollHelperError> { 887d9dc401SRob Bradford // Create the epoll file descriptor 897d9dc401SRob Bradford let epoll_fd = epoll::create(true).map_err(EpollHelperError::CreateFd)?; 907d9dc401SRob Bradford // Use 'File' to enforce closing on 'epoll_fd' 91c45d24dfSWei Liu // SAFETY: epoll_fd is a valid fd 927d9dc401SRob Bradford let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 937d9dc401SRob Bradford 947d9dc401SRob Bradford let mut helper = Self { 957d9dc401SRob Bradford pause_evt: pause_evt.try_clone().unwrap(), 967d9dc401SRob Bradford epoll_file, 977d9dc401SRob Bradford }; 987d9dc401SRob Bradford 997d9dc401SRob Bradford helper.add_event(kill_evt.as_raw_fd(), EPOLL_HELPER_EVENT_KILL)?; 1007d9dc401SRob Bradford helper.add_event(pause_evt.as_raw_fd(), EPOLL_HELPER_EVENT_PAUSE)?; 1017d9dc401SRob Bradford Ok(helper) 1027d9dc401SRob Bradford } 1037d9dc401SRob Bradford add_event(&mut self, fd: RawFd, id: u16) -> std::result::Result<(), EpollHelperError>1047d9dc401SRob Bradford pub fn add_event(&mut self, fd: RawFd, id: u16) -> std::result::Result<(), EpollHelperError> { 1056bce7f79SSebastien Boeuf self.add_event_custom(fd, id, epoll::Events::EPOLLIN) 1066bce7f79SSebastien Boeuf } 1076bce7f79SSebastien Boeuf add_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1086bce7f79SSebastien Boeuf pub fn add_event_custom( 1096bce7f79SSebastien Boeuf &mut self, 1106bce7f79SSebastien Boeuf fd: RawFd, 1116bce7f79SSebastien Boeuf id: u16, 1126bce7f79SSebastien Boeuf evts: epoll::Events, 1136bce7f79SSebastien Boeuf ) -> std::result::Result<(), EpollHelperError> { 1147d9dc401SRob Bradford epoll::ctl( 1157d9dc401SRob Bradford self.epoll_file.as_raw_fd(), 1167d9dc401SRob Bradford epoll::ControlOptions::EPOLL_CTL_ADD, 1177d9dc401SRob Bradford fd, 1186bce7f79SSebastien Boeuf epoll::Event::new(evts, id.into()), 1196bce7f79SSebastien Boeuf ) 1206bce7f79SSebastien Boeuf .map_err(EpollHelperError::Ctl) 1216bce7f79SSebastien Boeuf } 1226bce7f79SSebastien Boeuf mod_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1239d2e835dSSebastien Boeuf pub fn mod_event_custom( 1249d2e835dSSebastien Boeuf &mut self, 1259d2e835dSSebastien Boeuf fd: RawFd, 1269d2e835dSSebastien Boeuf id: u16, 1279d2e835dSSebastien Boeuf evts: epoll::Events, 1289d2e835dSSebastien Boeuf ) -> std::result::Result<(), EpollHelperError> { 1299d2e835dSSebastien Boeuf epoll::ctl( 1309d2e835dSSebastien Boeuf self.epoll_file.as_raw_fd(), 1319d2e835dSSebastien Boeuf epoll::ControlOptions::EPOLL_CTL_MOD, 1329d2e835dSSebastien Boeuf fd, 1339d2e835dSSebastien Boeuf epoll::Event::new(evts, id.into()), 1349d2e835dSSebastien Boeuf ) 1359d2e835dSSebastien Boeuf .map_err(EpollHelperError::Ctl) 1369d2e835dSSebastien Boeuf } 1379d2e835dSSebastien Boeuf del_event_custom( &mut self, fd: RawFd, id: u16, evts: epoll::Events, ) -> std::result::Result<(), EpollHelperError>1386bce7f79SSebastien Boeuf pub fn del_event_custom( 1396bce7f79SSebastien Boeuf &mut self, 1406bce7f79SSebastien Boeuf fd: RawFd, 1416bce7f79SSebastien Boeuf id: u16, 1426bce7f79SSebastien Boeuf evts: epoll::Events, 1436bce7f79SSebastien Boeuf ) -> std::result::Result<(), EpollHelperError> { 1446bce7f79SSebastien Boeuf epoll::ctl( 1456bce7f79SSebastien Boeuf self.epoll_file.as_raw_fd(), 1466bce7f79SSebastien Boeuf epoll::ControlOptions::EPOLL_CTL_DEL, 1476bce7f79SSebastien Boeuf fd, 1486bce7f79SSebastien Boeuf epoll::Event::new(evts, id.into()), 1497d9dc401SRob Bradford ) 1507d9dc401SRob Bradford .map_err(EpollHelperError::Ctl) 1517d9dc401SRob Bradford } 1527d9dc401SRob Bradford run( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, ) -> std::result::Result<(), EpollHelperError>1537d9dc401SRob Bradford pub fn run( 1547d9dc401SRob Bradford &mut self, 1557d9dc401SRob Bradford paused: Arc<AtomicBool>, 156aa57762cSSebastien Boeuf paused_sync: Arc<Barrier>, 1577d9dc401SRob Bradford handler: &mut dyn EpollHelperHandler, 1587d9dc401SRob Bradford ) -> std::result::Result<(), EpollHelperError> { 1599d2e835dSSebastien Boeuf self.run_with_timeout(paused, paused_sync, handler, -1, false) 1609d2e835dSSebastien Boeuf } 1619d2e835dSSebastien Boeuf 162cfafc85bSBo Chen #[cfg(not(fuzzing))] run_with_timeout( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, timeout: i32, enable_event_list: bool, ) -> std::result::Result<(), EpollHelperError>1639d2e835dSSebastien Boeuf pub fn run_with_timeout( 1649d2e835dSSebastien Boeuf &mut self, 1659d2e835dSSebastien Boeuf paused: Arc<AtomicBool>, 1669d2e835dSSebastien Boeuf paused_sync: Arc<Barrier>, 1679d2e835dSSebastien Boeuf handler: &mut dyn EpollHelperHandler, 1689d2e835dSSebastien Boeuf timeout: i32, 1699d2e835dSSebastien Boeuf enable_event_list: bool, 1709d2e835dSSebastien Boeuf ) -> std::result::Result<(), EpollHelperError> { 1717d9dc401SRob Bradford const EPOLL_EVENTS_LEN: usize = 100; 1727d9dc401SRob Bradford let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1737d9dc401SRob Bradford 1747d9dc401SRob Bradford // Before jumping into the epoll loop, check if the device is expected 1757d9dc401SRob Bradford // to be in a paused state. This is helpful for the restore code path 1767d9dc401SRob Bradford // as the device thread should not start processing anything before the 1777d9dc401SRob Bradford // device has been resumed. 1787d9dc401SRob Bradford while paused.load(Ordering::SeqCst) { 1797d9dc401SRob Bradford thread::park(); 1807d9dc401SRob Bradford } 1817d9dc401SRob Bradford 1827d9dc401SRob Bradford loop { 1839d2e835dSSebastien Boeuf let num_events = 1849d2e835dSSebastien Boeuf match epoll::wait(self.epoll_file.as_raw_fd(), timeout, &mut events[..]) { 1857d9dc401SRob Bradford Ok(res) => res, 1867d9dc401SRob Bradford Err(e) => { 1877d9dc401SRob Bradford if e.kind() == std::io::ErrorKind::Interrupted { 1887d9dc401SRob Bradford // It's well defined from the epoll_wait() syscall 1897d9dc401SRob Bradford // documentation that the epoll loop can be interrupted 1907d9dc401SRob Bradford // before any of the requested events occurred or the 1917d9dc401SRob Bradford // timeout expired. In both those cases, epoll_wait() 1927d9dc401SRob Bradford // returns an error of type EINTR, but this should not 1937d9dc401SRob Bradford // be considered as a regular error. Instead it is more 1947d9dc401SRob Bradford // appropriate to retry, by calling into epoll_wait(). 1957d9dc401SRob Bradford continue; 1967d9dc401SRob Bradford } 1977d9dc401SRob Bradford return Err(EpollHelperError::Wait(e)); 1987d9dc401SRob Bradford } 1997d9dc401SRob Bradford }; 2007d9dc401SRob Bradford 2019d2e835dSSebastien Boeuf if num_events == 0 { 2029d2e835dSSebastien Boeuf // This case happens when the timeout is reached before any of 2039d2e835dSSebastien Boeuf // the registered events is triggered. 2049d2e835dSSebastien Boeuf handler.handle_timeout(self)?; 2059d2e835dSSebastien Boeuf continue; 2069d2e835dSSebastien Boeuf } 2079d2e835dSSebastien Boeuf 2089d2e835dSSebastien Boeuf if enable_event_list { 2099d2e835dSSebastien Boeuf handler.event_list(self, &events[..num_events])?; 2109d2e835dSSebastien Boeuf } 2119d2e835dSSebastien Boeuf 2127d9dc401SRob Bradford for event in events.iter().take(num_events) { 2137d9dc401SRob Bradford let ev_type = event.data as u16; 2147d9dc401SRob Bradford 2157d9dc401SRob Bradford match ev_type { 2167d9dc401SRob Bradford EPOLL_HELPER_EVENT_KILL => { 217e475b12cSRob Bradford info!("KILL_EVENT received, stopping epoll loop"); 2187d9dc401SRob Bradford return Ok(()); 2197d9dc401SRob Bradford } 2207d9dc401SRob Bradford EPOLL_HELPER_EVENT_PAUSE => { 221e475b12cSRob Bradford info!("PAUSE_EVENT received, pausing epoll loop"); 222aa57762cSSebastien Boeuf 223aa57762cSSebastien Boeuf // Acknowledge the pause is effective by using the 224aa57762cSSebastien Boeuf // paused_sync barrier. 225aa57762cSSebastien Boeuf paused_sync.wait(); 226aa57762cSSebastien Boeuf 2277d9dc401SRob Bradford // We loop here to handle spurious park() returns. 2287d9dc401SRob Bradford // Until we have not resumed, the paused boolean will 2297d9dc401SRob Bradford // be true. 2307d9dc401SRob Bradford while paused.load(Ordering::SeqCst) { 2317d9dc401SRob Bradford thread::park(); 2327d9dc401SRob Bradford } 2337d9dc401SRob Bradford 2347d9dc401SRob Bradford // Drain pause event after the device has been resumed. 2357d9dc401SRob Bradford // This ensures the pause event has been seen by each 23690758094SZiye Yang // thread related to this virtio device. 2377d9dc401SRob Bradford let _ = self.pause_evt.read(); 2387d9dc401SRob Bradford } 23901e7bd72SSebastien Boeuf _ => { 240b1752994SBo Chen handler.handle_event(self, event)?; 2417d9dc401SRob Bradford } 2427d9dc401SRob Bradford } 2437d9dc401SRob Bradford } 2447d9dc401SRob Bradford } 2457d9dc401SRob Bradford } 246a9924df2SBo Chen 247a9924df2SBo Chen #[cfg(fuzzing)] 248a9924df2SBo Chen // Require to have a 'queue_evt' being kicked before calling 249a9924df2SBo Chen // and return when no epoll events are active run_with_timeout( &mut self, paused: Arc<AtomicBool>, paused_sync: Arc<Barrier>, handler: &mut dyn EpollHelperHandler, _timeout: i32, _enable_event_list: bool, ) -> std::result::Result<(), EpollHelperError>250cfafc85bSBo Chen pub fn run_with_timeout( 251a9924df2SBo Chen &mut self, 252a9924df2SBo Chen paused: Arc<AtomicBool>, 253a9924df2SBo Chen paused_sync: Arc<Barrier>, 254a9924df2SBo Chen handler: &mut dyn EpollHelperHandler, 255cfafc85bSBo Chen _timeout: i32, 256cfafc85bSBo Chen _enable_event_list: bool, 257a9924df2SBo Chen ) -> std::result::Result<(), EpollHelperError> { 258a9924df2SBo Chen const EPOLL_EVENTS_LEN: usize = 100; 259a9924df2SBo Chen let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 260a9924df2SBo Chen 261a9924df2SBo Chen loop { 262a9924df2SBo Chen let num_events = match epoll::wait(self.epoll_file.as_raw_fd(), 0, &mut events[..]) { 263a9924df2SBo Chen Ok(res) => res, 264a9924df2SBo Chen Err(e) => { 265a9924df2SBo Chen if e.kind() == std::io::ErrorKind::Interrupted { 266a9924df2SBo Chen // It's well defined from the epoll_wait() syscall 267a9924df2SBo Chen // documentation that the epoll loop can be interrupted 268a9924df2SBo Chen // before any of the requested events occurred or the 269a9924df2SBo Chen // timeout expired. In both those cases, epoll_wait() 270a9924df2SBo Chen // returns an error of type EINTR, but this should not 271a9924df2SBo Chen // be considered as a regular error. Instead it is more 272a9924df2SBo Chen // appropriate to retry, by calling into epoll_wait(). 273a9924df2SBo Chen continue; 274a9924df2SBo Chen } 275a9924df2SBo Chen return Err(EpollHelperError::Wait(e)); 276a9924df2SBo Chen } 277a9924df2SBo Chen }; 278a9924df2SBo Chen 279a9924df2SBo Chen // Return when no epoll events are active 280a9924df2SBo Chen if num_events == 0 { 281a9924df2SBo Chen return Ok(()); 282a9924df2SBo Chen } 283a9924df2SBo Chen 284a9924df2SBo Chen for event in events.iter().take(num_events) { 285a9924df2SBo Chen let ev_type = event.data as u16; 286a9924df2SBo Chen 287a9924df2SBo Chen match ev_type { 288a9924df2SBo Chen EPOLL_HELPER_EVENT_KILL => { 289a9924df2SBo Chen info!("KILL_EVENT received, stopping epoll loop"); 290a9924df2SBo Chen return Ok(()); 291a9924df2SBo Chen } 292a9924df2SBo Chen EPOLL_HELPER_EVENT_PAUSE => { 293a9924df2SBo Chen info!("PAUSE_EVENT received, pausing epoll loop"); 294a9924df2SBo Chen 295a9924df2SBo Chen // Acknowledge the pause is effective by using the 296a9924df2SBo Chen // paused_sync barrier. 297a9924df2SBo Chen paused_sync.wait(); 298a9924df2SBo Chen 299a9924df2SBo Chen // We loop here to handle spurious park() returns. 300a9924df2SBo Chen // Until we have not resumed, the paused boolean will 301a9924df2SBo Chen // be true. 302a9924df2SBo Chen while paused.load(Ordering::SeqCst) { 303a9924df2SBo Chen thread::park(); 304a9924df2SBo Chen } 305a9924df2SBo Chen 306a9924df2SBo Chen // Drain pause event after the device has been resumed. 307a9924df2SBo Chen // This ensures the pause event has been seen by each 308a9924df2SBo Chen // thread related to this virtio device. 309a9924df2SBo Chen let _ = self.pause_evt.read(); 310a9924df2SBo Chen } 311a9924df2SBo Chen _ => { 312a9924df2SBo Chen handler.handle_event(self, event)?; 313a9924df2SBo Chen } 314a9924df2SBo Chen } 315a9924df2SBo Chen } 316a9924df2SBo Chen } 317a9924df2SBo Chen } 3187d9dc401SRob Bradford } 319d66fa942SRob Bradford 320d66fa942SRob Bradford impl AsRawFd for EpollHelper { as_raw_fd(&self) -> RawFd321d66fa942SRob Bradford fn as_raw_fd(&self) -> RawFd { 322d66fa942SRob Bradford self.epoll_file.as_raw_fd() 323d66fa942SRob Bradford } 324d66fa942SRob Bradford } 325