xref: /cloud-hypervisor/vmm/src/sigwinch_listener.rs (revision 61e57e1cb149de03ae1e0b799b9e5ba9a4a63ace)
1 // Copyright 2021, 2023 Alyssa Ross <hi@alyssa.is>
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use std::cell::RefCell;
5 use std::collections::BTreeSet;
6 use std::fs::{read_dir, File};
7 use std::io::{self, ErrorKind, Read, Write};
8 use std::iter::once;
9 use std::mem::{size_of, MaybeUninit};
10 use std::os::unix::prelude::*;
11 use std::process::exit;
12 use std::ptr::null_mut;
13 
14 use arch::_NSIG;
15 use hypervisor::HypervisorType;
16 use libc::{
17     c_int, c_void, close, fork, getpgrp, ioctl, pipe2, poll, pollfd, setsid, sigemptyset,
18     siginfo_t, signal, sigprocmask, syscall, tcgetpgrp, tcsetpgrp, SYS_close_range, EINVAL, ENOSYS,
19     ENOTTY, O_CLOEXEC, POLLERR, SIGCHLD, SIGWINCH, SIG_DFL, SIG_SETMASK, STDERR_FILENO, TIOCSCTTY,
20 };
21 use seccompiler::{apply_filter, BpfProgram, SeccompAction};
22 use vmm_sys_util::signal::register_signal_handler;
23 
24 use crate::clone3::{clone3, clone_args, CLONE_CLEAR_SIGHAND};
25 use crate::seccomp_filters::{get_seccomp_filter, Thread};
26 
thread_local! {
    // The write end of the notification pipe is stored in a
    // thread-local variable so it can be accessed by the SIGWINCH
    // signal handler. It starts out as `None` and is filled in by
    // `sigwinch_listener_main`.
    static TX: RefCell<Option<File>> = const { RefCell::new(None) };
}
32 
with_tx<R, F: FnOnce(&File) -> R>(f: F) -> R33 fn with_tx<R, F: FnOnce(&File) -> R>(f: F) -> R {
34     TX.with(|tx| f(tx.borrow().as_ref().unwrap()))
35 }
36 
37 // This function has to be safe to call from a signal handler, and
38 // therefore must not panic.
notify()39 fn notify() {
40     if let Err(e) = with_tx(|mut tx| tx.write_all(b"\n")) {
41         if e.kind() == ErrorKind::BrokenPipe {
42             exit(0);
43         }
44         exit(1);
45     }
46 }
47 
/// Signal handler registered for `SIGWINCH` by `sigwinch_listener_main`.
///
/// All three arguments are ignored; every invocation simply forwards
/// to [`notify`].
extern "C" fn sigwinch_handler(_signo: c_int, _info: *mut siginfo_t, _unused: *mut c_void) {
    notify();
}
51 
unblock_all_signals() -> io::Result<()>52 fn unblock_all_signals() -> io::Result<()> {
53     let mut set = MaybeUninit::uninit();
54     // SAFETY: set is a correct structure for sigemptyset
55     if unsafe { sigemptyset(set.as_mut_ptr()) } == -1 {
56         return Err(io::Error::last_os_error());
57     }
58     // SAFETY: set is initialized above
59     let set = unsafe { set.assume_init() };
60 
61     // SAFETY: all arguments are correct
62     if unsafe { sigprocmask(SIG_SETMASK, &set, null_mut()) } == -1 {
63         return Err(io::Error::last_os_error());
64     }
65 
66     Ok(())
67 }
68 
69 /// # Safety
70 ///
71 /// Caller is responsible for ensuring all file descriptors not listed
72 /// in `keep_fds` are not accessed after this point, and that no other
73 /// thread is opening file descriptors while this function is
74 /// running.
close_fds_fallback(keep_fds: &BTreeSet<RawFd>)75 unsafe fn close_fds_fallback(keep_fds: &BTreeSet<RawFd>) {
76     // We collect these instead of iterating through them, because we
77     // don't want to close the descriptor for /proc/self/fd while
78     // we're iterating through it.
79     let open_fds: BTreeSet<RawFd> = read_dir("/proc/self/fd")
80         .unwrap()
81         .map(Result::unwrap)
82         .filter_map(|s| s.file_name().into_string().ok()?.parse().ok())
83         .collect();
84 
85     for fd in open_fds.difference(keep_fds) {
86         close(*fd);
87     }
88 }
89 
90 /// # Safety
91 ///
92 /// Caller is responsible for ensuring all file descriptors not listed
93 /// in `keep_fds` are not accessed after this point, and that no other
94 /// thread is opening file descriptors while this function is
95 /// running.
close_unused_fds(keep_fds: &mut [RawFd])96 unsafe fn close_unused_fds(keep_fds: &mut [RawFd]) {
97     keep_fds.sort();
98 
99     // Iterate over the gaps between descriptors we want to keep.
100     let firsts = keep_fds.iter().map(|fd| fd + 1);
101     for (i, first) in once(0).chain(firsts).enumerate() {
102         // The next fd is the one at i, because the indexes in the
103         // iterator are offset by one due to the initial 0.
104         let next_keep_fd = keep_fds.get(i);
105         let last = next_keep_fd.map(|fd| fd - 1).unwrap_or(RawFd::MAX);
106 
107         if first > last {
108             continue;
109         }
110 
111         if syscall(SYS_close_range, first, last, 0) == -1 {
112             // The kernel might be too old to have close_range, in
113             // which case we need to fall back to an uglier method.
114             let e = io::Error::last_os_error();
115             if e.raw_os_error() == Some(ENOSYS) {
116                 return close_fds_fallback(&keep_fds.iter().copied().collect());
117             }
118 
119             panic!("close_range: {e}");
120         }
121     }
122 }
123 
set_foreground_process_group(tty: &File) -> io::Result<()>124 fn set_foreground_process_group(tty: &File) -> io::Result<()> {
125     // SAFETY: trivially safe.
126     let my_pgrp = unsafe { getpgrp() };
127     // SAFETY: we have borrowed tty.
128     let tty_pgrp = unsafe { tcgetpgrp(tty.as_raw_fd()) };
129 
130     if tty_pgrp == -1 {
131         let e = io::Error::last_os_error();
132         if e.raw_os_error() != Some(ENOTTY) {
133             return Err(e);
134         }
135     }
136     if tty_pgrp == my_pgrp {
137         return Ok(());
138     }
139 
140     // SAFETY: trivially safe.
141     let my_pgrp = unsafe { setsid() };
142     if my_pgrp == -1 {
143         return Err(io::Error::last_os_error());
144     }
145 
146     // Set the tty to be this process's controlling terminal.
147     // SAFETY: we have borrowed tty.
148     if unsafe { ioctl(tty.as_raw_fd(), TIOCSCTTY, 0) } == -1 {
149         return Err(io::Error::last_os_error());
150     }
151 
152     // Become the foreground process group of the tty.
153     // SAFETY: we have borrowed tty.
154     if unsafe { tcsetpgrp(tty.as_raw_fd(), my_pgrp) } == -1 {
155         return Err(io::Error::last_os_error());
156     }
157 
158     Ok(())
159 }
160 
sigwinch_listener_main(seccomp_filter: BpfProgram, tx: File, tty: File) -> !161 fn sigwinch_listener_main(seccomp_filter: BpfProgram, tx: File, tty: File) -> ! {
162     // SAFETY: any references to these file descriptors are
163     // unreachable, because this function never returns.
164     unsafe {
165         close_unused_fds(&mut [STDERR_FILENO, tx.as_raw_fd(), tty.as_raw_fd()]);
166     }
167 
168     TX.with(|opt| opt.replace(Some(tx)));
169 
170     unblock_all_signals().unwrap();
171 
172     if !seccomp_filter.is_empty() {
173         apply_filter(&seccomp_filter).unwrap();
174     }
175 
176     register_signal_handler(SIGWINCH, sigwinch_handler).unwrap();
177 
178     set_foreground_process_group(&tty).unwrap();
179     drop(tty);
180 
181     notify();
182 
183     // Wait for the pipe to close, indicating the parent has exited.
184     with_tx(|tx| {
185         let mut pollfd = pollfd {
186             fd: tx.as_raw_fd(),
187             events: 0,
188             revents: 0,
189         };
190 
191         // SAFETY: FFI call with valid arguments
192         while unsafe { poll(&mut pollfd, 1, -1) } == -1 {
193             let e = io::Error::last_os_error();
194             assert!(
195                 matches!(e.kind(), ErrorKind::Interrupted | ErrorKind::WouldBlock),
196                 "poll: {e}"
197             );
198         }
199 
200         assert_eq!(pollfd.revents, POLLERR);
201     });
202 
203     exit(0);
204 }
205 
206 /// # Safety
207 ///
208 /// Same as [`fork`].
clone_clear_sighand() -> io::Result<u64>209 unsafe fn clone_clear_sighand() -> io::Result<u64> {
210     let mut args = clone_args {
211         exit_signal: SIGCHLD as u64,
212         ..Default::default()
213     };
214     args.flags |= CLONE_CLEAR_SIGHAND;
215     let r = clone3(&mut args, size_of::<clone_args>());
216     if r != -1 {
217         return Ok(r.try_into().unwrap());
218     }
219     let e = io::Error::last_os_error();
220     if e.raw_os_error() != Some(ENOSYS) && e.raw_os_error() != Some(EINVAL) {
221         return Err(e);
222     }
223 
224     // If CLONE_CLEAR_SIGHAND isn't available, fall back to resetting
225     // all the signal handlers one by one.
226     let r = fork();
227     if r == -1 {
228         return Err(io::Error::last_os_error());
229     }
230     if r == 0 {
231         for signum in 1.._NSIG {
232             let _ = signal(signum, SIG_DFL);
233         }
234     }
235     Ok(r.try_into().unwrap())
236 }
237 
start_sigwinch_listener(seccomp_filter: BpfProgram, tty_sub: File) -> io::Result<File>238 pub fn start_sigwinch_listener(seccomp_filter: BpfProgram, tty_sub: File) -> io::Result<File> {
239     let mut pipe = [-1; 2];
240     // SAFETY: FFI call with valid arguments
241     if unsafe { pipe2(pipe.as_mut_ptr(), O_CLOEXEC) } == -1 {
242         return Err(io::Error::last_os_error());
243     }
244 
245     // SAFETY: pipe[0] is valid
246     let mut rx = unsafe { File::from_raw_fd(pipe[0]) };
247     // SAFETY: pipe[1] is valid
248     let tx = unsafe { File::from_raw_fd(pipe[1]) };
249 
250     // SAFETY: FFI call
251     if unsafe { clone_clear_sighand() }? == 0 {
252         sigwinch_listener_main(seccomp_filter, tx, tty_sub);
253     }
254 
255     drop(tx);
256 
257     // Wait for a notification indicating readiness.
258     rx.read_exact(&mut [0])?;
259 
260     Ok(rx)
261 }
262 
listen_for_sigwinch_on_tty( pty_sub: File, seccomp_action: &SeccompAction, hypervisor_type: HypervisorType, ) -> std::io::Result<File>263 pub fn listen_for_sigwinch_on_tty(
264     pty_sub: File,
265     seccomp_action: &SeccompAction,
266     hypervisor_type: HypervisorType,
267 ) -> std::io::Result<File> {
268     let seccomp_filter =
269         get_seccomp_filter(seccomp_action, Thread::PtyForeground, hypervisor_type).unwrap();
270 
271     let console_resize_pipe = start_sigwinch_listener(seccomp_filter, pty_sub)?;
272 
273     Ok(console_resize_pipe)
274 }
275