xref: /qemu/include/user/safe-syscall.h (revision a57e0c3657b764fa0311ffca2c72bd8dcd39e8af)
1*a57e0c36SPeter Maydell /*
2*a57e0c36SPeter Maydell  * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls
3*a57e0c36SPeter Maydell  *
4*a57e0c36SPeter Maydell  *  This program is free software; you can redistribute it and/or modify
5*a57e0c36SPeter Maydell  *  it under the terms of the GNU General Public License as published by
6*a57e0c36SPeter Maydell  *  the Free Software Foundation; either version 2 of the License, or
7*a57e0c36SPeter Maydell  *  (at your option) any later version.
8*a57e0c36SPeter Maydell  *
9*a57e0c36SPeter Maydell  *  This program is distributed in the hope that it will be useful,
10*a57e0c36SPeter Maydell  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11*a57e0c36SPeter Maydell  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*a57e0c36SPeter Maydell  *  GNU General Public License for more details.
13*a57e0c36SPeter Maydell  *
14*a57e0c36SPeter Maydell  *  You should have received a copy of the GNU General Public License
15*a57e0c36SPeter Maydell  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16*a57e0c36SPeter Maydell  */
17*a57e0c36SPeter Maydell 
18*a57e0c36SPeter Maydell #ifndef LINUX_USER_SAFE_SYSCALL_H
19*a57e0c36SPeter Maydell #define LINUX_USER_SAFE_SYSCALL_H
20*a57e0c36SPeter Maydell 
21*a57e0c36SPeter Maydell /**
22*a57e0c36SPeter Maydell  * safe_syscall:
 * @number: number of system call to make
 * @...: arguments to the system call
25*a57e0c36SPeter Maydell  *
26*a57e0c36SPeter Maydell  * Call a system call if guest signal not pending.
27*a57e0c36SPeter Maydell  * This has the same API as the libc syscall() function, except that it
28*a57e0c36SPeter Maydell  * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending.
29*a57e0c36SPeter Maydell  *
30*a57e0c36SPeter Maydell  * Returns: the system call result, or -1 with an error code in errno
31*a57e0c36SPeter Maydell  * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing
32*a57e0c36SPeter Maydell  * with any of the host errno values.)
33*a57e0c36SPeter Maydell  */
34*a57e0c36SPeter Maydell 
35*a57e0c36SPeter Maydell /*
36*a57e0c36SPeter Maydell  * A guide to using safe_syscall() to handle interactions between guest
37*a57e0c36SPeter Maydell  * syscalls and guest signals:
38*a57e0c36SPeter Maydell  *
39*a57e0c36SPeter Maydell  * Guest syscalls come in two flavours:
40*a57e0c36SPeter Maydell  *
41*a57e0c36SPeter Maydell  * (1) Non-interruptible syscalls
42*a57e0c36SPeter Maydell  *
43*a57e0c36SPeter Maydell  * These are guest syscalls that never get interrupted by signals and
44*a57e0c36SPeter Maydell  * so never return EINTR. They can be implemented straightforwardly in
45*a57e0c36SPeter Maydell  * QEMU: just make sure that if the implementation code has to make any
46*a57e0c36SPeter Maydell  * blocking calls that those calls are retried if they return EINTR.
47*a57e0c36SPeter Maydell  * It's also OK to implement these with safe_syscall, though it will be
48*a57e0c36SPeter Maydell  * a little less efficient if a signal is delivered at the 'wrong' moment.
49*a57e0c36SPeter Maydell  *
50*a57e0c36SPeter Maydell  * Some non-interruptible syscalls need to be handled using block_signals()
51*a57e0c36SPeter Maydell  * to block signals for the duration of the syscall. This mainly applies
52*a57e0c36SPeter Maydell  * to code which needs to modify the data structures used by the
53*a57e0c36SPeter Maydell  * host_signal_handler() function and the functions it calls, including
54*a57e0c36SPeter Maydell  * all syscalls which change the thread's signal mask.
55*a57e0c36SPeter Maydell  *
56*a57e0c36SPeter Maydell  * (2) Interruptible syscalls
57*a57e0c36SPeter Maydell  *
58*a57e0c36SPeter Maydell  * These are guest syscalls that can be interrupted by signals and
59*a57e0c36SPeter Maydell  * for which we need to either return EINTR or arrange for the guest
60*a57e0c36SPeter Maydell  * syscall to be restarted. This category includes both syscalls which
61*a57e0c36SPeter Maydell  * always restart (and in the kernel return -ERESTARTNOINTR), ones
62*a57e0c36SPeter Maydell  * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
63*a57e0c36SPeter Maydell  * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
64*a57e0c36SPeter Maydell  * if the handler was registered with SA_RESTART (kernel returns
65*a57e0c36SPeter Maydell  * -ERESTARTSYS). System calls which are only interruptible in some
66*a57e0c36SPeter Maydell  * situations (like 'open') also need to be handled this way.
67*a57e0c36SPeter Maydell  *
68*a57e0c36SPeter Maydell  * Here it is important that the host syscall is made
69*a57e0c36SPeter Maydell  * via this safe_syscall() function, and *not* via the host libc.
 * If the host libc is used then the implementation will appear to work
 * most of the time, but there will be a race condition where a
 * signal could arrive just before we make the host syscall inside libc,
 * and then the guest syscall will not correctly be interrupted.
74*a57e0c36SPeter Maydell  * Instead the implementation of the guest syscall can use the safe_syscall
75*a57e0c36SPeter Maydell  * function but otherwise just return the result or errno in the usual
76*a57e0c36SPeter Maydell  * way; the main loop code will take care of restarting the syscall
77*a57e0c36SPeter Maydell  * if appropriate.
78*a57e0c36SPeter Maydell  *
79*a57e0c36SPeter Maydell  * (If the implementation needs to make multiple host syscalls this is
80*a57e0c36SPeter Maydell  * OK; any which might really block must be via safe_syscall(); for those
81*a57e0c36SPeter Maydell  * which are only technically blocking (ie which we know in practice won't
82*a57e0c36SPeter Maydell  * stay in the host kernel indefinitely) it's OK to use libc if necessary.
83*a57e0c36SPeter Maydell  * You must be able to cope with backing out correctly if some safe_syscall
84*a57e0c36SPeter Maydell  * you make in the implementation returns either -TARGET_ERESTARTSYS or
85*a57e0c36SPeter Maydell  * EINTR though.)
86*a57e0c36SPeter Maydell  *
87*a57e0c36SPeter Maydell  * block_signals() cannot be used for interruptible syscalls.
88*a57e0c36SPeter Maydell  *
89*a57e0c36SPeter Maydell  *
90*a57e0c36SPeter Maydell  * How and why the safe_syscall implementation works:
91*a57e0c36SPeter Maydell  *
92*a57e0c36SPeter Maydell  * The basic setup is that we make the host syscall via a known
 * section of host native assembly. If a signal occurs, our signal
 * handler checks the interrupted host PC against the address of that
 * known section. If the PC is before or at the address of the syscall
96*a57e0c36SPeter Maydell  * instruction then we change the PC to point at a "return
97*a57e0c36SPeter Maydell  * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler
98*a57e0c36SPeter Maydell  * (causing the safe_syscall() call to immediately return that value).
99*a57e0c36SPeter Maydell  * Then in the main.c loop if we see this magic return value we adjust
100*a57e0c36SPeter Maydell  * the guest PC to wind it back to before the system call, and invoke
101*a57e0c36SPeter Maydell  * the guest signal handler as usual.
102*a57e0c36SPeter Maydell  *
103*a57e0c36SPeter Maydell  * This winding-back will happen in two cases:
104*a57e0c36SPeter Maydell  * (1) signal came in just before we took the host syscall (a race);
105*a57e0c36SPeter Maydell  *   in this case we'll take the guest signal and have another go
106*a57e0c36SPeter Maydell  *   at the syscall afterwards, and this is indistinguishable for the
107*a57e0c36SPeter Maydell  *   guest from the timing having been different such that the guest
108*a57e0c36SPeter Maydell  *   signal really did win the race
109*a57e0c36SPeter Maydell  * (2) signal came in while the host syscall was blocking, and the
110*a57e0c36SPeter Maydell  *   host kernel decided the syscall should be restarted;
111*a57e0c36SPeter Maydell  *   in this case we want to restart the guest syscall also, and so
112*a57e0c36SPeter Maydell  *   rewinding is the right thing. (Note that "restart" semantics mean
113*a57e0c36SPeter Maydell  *   "first call the signal handler, then reattempt the syscall".)
114*a57e0c36SPeter Maydell  * The other situation to consider is when a signal came in while the
115*a57e0c36SPeter Maydell  * host syscall was blocking, and the host kernel decided that the syscall
116*a57e0c36SPeter Maydell  * should not be restarted; in this case QEMU's host signal handler will
117*a57e0c36SPeter Maydell  * be invoked with the PC pointing just after the syscall instruction,
118*a57e0c36SPeter Maydell  * with registers indicating an EINTR return; the special code in the
119*a57e0c36SPeter Maydell  * handler will not kick in, and we will return EINTR to the guest as
120*a57e0c36SPeter Maydell  * we should.
121*a57e0c36SPeter Maydell  *
122*a57e0c36SPeter Maydell  * Notice that we can leave the host kernel to make the decision for
123*a57e0c36SPeter Maydell  * us about whether to do a restart of the syscall or not; we do not
124*a57e0c36SPeter Maydell  * need to check SA_RESTART flags in QEMU or distinguish the various
125*a57e0c36SPeter Maydell  * kinds of restartability.
126*a57e0c36SPeter Maydell  */
#ifdef HAVE_SAFE_SYSCALL

/*
 * safe_syscall_base: entry point used by the safe_syscall() macro.
 * @pending: pointer to the int flag that safe_syscall() takes from the
 *           current thread's TaskState (its signal_pending field)
 * @number: number of the system call to make
 * ...: arguments to the system call
 *
 * The core part of this function is implemented in assembly.
 */
extern long safe_syscall_base(int *pending, long number, ...);

/*
 * safe_syscall: invoke safe_syscall_base() on behalf of the current thread.
 *
 * The arguments (syscall number followed by the syscall arguments) are
 * forwarded unchanged, preceded by the address of the signal_pending
 * field of the TaskState reached through thread_cpu->opaque.
 *
 * The macro evaluates to a long. If is_error() accepts the raw result,
 * the negated raw result is stored in errno and the macro evaluates
 * to -1; otherwise it evaluates to the raw result itself.
 */
#define safe_syscall(...)                                               \
    ({                                                                  \
        TaskState *ts_ = (TaskState *)thread_cpu->opaque;               \
        long rv_ = safe_syscall_base(&ts_->signal_pending,              \
                                     __VA_ARGS__);                      \
        if (is_error(rv_)) {                                            \
            /* translate "-errno" style result into errno + -1 */       \
            errno = -rv_;                                               \
            rv_ = -1;                                                   \
        }                                                               \
        rv_;                                                            \
    })

#else

/*
 * No safe-syscall assembly fragment is available for this host
 * architecture yet, so safe_syscall is a plain alias for syscall.
 * Note that this fallback is racy!
 * It should go away once all host architectures have been updated.
 */
#define safe_syscall syscall

#endif /* HAVE_SAFE_SYSCALL */
153*a57e0c36SPeter Maydell 
154*a57e0c36SPeter Maydell #endif
155