/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FUTEX_H
#define _FUTEX_H

#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>

#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
#endif

#include <asm/futex.h>

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU has no per-process address space, so all futexes are effectively
 * shared. Define the flag as zero and let the compiler optimize the
 * shared-futex code away.
 */
# define FLAGS_SHARED		0x0000
#endif
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100
#define FLAGS_MPOL		0x0200

/* FUTEX_ to FLAGS_ */
static inline unsigned int futex_to_flags(unsigned int op)
{
	unsigned int flags = FLAGS_SIZE_32;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;

	return flags;
}
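
/*
 * For example (illustrative): futex_to_flags(FUTEX_WAIT_PRIVATE) yields
 * FLAGS_SIZE_32, while futex_to_flags(FUTEX_WAIT) yields
 * FLAGS_SIZE_32 | FLAGS_SHARED since the private bit is absent, and a
 * FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME op additionally sets
 * FLAGS_CLOCKRT.
 */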

#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE)

/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags(unsigned int flags2)
{
	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;

	if (!(flags2 & FUTEX2_PRIVATE))
		flags |= FLAGS_SHARED;

	if (flags2 & FUTEX2_NUMA)
		flags |= FLAGS_NUMA;

	if (flags2 & FUTEX2_MPOL)
		flags |= FLAGS_MPOL;

	return flags;
}

static inline unsigned int futex_size(unsigned int flags)
{
	return 1 << (flags & FLAGS_SIZE_MASK);
}

static inline bool futex_flags_valid(unsigned int flags)
{
	/* Only 64bit futexes for 64bit code */
	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
			return false;
	}

	/* Only 32bit futexes are implemented -- for now */
	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
		return false;

	/*
	 * Must be able to represent both FUTEX_NO_NODE and every valid nodeid
	 * in a futex word.
	 */
	if (flags & FLAGS_NUMA) {
		int bits = 8 * futex_size(flags);
		u64 max = ~0ULL;

		max >>= 64 - bits;
		if (nr_node_ids >= max)
			return false;
	}

	return true;
}
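
/*
 * Worked example for the FLAGS_NUMA bound above: a 32-bit futex gives
 * bits = 32 and max = 0xffffffff. That all-ones value is what
 * FUTEX_NO_NODE (-1) encodes to in the futex word, so every valid node
 * id must stay strictly below it, i.e. nr_node_ids < max. An 8-bit
 * futex, were it implemented, could only support nodes 0..254.
 */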

static inline bool futex_validate_input(unsigned int flags, u64 val)
{
	int bits = 8 * futex_size(flags);

	if (bits < 64 && (val >> bits))
		return false;

	return true;
}
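
/*
 * Example: with FLAGS_SIZE_32, bits == 32, so any val with a bit set at
 * or above bit 32 (e.g. 0x100000000) is rejected; with FLAGS_SIZE_64
 * the shift check is skipped and every u64 value is accepted.
 */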

#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);
#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif

/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
	struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;

struct futex_q;
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @drop_hb_ref:	Waiter should drop the extra hash bucket reference if true
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
	bool drop_hb_ref;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;

extern const struct futex_q futex_q_init;

enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};

extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
			 enum futex_access rw);
extern void futex_q_lockptr_lock(struct futex_q *q);
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns);

extern struct futex_hash_bucket *futex_hash(union futex_key *key);
#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);

extern struct futex_private_hash *futex_private_hash(void);
extern bool futex_private_hash_get(struct futex_private_hash *fph);
extern void futex_private_hash_put(struct futex_private_hash *fph);

#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
static inline bool futex_private_hash_get(struct futex_private_hash *fph) { return false; }
static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
#endif

DEFINE_CLASS(hb, struct futex_hash_bucket *,
	     if (_T) futex_hash_put(_T),
	     futex_hash(key), union futex_key *key);

DEFINE_CLASS(private_hash, struct futex_private_hash *,
	     if (_T) futex_private_hash_put(_T),
	     futex_private_hash(), void);
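
/*
 * These classes hook into the <linux/cleanup.h> machinery: a scoped
 * bucket reference can be taken with CLASS() and is dropped
 * automatically on scope exit. Minimal hypothetical caller sketch:
 *
 *	void example(union futex_key *key)
 *	{
 *		CLASS(hb, hb)(key);		// hb = futex_hash(key)
 *
 *		... use hb ...
 *	}					// futex_hash_put(hb) runs here
 */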

/**
 * futex_match - Check whether two futex keys are equal
 * @key1:	Pointer to key1
 * @key2:	Pointer to key2
 *
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int futex_match(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, union futex_key *key2,
			    struct task_struct *task);
extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);

extern int fault_in_user_writeable(u32 __user *uaddr);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);

static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
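
/*
 * With page faults disabled, the cmpxchg above can return -EFAULT even
 * for a perfectly valid mapping. Callers typically use the classic
 * unlock/fault-in/retry pattern; a condensed, hypothetical sketch:
 *
 *	retry:
 *		ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
 *		if (ret == -EFAULT) {
 *			spin_unlock(&hb->lock);
 *			if (fault_in_user_writeable(uaddr))
 *				return -EFAULT;	// a real fault
 *			goto retry;		// re-lock and try again
 *		}
 */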

/*
 * This does a plain atomic user space read, and the user pointer has
 * already been verified earlier by get_futex_key() to be both aligned
 * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
 *
 * We still want to avoid any speculation, and while __get_user() is
 * the traditional model for this, it's actually slower than doing
 * this manually these days.
 *
 * We could just have a per-architecture special function for it,
 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
 * than force everybody to do that, write it out long-hand using
 * the low-level user-access infrastructure.
 *
 * This looks a bit overkill, but generally just results in a couple
 * of instructions.
 */
static __always_inline int futex_get_value(u32 *dest, u32 __user *from)
{
	u32 val;

	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	unsafe_get_user(val, from, Efault);
	user_read_access_end();
	*dest = val;
	return 0;
Efault:
	user_read_access_end();
	return -EFAULT;
}

static __always_inline int futex_put_value(u32 val, u32 __user *to)
{
	/*
	 * The write counterpart of futex_get_value(): open a user *write*
	 * access window, since unsafe_put_user() stores to user space.
	 */
	if (can_do_masked_user_access())
		to = masked_user_access_begin(to);
	else if (!user_write_access_begin(to, sizeof(*to)))
		return -EFAULT;
	unsafe_put_user(val, to, Efault);
	user_write_access_end();
	return 0;
Efault:
	user_write_access_end();
	return -EFAULT;
}

static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = futex_get_value(dest, from);
	pagefault_enable();

	return ret;
}

extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			  struct task_struct *task);
extern int futex_unqueue(struct futex_q *q);

/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 * @task: Task queueing this futex
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue().
 * The exceptions involve the PI related operations, which may use
 * futex_unqueue_pi() or nothing if the unqueue is done as part of the wake
 * process and the unqueue state is implicit in the state of the woken task
 * (see futex_wait_requeue_pi() for an example).
 *
 * Note that @task may be NULL, for async usage of futexes.
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			       struct task_struct *task)
	__releases(&hb->lock)
{
	__futex_queue(q, hb, task);
	spin_unlock(&hb->lock);
}
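
/*
 * Condensed sketch of the wait-side life cycle of a futex_q (not an
 * exact quote of futex_wait(); locking details elided):
 *
 *	futex_q_lock(&q, hb);			// lock bucket, bump waiters
 *	ret = futex_get_value_locked(&uval, uaddr);
 *	if (ret || uval != val)
 *		futex_q_unlock(hb);		// value changed: don't sleep
 *	else
 *		futex_queue(&q, hb, current);	// enqueue, drops hb->lock
 *	...					// sleep until woken/timeout
 *	futex_unqueue(&q);			// no-op if already woken
 */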

extern void futex_unqueue_pi(struct futex_q *q);

extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);

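/*
 * Ordering of hb->waiters (condensed; the full discussion lives with the
 * wait/wake implementation). Waiter and waker must not miss each other:
 *
 *	CPU 0 (waiter)			CPU 1 (waker)
 *	waiters++;			*futex = newval;
 *	smp_mb(); (A)			smp_mb(); (B)
 *	if (*futex == val)		if (waiters)
 *		queue and sleep;		wake up waiters;
 *
 * Barrier (A) below pairs with barrier (B) in futex_hb_waiters_pending():
 * either the waiter observes the new value and does not sleep, or the
 * waker observes a nonzero waiter count and delivers the wakeup.
 */
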
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 */
	smp_mb__after_atomic();
#endif
}

/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}

extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb);
extern void futex_q_unlock(struct futex_hash_bucket *hb);

extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters);

extern int refill_pi_state_cache(void);
extern void get_pi_state(struct futex_pi_state *pi_state);
extern void put_pi_state(struct futex_pi_state *pi_state);
extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 > hb2)
		swap(hb1, hb2);

	spin_lock(&hb1->lock);
	if (hb1 != hb2)
		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
}
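
/*
 * Example: if one task calls double_lock_hb(hb_x, hb_y) while another
 * calls double_lock_hb(hb_y, hb_x), the swap above makes both take the
 * lower-addressed lock first, so the classic ABBA deadlock cannot
 * occur. spin_lock_nested() tells lockdep that the second same-class
 * lock is taken by design.
 */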

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/* syscalls */

extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2);

extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
			 u32 __user *uaddr2, unsigned int flags2,
			 int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi);

extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
			struct hrtimer_sleeper *to, u32 bitset);

extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset);

/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};

extern int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes, futex_wake_fn *wake,
			     void *wake_data);

extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
				     int *woken);

extern int futex_unqueue_multiple(struct futex_vector *v, int count);

extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			       struct hrtimer_sleeper *to);
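
/*
 * The helpers above are glued together by the futex_waitv() syscall
 * path, roughly (condensed sketch, not the exact implementation):
 *
 *	futex_parse_waitv(futexv, uwaitv, nr, futex_wake_mark, NULL);
 *	futex_wait_multiple(futexv, nr, timeout);
 *	  // -> futex_wait_multiple_setup(): queue all, or bail early
 *	  // -> sleep until wakeup or timeout
 *	  // -> futex_unqueue_multiple(): reports which futex woke us
 */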

extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);

extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);

extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);

#endif /* _FUTEX_H */