1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _FUTEX_H 3 #define _FUTEX_H 4 5 #include <linux/futex.h> 6 #include <linux/rtmutex.h> 7 #include <linux/sched/wake_q.h> 8 #include <linux/compat.h> 9 #include <linux/uaccess.h> 10 #include <linux/cleanup.h> 11 12 #ifdef CONFIG_PREEMPT_RT 13 #include <linux/rcuwait.h> 14 #endif 15 16 #include <asm/futex.h> 17 18 /* 19 * Futex flags used to encode options to functions and preserve them across 20 * restarts. 21 */ 22 #define FLAGS_SIZE_8 0x0000 23 #define FLAGS_SIZE_16 0x0001 24 #define FLAGS_SIZE_32 0x0002 25 #define FLAGS_SIZE_64 0x0003 26 27 #define FLAGS_SIZE_MASK 0x0003 28 29 #ifdef CONFIG_MMU 30 # define FLAGS_SHARED 0x0010 31 #else 32 /* 33 * NOMMU does not have per process address space. Let the compiler optimize 34 * code away. 35 */ 36 # define FLAGS_SHARED 0x0000 37 #endif 38 #define FLAGS_CLOCKRT 0x0020 39 #define FLAGS_HAS_TIMEOUT 0x0040 40 #define FLAGS_NUMA 0x0080 41 #define FLAGS_STRICT 0x0100 42 #define FLAGS_MPOL 0x0200 43 44 /* FUTEX_ to FLAGS_ */ 45 static inline unsigned int futex_to_flags(unsigned int op) 46 { 47 unsigned int flags = FLAGS_SIZE_32; 48 49 if (!(op & FUTEX_PRIVATE_FLAG)) 50 flags |= FLAGS_SHARED; 51 52 if (op & FUTEX_CLOCK_REALTIME) 53 flags |= FLAGS_CLOCKRT; 54 55 return flags; 56 } 57 58 #define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE) 59 60 /* FUTEX2_ to FLAGS_ */ 61 static inline unsigned int futex2_to_flags(unsigned int flags2) 62 { 63 unsigned int flags = flags2 & FUTEX2_SIZE_MASK; 64 65 if (!(flags2 & FUTEX2_PRIVATE)) 66 flags |= FLAGS_SHARED; 67 68 if (flags2 & FUTEX2_NUMA) 69 flags |= FLAGS_NUMA; 70 71 if (flags2 & FUTEX2_MPOL) 72 flags |= FLAGS_MPOL; 73 74 return flags; 75 } 76 77 static inline unsigned int futex_size(unsigned int flags) 78 { 79 return 1 << (flags & FLAGS_SIZE_MASK); 80 } 81 82 static inline bool futex_flags_valid(unsigned int flags) 83 { 84 /* Only 64bit futexes for 64bit code */ 85 if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { 86 if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64) 87 return false; 88 } 89 90 /* Only 32bit futexes are implemented -- for now */ 91 if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32) 92 return false; 93 94 /* 95 * Must be able to represent both FUTEX_NO_NODE and every valid nodeid 96 * in a futex word. 97 */ 98 if (flags & FLAGS_NUMA) { 99 int bits = 8 * futex_size(flags); 100 u64 max = ~0ULL; 101 102 max >>= 64 - bits; 103 if (nr_node_ids >= max) 104 return false; 105 } 106 107 return true; 108 } 109 110 static inline bool futex_validate_input(unsigned int flags, u64 val) 111 { 112 int bits = 8 * futex_size(flags); 113 114 if (bits < 64 && (val >> bits)) 115 return false; 116 117 return true; 118 } 119 120 #ifdef CONFIG_FAIL_FUTEX 121 extern bool should_fail_futex(bool fshared); 122 #else 123 static inline bool should_fail_futex(bool fshared) 124 { 125 return false; 126 } 127 #endif 128 129 /* 130 * Hash buckets are shared by all the futex_keys that hash to the same 131 * location. Each key may have multiple futex_q structures, one for each task 132 * waiting on a futex. 133 */ 134 struct futex_hash_bucket { 135 atomic_t waiters; 136 spinlock_t lock; 137 struct plist_head chain; 138 struct futex_private_hash *priv; 139 } ____cacheline_aligned_in_smp; 140 141 /* 142 * Priority Inheritance state: 143 */ 144 struct futex_pi_state { 145 /* 146 * list of 'owned' pi_state instances - these have to be 147 * cleaned up in do_exit() if the task exits prematurely: 148 */ 149 struct list_head list; 150 151 /* 152 * The PI object: 153 */ 154 struct rt_mutex_base pi_mutex; 155 156 struct task_struct *owner; 157 refcount_t refcount; 158 159 union futex_key key; 160 } __randomize_layout; 161 162 struct futex_q; 163 typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q); 164 165 /** 166 * struct futex_q - The hashed futex queue entry, one per waiting task 167 * @list: priority-sorted list of tasks waiting on this futex 168 * @task: the task waiting on the futex 169 * @lock_ptr: the hash bucket lock 170 * @wake: the wake handler for this queue 171 * @wake_data: data associated with the wake handler 172 * @key: the key the futex is hashed on 173 * @pi_state: optional priority inheritance state 174 * @rt_waiter: rt_waiter storage for use with requeue_pi 175 * @requeue_pi_key: the requeue_pi target futex key 176 * @bitset: bitset for the optional bitmasked wakeup 177 * @requeue_state: State field for futex_requeue_pi() 178 * @drop_hb_ref: Waiter should drop the extra hash bucket reference if true 179 * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) 180 * 181 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so 182 * we can wake only the relevant ones (hashed queues may be shared). 183 * 184 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 185 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 186 * The order of wakeup is always to make the first condition true, then 187 * the second. 188 * 189 * PI futexes are typically woken before they are removed from the hash list via 190 * the rt_mutex code. See futex_unqueue_pi(). 191 */ 192 struct futex_q { 193 struct plist_node list; 194 195 struct task_struct *task; 196 spinlock_t *lock_ptr; 197 futex_wake_fn *wake; 198 void *wake_data; 199 union futex_key key; 200 struct futex_pi_state *pi_state; 201 struct rt_mutex_waiter *rt_waiter; 202 union futex_key *requeue_pi_key; 203 u32 bitset; 204 atomic_t requeue_state; 205 bool drop_hb_ref; 206 #ifdef CONFIG_PREEMPT_RT 207 struct rcuwait requeue_wait; 208 #endif 209 } __randomize_layout; 210 211 extern const struct futex_q futex_q_init; 212 213 enum futex_access { 214 FUTEX_READ, 215 FUTEX_WRITE 216 }; 217 218 extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, 219 enum futex_access rw); 220 extern void futex_q_lockptr_lock(struct futex_q *q); 221 extern struct hrtimer_sleeper * 222 futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, 223 int flags, u64 range_ns); 224 225 extern struct futex_hash_bucket *futex_hash(union futex_key *key); 226 #ifdef CONFIG_FUTEX_PRIVATE_HASH 227 extern void futex_hash_get(struct futex_hash_bucket *hb); 228 extern void futex_hash_put(struct futex_hash_bucket *hb); 229 230 extern struct futex_private_hash *futex_private_hash(void); 231 extern bool futex_private_hash_get(struct futex_private_hash *fph); 232 extern void futex_private_hash_put(struct futex_private_hash *fph); 233 234 #else /* !CONFIG_FUTEX_PRIVATE_HASH */ 235 static inline void futex_hash_get(struct futex_hash_bucket *hb) { } 236 static inline void futex_hash_put(struct futex_hash_bucket *hb) { } 237 static inline struct futex_private_hash *futex_private_hash(void) { return NULL; } 238 static inline bool futex_private_hash_get(void) { return false; } 239 static inline void futex_private_hash_put(struct futex_private_hash *fph) { } 240 #endif 241 242 DEFINE_CLASS(hb, struct futex_hash_bucket *, 243 if (_T) futex_hash_put(_T), 244 futex_hash(key), union futex_key *key); 245 246 DEFINE_CLASS(private_hash, struct futex_private_hash *, 247 if (_T) futex_private_hash_put(_T), 248 futex_private_hash(), void); 249 250 /** 251 * futex_match - Check whether two futex keys are equal 252 * @key1: Pointer to key1 253 * @key2: Pointer to key2 254 * 255 * Return 1 if two futex_keys are equal, 0 otherwise. 256 */ 257 static inline int futex_match(union futex_key *key1, union futex_key *key2) 258 { 259 return (key1 && key2 260 && key1->both.word == key2->both.word 261 && key1->both.ptr == key2->both.ptr 262 && key1->both.offset == key2->both.offset); 263 } 264 265 extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, 266 struct futex_q *q, union futex_key *key2, 267 struct task_struct *task); 268 extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout); 269 extern bool __futex_wake_mark(struct futex_q *q); 270 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q); 271 272 extern int fault_in_user_writeable(u32 __user *uaddr); 273 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key); 274 275 static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) 276 { 277 int ret; 278 279 pagefault_disable(); 280 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); 281 pagefault_enable(); 282 283 return ret; 284 } 285 286 /* 287 * This does a plain atomic user space read, and the user pointer has 288 * already been verified earlier by get_futex_key() to be both aligned 289 * and actually in user space, just like futex_atomic_cmpxchg_inatomic(). 290 * 291 * We still want to avoid any speculation, and while __get_user() is 292 * the traditional model for this, it's actually slower than doing 293 * this manually these days. 294 * 295 * We could just have a per-architecture special function for it, 296 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather 297 * than force everybody to do that, write it out long-hand using 298 * the low-level user-access infrastructure. 299 * 300 * This looks a bit overkill, but generally just results in a couple 301 * of instructions. 302 */ 303 static __always_inline int futex_get_value(u32 *dest, u32 __user *from) 304 { 305 u32 val; 306 307 if (can_do_masked_user_access()) 308 from = masked_user_access_begin(from); 309 else if (!user_read_access_begin(from, sizeof(*from))) 310 return -EFAULT; 311 unsafe_get_user(val, from, Efault); 312 user_read_access_end(); 313 *dest = val; 314 return 0; 315 Efault: 316 user_read_access_end(); 317 return -EFAULT; 318 } 319 320 static __always_inline int futex_put_value(u32 val, u32 __user *to) 321 { 322 if (can_do_masked_user_access()) 323 to = masked_user_access_begin(to); 324 else if (!user_read_access_begin(to, sizeof(*to))) 325 return -EFAULT; 326 unsafe_put_user(val, to, Efault); 327 user_read_access_end(); 328 return 0; 329 Efault: 330 user_read_access_end(); 331 return -EFAULT; 332 } 333 334 static inline int futex_get_value_locked(u32 *dest, u32 __user *from) 335 { 336 int ret; 337 338 pagefault_disable(); 339 ret = futex_get_value(dest, from); 340 pagefault_enable(); 341 342 return ret; 343 } 344 345 extern void __futex_unqueue(struct futex_q *q); 346 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, 347 struct task_struct *task); 348 extern int futex_unqueue(struct futex_q *q); 349 350 /** 351 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket 352 * @q: The futex_q to enqueue 353 * @hb: The destination hash bucket 354 * @task: Task queueing this futex 355 * 356 * The hb->lock must be held by the caller, and is released here. A call to 357 * futex_queue() is typically paired with exactly one call to futex_unqueue(). The 358 * exceptions involve the PI related operations, which may use futex_unqueue_pi() 359 * or nothing if the unqueue is done as part of the wake process and the unqueue 360 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for 361 * an example). 362 * 363 * Note that @task may be NULL, for async usage of futexes. 364 */ 365 static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, 366 struct task_struct *task) 367 __releases(&hb->lock) 368 { 369 __futex_queue(q, hb, task); 370 spin_unlock(&hb->lock); 371 } 372 373 extern void futex_unqueue_pi(struct futex_q *q); 374 375 extern void wait_for_owner_exiting(int ret, struct task_struct *exiting); 376 377 /* 378 * Reflects a new waiter being added to the waitqueue. 379 */ 380 static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb) 381 { 382 #ifdef CONFIG_SMP 383 atomic_inc(&hb->waiters); 384 /* 385 * Full barrier (A), see the ordering comment above. 386 */ 387 smp_mb__after_atomic(); 388 #endif 389 } 390 391 /* 392 * Reflects a waiter being removed from the waitqueue by wakeup 393 * paths. 394 */ 395 static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb) 396 { 397 #ifdef CONFIG_SMP 398 atomic_dec(&hb->waiters); 399 #endif 400 } 401 402 static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb) 403 { 404 #ifdef CONFIG_SMP 405 /* 406 * Full barrier (B), see the ordering comment above. 407 */ 408 smp_mb(); 409 return atomic_read(&hb->waiters); 410 #else 411 return 1; 412 #endif 413 } 414 415 extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb); 416 extern void futex_q_unlock(struct futex_hash_bucket *hb); 417 418 419 extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, 420 union futex_key *key, 421 struct futex_pi_state **ps, 422 struct task_struct *task, 423 struct task_struct **exiting, 424 int set_waiters); 425 426 extern int refill_pi_state_cache(void); 427 extern void get_pi_state(struct futex_pi_state *pi_state); 428 extern void put_pi_state(struct futex_pi_state *pi_state); 429 extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked); 430 431 /* 432 * Express the locking dependencies for lockdep: 433 */ 434 static inline void 435 double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) 436 { 437 if (hb1 > hb2) 438 swap(hb1, hb2); 439 440 spin_lock(&hb1->lock); 441 if (hb1 != hb2) 442 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); 443 } 444 445 static inline void 446 double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) 447 { 448 spin_unlock(&hb1->lock); 449 if (hb1 != hb2) 450 spin_unlock(&hb2->lock); 451 } 452 453 /* syscalls */ 454 455 extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 456 val, ktime_t *abs_time, u32 bitset, u32 __user 457 *uaddr2); 458 459 extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1, 460 u32 __user *uaddr2, unsigned int flags2, 461 int nr_wake, int nr_requeue, 462 u32 *cmpval, int requeue_pi); 463 464 extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, 465 struct hrtimer_sleeper *to, u32 bitset); 466 467 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, 468 ktime_t *abs_time, u32 bitset); 469 470 /** 471 * struct futex_vector - Auxiliary struct for futex_waitv() 472 * @w: Userspace provided data 473 * @q: Kernel side data 474 * 475 * Struct used to build an array with all data need for futex_waitv() 476 */ 477 struct futex_vector { 478 struct futex_waitv w; 479 struct futex_q q; 480 }; 481 482 extern int futex_parse_waitv(struct futex_vector *futexv, 483 struct futex_waitv __user *uwaitv, 484 unsigned int nr_futexes, futex_wake_fn *wake, 485 void *wake_data); 486 487 extern int futex_wait_multiple_setup(struct futex_vector *vs, int count, 488 int *woken); 489 490 extern int futex_unqueue_multiple(struct futex_vector *v, int count); 491 492 extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count, 493 struct hrtimer_sleeper *to); 494 495 extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset); 496 497 extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags, 498 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op); 499 500 extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags); 501 502 extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock); 503 504 #endif /* _FUTEX_H */ 505