Lines Matching +full:per +full:- +full:hart
1 // SPDX-License-Identifier: GPL-2.0-or-later
16 * PI-futex support started by Ingo Molnar and Thomas Gleixner
23 * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
29 * Kirkwood for proof-of-concept implementation.
41 #include <linux/fault-inject.h>
102 * smp_mb(); (A) <-- paired with -.
111 * `--------> smp_mb(); (B)
118 * waiters--; (b) unlock(hash_bucket(futex));
142 * acquiring the lock. It then decrements them again after releasing it -
162 * NOMMU does not have per process address space. Let the compiler optimize
175 * list of 'owned' pi_state instances - these have to be
192 * struct futex_q - The hashed futex queue entry, one per waiting task
193 * @list: priority-sorted list of tasks waiting on this futex
206 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
295 debugfs_create_bool("ignore-private", mode, dir, in fail_futex_debugfs()
323 atomic_inc(&hb->waiters); in hb_waiters_inc()
338 atomic_dec(&hb->waiters); in hb_waiters_dec()
349 return atomic_read(&hb->waiters); in hb_waiters_pending()
356 * hash_futex - Return the hash bucket in the global hash
365 key->both.offset); in hash_futex()
367 return &futex_queues[hash & (futex_hashsize - 1)]; in hash_futex()
372 * match_futex - Check whether two futex keys are equal
381 && key1->both.word == key2->both.word in match_futex()
382 && key1->both.ptr == key2->both.ptr in match_futex()
383 && key1->both.offset == key2->both.offset); in match_futex()
392 * futex_setup_timer - set up the sleeping hrtimer.
415 hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); in futex_setup_timer()
423 * This relies on u64 not wrapping in the life-time of the machine; which with
434 * It is important that match_futex() will never have a false-positive, esp.
435 * for PI futexes that can mess up the state. The above argues that false-negatives
444 old = atomic64_read(&inode->i_sequence); in get_inode_sequence_number()
453 old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); in get_inode_sequence_number()
461 * get_futex_key() - Get parameters which are the keys for a futex
474 * ( inode->i_sequence, page->index, offset_within_page )
480 * ( current->mm, address, 0 )
491 struct mm_struct *mm = current->mm; in get_futex_key()
499 key->both.offset = address % PAGE_SIZE; in get_futex_key()
501 return -EINVAL; in get_futex_key()
502 address -= key->both.offset; in get_futex_key()
505 return -EFAULT; in get_futex_key()
508 return -EFAULT; in get_futex_key()
518 key->private.mm = mm; in get_futex_key()
519 key->private.address = address; in get_futex_key()
526 return -EFAULT; in get_futex_key()
531 * and get read-only access. in get_futex_key()
533 if (err == -EFAULT && rw == FUTEX_READ) { in get_futex_key()
546 * file-backed region case and guards against movement to swap cache. in get_futex_key()
550 * From this point on, mapping will be re-verified if necessary and in get_futex_key()
556 * based on the address. For filesystem-backed pages, the tail is in get_futex_key()
562 mapping = READ_ONCE(page->mapping); in get_futex_key()
565 * If page->mapping is NULL, then it cannot be a PageAnon in get_futex_key()
577 * an unlikely race, but we do need to retry for page->mapping. in get_futex_key()
588 shmem_swizzled = PageSwapCache(page) || page->mapping; in get_futex_key()
595 return -EFAULT; in get_futex_key()
605 * it's a read-only handle, it's expected that futexes attach to in get_futex_key()
614 err = -EFAULT; in get_futex_key()
618 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ in get_futex_key()
619 key->private.mm = mm; in get_futex_key()
620 key->private.address = address; in get_futex_key()
627 * the page->mapping must be traversed. Ordinarily this should in get_futex_key()
634 * mapping->host can be safely accessed as being a valid inode. in get_futex_key()
638 if (READ_ONCE(page->mapping) != mapping) { in get_futex_key()
645 inode = READ_ONCE(mapping->host); in get_futex_key()
653 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ in get_futex_key()
654 key->shared.i_seq = get_inode_sequence_number(inode); in get_futex_key()
655 key->shared.pgoff = basepage_index(tail); in get_futex_key()
665 * fault_in_user_writeable() - Fault in user address and verify RW access
671 * We have no generic implementation of a non-destructive write to the
678 struct mm_struct *mm = current->mm; in fault_in_user_writeable()
690 * futex_top_waiter() - Return the highest priority waiter on a futex
701 plist_for_each_entry(this, &hb->chain, list) { in futex_top_waiter()
702 if (match_futex(&this->key, key)) in futex_top_waiter()
728 return ret ? -EFAULT : 0; in get_futex_value_locked()
739 if (likely(current->pi_state_cache)) in refill_pi_state_cache()
745 return -ENOMEM; in refill_pi_state_cache()
747 INIT_LIST_HEAD(&pi_state->list); in refill_pi_state_cache()
749 pi_state->owner = NULL; in refill_pi_state_cache()
750 refcount_set(&pi_state->refcount, 1); in refill_pi_state_cache()
751 pi_state->key = FUTEX_KEY_INIT; in refill_pi_state_cache()
753 current->pi_state_cache = pi_state; in refill_pi_state_cache()
760 struct futex_pi_state *pi_state = current->pi_state_cache; in alloc_pi_state()
763 current->pi_state_cache = NULL; in alloc_pi_state()
770 WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); in get_pi_state()
782 if (!refcount_dec_and_test(&pi_state->refcount)) in put_pi_state()
786 * If pi_state->owner is NULL, the owner is most probably dying in put_pi_state()
789 if (pi_state->owner) { in put_pi_state()
793 raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
794 owner = pi_state->owner; in put_pi_state()
796 raw_spin_lock(&owner->pi_lock); in put_pi_state()
797 list_del_init(&pi_state->list); in put_pi_state()
798 raw_spin_unlock(&owner->pi_lock); in put_pi_state()
800 rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); in put_pi_state()
801 raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
804 if (current->pi_state_cache) { in put_pi_state()
808 * pi_state->list is already empty. in put_pi_state()
809 * clear pi_state->owner. in put_pi_state()
810 * refcount is at 0 - put it back to 1. in put_pi_state()
812 pi_state->owner = NULL; in put_pi_state()
813 refcount_set(&pi_state->refcount, 1); in put_pi_state()
814 current->pi_state_cache = pi_state; in put_pi_state()
822 * Kernel cleans up PI-state, but userspace is likely hosed.
823 * (Robust-futex cleanup is separate and might save the day for userspace.)
827 struct list_head *next, *head = &curr->pi_state_list; in exit_pi_state_list()
839 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
841 next = head->next; in exit_pi_state_list()
843 key = pi_state->key; in exit_pi_state_list()
856 if (!refcount_inc_not_zero(&pi_state->refcount)) { in exit_pi_state_list()
857 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
859 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
862 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
864 spin_lock(&hb->lock); in exit_pi_state_list()
865 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
866 raw_spin_lock(&curr->pi_lock); in exit_pi_state_list()
868 * We dropped the pi-lock, so re-check whether this in exit_pi_state_list()
869 * task still owns the PI-state: in exit_pi_state_list()
871 if (head->next != next) { in exit_pi_state_list()
872 /* retain curr->pi_lock for the loop invariant */ in exit_pi_state_list()
873 raw_spin_unlock(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
874 spin_unlock(&hb->lock); in exit_pi_state_list()
879 WARN_ON(pi_state->owner != curr); in exit_pi_state_list()
880 WARN_ON(list_empty(&pi_state->list)); in exit_pi_state_list()
881 list_del_init(&pi_state->list); in exit_pi_state_list()
882 pi_state->owner = NULL; in exit_pi_state_list()
884 raw_spin_unlock(&curr->pi_lock); in exit_pi_state_list()
885 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
886 spin_unlock(&hb->lock); in exit_pi_state_list()
888 rt_mutex_futex_unlock(&pi_state->pi_mutex); in exit_pi_state_list()
891 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
893 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
902 * Waiter | pi_state | pi->owner | uTID | uODIED | ?
904 * [1] NULL | --- | --- | 0 | 0/1 | Valid
905 * [2] NULL | --- | --- | >0 | 0/1 | Valid
907 * [3] Found | NULL | -- | Any | 0/1 | Invalid
937 * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
951 * hb->lock:
953 * hb -> futex_q, relation
954 * futex_q -> pi_state, relation
959 * pi_mutex->wait_lock:
965 * p->pi_lock:
967 * p->pi_state_list -> pi_state->list, relation
969 * pi_state->refcount:
976 * hb->lock
977 * pi_mutex->wait_lock
978 * p->pi_lock
996 * Userspace might have messed up non-PI and PI futexes [3] in attach_to_pi_state()
999 return -EINVAL; in attach_to_pi_state()
1002 * We get here with hb->lock held, and having found a in attach_to_pi_state()
1004 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), in attach_to_pi_state()
1013 WARN_ON(!refcount_read(&pi_state->refcount)); in attach_to_pi_state()
1019 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1040 * pi_state->rt_mutex will fixup owner. in attach_to_pi_state()
1042 if (!pi_state->owner) { in attach_to_pi_state()
1070 if (!pi_state->owner) in attach_to_pi_state()
1079 if (pid != task_pid_vnr(pi_state->owner)) in attach_to_pi_state()
1084 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1089 ret = -EINVAL; in attach_to_pi_state()
1093 ret = -EAGAIN; in attach_to_pi_state()
1097 ret = -EFAULT; in attach_to_pi_state()
1101 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1106 * wait_for_owner_exiting - Block until the owner has exited
1114 if (ret != -EBUSY) { in wait_for_owner_exiting()
1119 if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) in wait_for_owner_exiting()
1122 mutex_lock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
1125 * while the task was in exec()->exec_futex_release() then it can in wait_for_owner_exiting()
1131 mutex_unlock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
1145 if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) in handle_exit_race()
1146 return -EBUSY; in handle_exit_race()
1157 * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID in handle_exit_race()
1162 * } if (!tsk->flags & PF_EXITING) { in handle_exit_race()
1164 * tsk->futex_state = } else { in handle_exit_race()
1165 * FUTEX_STATE_DEAD; if (tsk->futex_state != in handle_exit_race()
1167 * return -EAGAIN; in handle_exit_race()
1168 * return -ESRCH; <--- FAIL in handle_exit_race()
1178 return -EFAULT; in handle_exit_race()
1182 return -EAGAIN; in handle_exit_race()
1189 return -ESRCH; in handle_exit_race()
1205 * We are the first waiter - try to look up the real owner and attach in attach_to_pi_owner()
1212 return -EAGAIN; in attach_to_pi_owner()
1217 if (unlikely(p->flags & PF_KTHREAD)) { in attach_to_pi_owner()
1219 return -EPERM; in attach_to_pi_owner()
1225 * in futex_exit_release(), we do this protected by p->pi_lock: in attach_to_pi_owner()
1227 raw_spin_lock_irq(&p->pi_lock); in attach_to_pi_owner()
1228 if (unlikely(p->futex_state != FUTEX_STATE_OK)) { in attach_to_pi_owner()
1236 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1246 if (ret == -EBUSY) in attach_to_pi_owner()
1256 * This creates pi_state, we have hb->lock held, this means nothing can in attach_to_pi_owner()
1265 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); in attach_to_pi_owner()
1268 pi_state->key = *key; in attach_to_pi_owner()
1270 WARN_ON(!list_empty(&pi_state->list)); in attach_to_pi_owner()
1271 list_add(&pi_state->list, &p->pi_state_list); in attach_to_pi_owner()
1273 * Assignment without holding pi_state->pi_mutex.wait_lock is safe in attach_to_pi_owner()
1276 pi_state->owner = p; in attach_to_pi_owner()
1277 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1298 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); in lookup_pi_state()
1301 * We are the first waiter - try to look up the owner based on in lookup_pi_state()
1313 return -EFAULT; in lock_pi_update_atomic()
1320 return curval != uval ? -EAGAIN : 0; in lock_pi_update_atomic()
1324 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
1337 * - 0 - ready to wait;
1338 * - 1 - acquired the lock;
1339 * - <0 - error
1341 * The hb->lock and futex_key refs shall be held by the caller.
1343 * @exiting is only set when the return value is -EBUSY. If so, this holds
1363 return -EFAULT; in futex_lock_pi_atomic()
1366 return -EFAULT; in futex_lock_pi_atomic()
1372 return -EDEADLK; in futex_lock_pi_atomic()
1375 return -EDEADLK; in futex_lock_pi_atomic()
1383 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); in futex_lock_pi_atomic()
1411 * the kernel and blocked on hb->lock. in futex_lock_pi_atomic()
1426 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
1429 * The q->lock_ptr must not be NULL and must be held by the caller.
1435 if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) in __unqueue_futex()
1437 lockdep_assert_held(q->lock_ptr); in __unqueue_futex()
1439 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); in __unqueue_futex()
1440 plist_del(&q->list, &hb->chain); in __unqueue_futex()
1452 struct task_struct *p = q->task; in mark_wake_futex()
1454 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) in mark_wake_futex()
1460 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL in mark_wake_futex()
1466 smp_store_release(&q->lock_ptr, NULL); in mark_wake_futex()
1470 * the hb->lock. in mark_wake_futex()
1486 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); in wake_futex_pi()
1489 * As per the comment in futex_unlock_pi() this should not happen. in wake_futex_pi()
1496 ret = -EAGAIN; in wake_futex_pi()
1508 ret = -EFAULT; in wake_futex_pi()
1516 * try the TID->0 transition) raced with a waiter setting the in wake_futex_pi()
1521 ret = -EAGAIN; in wake_futex_pi()
1523 ret = -EINVAL; in wake_futex_pi()
1534 raw_spin_lock(&pi_state->owner->pi_lock); in wake_futex_pi()
1535 WARN_ON(list_empty(&pi_state->list)); in wake_futex_pi()
1536 list_del_init(&pi_state->list); in wake_futex_pi()
1537 raw_spin_unlock(&pi_state->owner->pi_lock); in wake_futex_pi()
1539 raw_spin_lock(&new_owner->pi_lock); in wake_futex_pi()
1540 WARN_ON(!list_empty(&pi_state->list)); in wake_futex_pi()
1541 list_add(&pi_state->list, &new_owner->pi_state_list); in wake_futex_pi()
1542 pi_state->owner = new_owner; in wake_futex_pi()
1543 raw_spin_unlock(&new_owner->pi_lock); in wake_futex_pi()
1545 postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); in wake_futex_pi()
1548 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in wake_futex_pi()
1563 spin_lock(&hb1->lock); in double_lock_hb()
1565 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1567 spin_lock(&hb2->lock); in double_lock_hb()
1568 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1575 spin_unlock(&hb1->lock); in double_unlock_hb()
1577 spin_unlock(&hb2->lock); in double_unlock_hb()
1593 return -EINVAL; in futex_wake()
1605 spin_lock(&hb->lock); in futex_wake()
1607 plist_for_each_entry_safe(this, next, &hb->chain, list) { in futex_wake()
1608 if (match_futex (&this->key, &key)) { in futex_wake()
1609 if (this->pi_state || this->rt_waiter) { in futex_wake()
1610 ret = -EINVAL; in futex_wake()
1615 if (!(this->bitset & bitset)) in futex_wake()
1624 spin_unlock(&hb->lock); in futex_wake()
1639 char comm[sizeof(current->comm)]; in futex_atomic_op_inuser()
1641 * kill this print and return -EINVAL when userspace in futex_atomic_op_inuser()
1671 return -ENOSYS; in futex_atomic_op_inuser()
1707 unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { in futex_wake_op()
1716 if (op_ret == -EFAULT) { in futex_wake_op()
1731 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_wake_op()
1732 if (match_futex (&this->key, &key1)) { in futex_wake_op()
1733 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1734 ret = -EINVAL; in futex_wake_op()
1745 plist_for_each_entry_safe(this, next, &hb2->chain, list) { in futex_wake_op()
1746 if (match_futex (&this->key, &key2)) { in futex_wake_op()
1747 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1748 ret = -EINVAL; in futex_wake_op()
1766 * requeue_futex() - Requeue a futex_q from one hb to another
1781 if (likely(&hb1->chain != &hb2->chain)) { in requeue_futex()
1782 plist_del(&q->list, &hb1->chain); in requeue_futex()
1785 plist_add(&q->list, &hb2->chain); in requeue_futex()
1786 q->lock_ptr = &hb2->lock; in requeue_futex()
1788 q->key = *key2; in requeue_futex()
1792 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1801 * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1803 * with both q->lock_ptr and hb->lock held.
1809 q->key = *key; in requeue_pi_wake_futex()
1813 WARN_ON(!q->rt_waiter); in requeue_pi_wake_futex()
1814 q->rt_waiter = NULL; in requeue_pi_wake_futex()
1816 q->lock_ptr = &hb->lock; in requeue_pi_wake_futex()
1818 wake_up_state(q->task, TASK_NORMAL); in requeue_pi_wake_futex()
1822 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1838 * @exiting is only set when the return value is -EBUSY. If so, this holds
1843 * - 0 - failed to acquire the lock atomically;
1844 * - >0 - acquired the lock, return value is vpid of the top_waiter
1845 * - <0 - error
1858 return -EFAULT; in futex_proxy_trylock_atomic()
1861 return -EFAULT; in futex_proxy_trylock_atomic()
1878 if (!match_futex(top_waiter->requeue_pi_key, key2)) in futex_proxy_trylock_atomic()
1879 return -EINVAL; in futex_proxy_trylock_atomic()
1886 vpid = task_pid_vnr(top_waiter->task); in futex_proxy_trylock_atomic()
1887 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, in futex_proxy_trylock_atomic()
1897 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1902 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1904 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1911 * - >=0 - on success, the number of tasks requeued or woken;
1912 * - <0 - on error
1926 return -EINVAL; in futex_requeue()
1929 * When PI not supported: return -ENOSYS if requeue_pi is true, in futex_requeue()
1935 return -ENOSYS; in futex_requeue()
1943 return -EINVAL; in futex_requeue()
1950 return -ENOMEM; in futex_requeue()
1955 * waiters and no owner. However, second and third wake-ups in futex_requeue()
1962 return -EINVAL; in futex_requeue()
1979 return -EINVAL; in futex_requeue()
2007 ret = -EAGAIN; in futex_requeue()
2012 if (requeue_pi && (task_count - nr_wake < nr_requeue)) { in futex_requeue()
2059 case -EFAULT: in futex_requeue()
2066 case -EBUSY: in futex_requeue()
2067 case -EAGAIN: in futex_requeue()
2070 * - EBUSY: Owner is exiting and we just wait for the in futex_requeue()
2072 * - EAGAIN: The user space value changed. in futex_requeue()
2089 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_requeue()
2090 if (task_count - nr_wake >= nr_requeue) in futex_requeue()
2093 if (!match_futex(&this->key, &key1)) in futex_requeue()
2103 if ((requeue_pi && !this->rt_waiter) || in futex_requeue()
2104 (!requeue_pi && this->rt_waiter) || in futex_requeue()
2105 this->pi_state) { in futex_requeue()
2106 ret = -EINVAL; in futex_requeue()
2121 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { in futex_requeue()
2122 ret = -EINVAL; in futex_requeue()
2137 this->pi_state = pi_state; in futex_requeue()
2138 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, in futex_requeue()
2139 this->rt_waiter, in futex_requeue()
2140 this->task); in futex_requeue()
2145 * this->pi_state because the waiter needs the in futex_requeue()
2161 this->pi_state = NULL; in futex_requeue()
2187 /* The key must be already stored in q->key. */
2189 __acquires(&hb->lock) in queue_lock()
2193 hb = hash_futex(&q->key); in queue_lock()
2197 * a potential waker won't miss a to-be-slept task that is in queue_lock()
2205 q->lock_ptr = &hb->lock; in queue_lock()
2207 spin_lock(&hb->lock); in queue_lock()
2213 __releases(&hb->lock) in queue_unlock()
2215 spin_unlock(&hb->lock); in queue_unlock()
2225 * - either the real thread-priority for the real-time threads in __queue_me()
2227 * - or MAX_RT_PRIO for non-RT threads. in __queue_me()
2228 * Thus, all RT-threads are woken first in priority order, and in __queue_me()
2231 prio = min(current->normal_prio, MAX_RT_PRIO); in __queue_me()
2233 plist_node_init(&q->list, prio); in __queue_me()
2234 plist_add(&q->list, &hb->chain); in __queue_me()
2235 q->task = current; in __queue_me()
2239 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
2243 * The hb->lock must be held by the caller, and is released here. A call to
2251 __releases(&hb->lock) in queue_me()
2254 spin_unlock(&hb->lock); in queue_me()
2258 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
2261 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
2265 * - 1 - if the futex_q was still queued (and we removed unqueued it);
2266 * - 0 - if the futex_q was already removed by the waking thread
2276 * q->lock_ptr can change between this read and the following spin_lock. in unqueue_me()
2277 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and in unqueue_me()
2280 lock_ptr = READ_ONCE(q->lock_ptr); in unqueue_me()
2284 * q->lock_ptr can change between reading it and in unqueue_me()
2289 * q->lock_ptr must have changed (maybe several times) in unqueue_me()
2296 if (unlikely(lock_ptr != q->lock_ptr)) { in unqueue_me()
2302 BUG_ON(q->pi_state); in unqueue_me()
2317 __releases(q->lock_ptr) in unqueue_me_pi()
2321 BUG_ON(!q->pi_state); in unqueue_me_pi()
2322 put_pi_state(q->pi_state); in unqueue_me_pi()
2323 q->pi_state = NULL; in unqueue_me_pi()
2325 spin_unlock(q->lock_ptr); in unqueue_me_pi()
2331 struct futex_pi_state *pi_state = q->pi_state; in fixup_pi_state_owner()
2337 lockdep_assert_held(q->lock_ptr); in fixup_pi_state_owner()
2339 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2341 oldowner = pi_state->owner; in fixup_pi_state_owner()
2346 * - we stole the lock and pi_state->owner needs updating to reflect in fixup_pi_state_owner()
2351 * - someone stole our lock and we need to fix things to point to the in fixup_pi_state_owner()
2377 if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { in fixup_pi_state_owner()
2387 newowner = rt_mutex_owner(&pi_state->pi_mutex); in fixup_pi_state_owner()
2397 err = -EAGAIN; in fixup_pi_state_owner()
2415 if (!pi_state->owner) in fixup_pi_state_owner()
2438 if (pi_state->owner != NULL) { in fixup_pi_state_owner()
2439 raw_spin_lock(&pi_state->owner->pi_lock); in fixup_pi_state_owner()
2440 WARN_ON(list_empty(&pi_state->list)); in fixup_pi_state_owner()
2441 list_del_init(&pi_state->list); in fixup_pi_state_owner()
2442 raw_spin_unlock(&pi_state->owner->pi_lock); in fixup_pi_state_owner()
2445 pi_state->owner = newowner; in fixup_pi_state_owner()
2447 raw_spin_lock(&newowner->pi_lock); in fixup_pi_state_owner()
2448 WARN_ON(!list_empty(&pi_state->list)); in fixup_pi_state_owner()
2449 list_add(&pi_state->list, &newowner->pi_state_list); in fixup_pi_state_owner()
2450 raw_spin_unlock(&newowner->pi_lock); in fixup_pi_state_owner()
2451 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2464 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely in fixup_pi_state_owner()
2465 * drop hb->lock since the caller owns the hb -> futex_q relation. in fixup_pi_state_owner()
2466 * Dropping the pi_mutex->wait_lock requires the state revalidate. in fixup_pi_state_owner()
2469 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2470 spin_unlock(q->lock_ptr); in fixup_pi_state_owner()
2473 case -EFAULT: in fixup_pi_state_owner()
2477 case -EAGAIN: in fixup_pi_state_owner()
2488 spin_lock(q->lock_ptr); in fixup_pi_state_owner()
2489 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2494 if (pi_state->owner != oldowner) { in fixup_pi_state_owner()
2505 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2512 * fixup_owner() - Post lock pi_state and corner case management
2522 * - 1 - success, lock taken;
2523 * - 0 - success, lock not taken;
2524 * - <0 - on error (-EFAULT)
2533 * did a lock-steal - fix up the PI-state in that case: in fixup_owner()
2535 * Speculative pi_state->owner read (we don't hold wait_lock); in fixup_owner()
2536 * since we own the lock pi_state->owner == current is the in fixup_owner()
2539 if (q->pi_state->owner != current) in fixup_owner()
2549 * Another speculative read; pi_state->owner == current is unstable in fixup_owner()
2552 if (q->pi_state->owner == current) { in fixup_owner()
2561 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { in fixup_owner()
2562 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " in fixup_owner()
2563 "pi-state %p\n", ret, in fixup_owner()
2564 q->pi_state->pi_mutex.owner, in fixup_owner()
2565 q->pi_state->owner); in fixup_owner()
2572 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
2597 if (likely(!plist_node_empty(&q->list))) { in futex_wait_queue_me()
2603 if (!timeout || timeout->task) in futex_wait_queue_me()
2610 * futex_wait_setup() - Prepare to wait on a futex
2623 * - 0 - uaddr contains val and hb has been locked;
2624 * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
2633 * Access the page AFTER the hash-bucket is locked. in futex_wait_setup()
2641 * any cond. If we locked the hash-bucket after testing *uaddr, that in futex_wait_setup()
2645 * On the other hand, we insert q and release the hash-bucket only in futex_wait_setup()
2651 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); in futex_wait_setup()
2675 ret = -EWOULDBLOCK; in futex_wait_setup()
2691 return -EINVAL; in futex_wait()
2695 current->timer_slack_ns); in futex_wait()
2713 ret = -ETIMEDOUT; in futex_wait()
2714 if (to && !to->task) in futex_wait()
2724 ret = -ERESTARTSYS; in futex_wait()
2728 restart = ¤t->restart_block; in futex_wait()
2729 restart->fn = futex_wait_restart; in futex_wait()
2730 restart->futex.uaddr = uaddr; in futex_wait()
2731 restart->futex.val = val; in futex_wait()
2732 restart->futex.time = *abs_time; in futex_wait()
2733 restart->futex.bitset = bitset; in futex_wait()
2734 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; in futex_wait()
2736 ret = -ERESTART_RESTARTBLOCK; in futex_wait()
2740 hrtimer_cancel(&to->timer); in futex_wait()
2741 destroy_hrtimer_on_stack(&to->timer); in futex_wait()
2749 u32 __user *uaddr = restart->futex.uaddr; in futex_wait_restart()
2752 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { in futex_wait_restart()
2753 t = restart->futex.time; in futex_wait_restart()
2756 restart->fn = do_no_restart_syscall; in futex_wait_restart()
2758 return (long)futex_wait(uaddr, restart->futex.flags, in futex_wait_restart()
2759 restart->futex.val, tp, restart->futex.bitset); in futex_wait_restart()
2764 * Userspace tried a 0 -> TID atomic transition of the futex value
2767 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
2784 return -ENOSYS; in futex_lock_pi()
2787 return -ENOMEM; in futex_lock_pi()
2811 case -EFAULT: in futex_lock_pi()
2813 case -EBUSY: in futex_lock_pi()
2814 case -EAGAIN: in futex_lock_pi()
2817 * - EBUSY: Task is exiting and we just wait for the in futex_lock_pi()
2819 * - EAGAIN: The user space value changed. in futex_lock_pi()
2843 ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); in futex_lock_pi()
2845 ret = ret ? 0 : -EWOULDBLOCK; in futex_lock_pi()
2852 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not in futex_lock_pi()
2854 * include hb->lock in the blocking chain, even through we'll not in in futex_lock_pi()
2855 * fact hold it while blocking. This will lead it to report -EDEADLK in futex_lock_pi()
2858 * Therefore acquire wait_lock while holding hb->lock, but drop the in futex_lock_pi()
2860 * interleaves with futex_unlock_pi() -- which does a similar lock in futex_lock_pi()
2861 * handoff -- such that the latter can observe the futex_q::pi_state in futex_lock_pi()
2864 raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
2871 ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); in futex_lock_pi()
2872 raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
2883 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); in futex_lock_pi()
2889 * first acquire the hb->lock before removing the lock from the in futex_lock_pi()
2896 if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) in futex_lock_pi()
2907 * the lock, clear our -ETIMEDOUT or -EINTR. in futex_lock_pi()
2916 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { in futex_lock_pi()
2925 rt_mutex_futex_unlock(&pi_state->pi_mutex); in futex_lock_pi()
2936 hrtimer_cancel(&to->timer); in futex_lock_pi()
2937 destroy_hrtimer_on_stack(&to->timer); in futex_lock_pi()
2939 return ret != -EINTR ? ret : -ERESTARTNOINTR; in futex_lock_pi()
2955 * Userspace attempted a TID -> 0 atomic transition, and failed.
2956 * This is the in-kernel slowpath: we look up the PI state (if any),
2957 * and do the rt-mutex unlock.
2968 return -ENOSYS; in futex_unlock_pi()
2972 return -EFAULT; in futex_unlock_pi()
2977 return -EPERM; in futex_unlock_pi()
2984 spin_lock(&hb->lock); in futex_unlock_pi()
2993 struct futex_pi_state *pi_state = top_waiter->pi_state; in futex_unlock_pi()
2995 ret = -EINVAL; in futex_unlock_pi()
3003 if (pi_state->owner != current) in futex_unlock_pi()
3008 * By taking wait_lock while still holding hb->lock, we ensure in futex_unlock_pi()
3017 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in futex_unlock_pi()
3018 spin_unlock(&hb->lock); in futex_unlock_pi()
3020 /* drops pi_state->pi_mutex.wait_lock */ in futex_unlock_pi()
3032 * pagefault, so retry the user-access and the wakeup: in futex_unlock_pi()
3034 if (ret == -EFAULT) in futex_unlock_pi()
3040 if (ret == -EAGAIN) in futex_unlock_pi()
3052 * on hb->lock. So we can safely ignore them. We do neither in futex_unlock_pi()
3057 spin_unlock(&hb->lock); in futex_unlock_pi()
3059 case -EFAULT: in futex_unlock_pi()
3062 case -EAGAIN: in futex_unlock_pi()
3074 ret = (curval == uval) ? 0 : -EAGAIN; in futex_unlock_pi()
3077 spin_unlock(&hb->lock); in futex_unlock_pi()
3095 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
3107 * - 0 = no early wakeup detected;
3108 * - <0 = -ETIMEDOUT or -ERESTARTNOINTR
3124 if (!match_futex(&q->key, key2)) { in handle_early_requeue_pi_wakeup()
3125 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); in handle_early_requeue_pi_wakeup()
3130 plist_del(&q->list, &hb->chain); in handle_early_requeue_pi_wakeup()
3134 ret = -EWOULDBLOCK; in handle_early_requeue_pi_wakeup()
3135 if (timeout && !timeout->task) in handle_early_requeue_pi_wakeup()
3136 ret = -ETIMEDOUT; in handle_early_requeue_pi_wakeup()
3138 ret = -ERESTARTNOINTR; in handle_early_requeue_pi_wakeup()
3144 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
3145 * @uaddr: the futex we initially wait on (non-pi)
3151 * @uaddr2: the pi futex we will take prior to returning to user-space
3161 * via the following--
3167 * If 3, cleanup and return -ERESTARTNOINTR.
3175 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
3177 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
3180 * - 0 - On success;
3181 * - <0 - On error
3196 return -ENOSYS; in futex_wait_requeue_pi()
3199 return -EINVAL; in futex_wait_requeue_pi()
3202 return -EINVAL; in futex_wait_requeue_pi()
3205 current->timer_slack_ns); in futex_wait_requeue_pi()
3235 ret = -EINVAL; in futex_wait_requeue_pi()
3242 spin_lock(&hb->lock); in futex_wait_requeue_pi()
3244 spin_unlock(&hb->lock); in futex_wait_requeue_pi()
3250 * we took the hb->lock above, we also know that futex_requeue() has in futex_wait_requeue_pi()
3261 * did a lock-steal - fix up the PI-state in that case. in futex_wait_requeue_pi()
3263 if (q.pi_state && (q.pi_state->owner != current)) { in futex_wait_requeue_pi()
3266 if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { in futex_wait_requeue_pi()
3286 pi_mutex = &q.pi_state->pi_mutex; in futex_wait_requeue_pi()
3301 * acquired the lock, clear -ETIMEDOUT or -EINTR. in futex_wait_requeue_pi()
3311 if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { in futex_wait_requeue_pi()
3321 rt_mutex_futex_unlock(&pi_state->pi_mutex); in futex_wait_requeue_pi()
3325 if (ret == -EINTR) { in futex_wait_requeue_pi()
3330 * -EWOULDBLOCK. Save the overhead of the restart and return in futex_wait_requeue_pi()
3331 * -EWOULDBLOCK directly. in futex_wait_requeue_pi()
3333 ret = -EWOULDBLOCK; in futex_wait_requeue_pi()
3338 hrtimer_cancel(&to->timer); in futex_wait_requeue_pi()
3339 destroy_hrtimer_on_stack(&to->timer); in futex_wait_requeue_pi()
3348 * Implementation: user-space maintains a per-thread list of locks it
3353 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
3360 * sys_set_robust_list() - Set the robust-futex list head of a task
3361 * @head: pointer to the list-head
3362 * @len: length of the list-head, as userspace expects
3368 return -ENOSYS; in SYSCALL_DEFINE2()
3373 return -EINVAL; in SYSCALL_DEFINE2()
3375 current->robust_list = head; in SYSCALL_DEFINE2()
3381 * sys_get_robust_list() - Get the robust-futex list head of a task
3383 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
3395 return -ENOSYS; in SYSCALL_DEFINE3()
3399 ret = -ESRCH; in SYSCALL_DEFINE3()
3408 ret = -EPERM; in SYSCALL_DEFINE3()
3412 head = p->robust_list; in SYSCALL_DEFINE3()
3416 return -EFAULT; in SYSCALL_DEFINE3()
3430 * Process a futex-list entry, check whether it's owned by the
3441 return -1; in handle_futex_death()
3445 return -1; in handle_futex_death()
3464 * 1) task->robust_list->list_op_pending != NULL in handle_futex_death()
3491 * futex_wake() even if OWNER_DIED is already set - in handle_futex_death()
3493 * thread-death.) The rest of the cleanup is done in in handle_futex_death()
3509 case -EFAULT: in handle_futex_death()
3511 return -1; in handle_futex_death()
3514 case -EAGAIN: in handle_futex_death()
3528 * Wake robust non-PI futexes here. The wakeup of in handle_futex_death()
3538 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3547 return -EFAULT; in fetch_robust_entry()
3556 * Walk curr->robust_list (very carefully, it's a userspace list!)
3559 * We silently return on any sign of list-walking problem.
3563 struct robust_list_head __user *head = curr->robust_list; in exit_robust_list()
3577 if (fetch_robust_entry(&entry, &head->list.next, &pi)) in exit_robust_list()
3582 if (get_user(futex_offset, &head->futex_offset)) in exit_robust_list()
3585 * Fetch any possibly pending lock-add first, and handle it in exit_robust_list()
3588 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) in exit_robust_list()
3592 while (entry != &head->list) { in exit_robust_list()
3597 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); in exit_robust_list()
3614 if (!--limit) in exit_robust_list()
3628 if (unlikely(tsk->robust_list)) { in futex_cleanup()
3630 tsk->robust_list = NULL; in futex_cleanup()
3634 if (unlikely(tsk->compat_robust_list)) { in futex_cleanup()
3636 tsk->compat_robust_list = NULL; in futex_cleanup()
3640 if (unlikely(!list_empty(&tsk->pi_state_list))) in futex_cleanup()
3645 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
3664 if (tsk->futex_state == FUTEX_STATE_EXITING) in futex_exit_recursive()
3665 mutex_unlock(&tsk->futex_exit_mutex); in futex_exit_recursive()
3666 tsk->futex_state = FUTEX_STATE_DEAD; in futex_exit_recursive()
3674 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in in futex_cleanup_begin()
3677 mutex_lock(&tsk->futex_exit_mutex); in futex_cleanup_begin()
3680 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. in futex_cleanup_begin()
3682 * This ensures that all subsequent checks of tsk->futex_state in in futex_cleanup_begin()
3684 * tsk->pi_lock held. in futex_cleanup_begin()
3687 * the state change under tsk->pi_lock by a concurrent waiter must in futex_cleanup_begin()
3690 raw_spin_lock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3691 tsk->futex_state = FUTEX_STATE_EXITING; in futex_cleanup_begin()
3692 raw_spin_unlock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3701 tsk->futex_state = state; in futex_cleanup_end()
3706 mutex_unlock(&tsk->futex_exit_mutex); in futex_cleanup_end()
3747 return -ENOSYS; in do_futex()
3757 return -ENOSYS; in do_futex()
3790 return -ENOSYS; in do_futex()
3807 return -EFAULT; in SYSCALL_DEFINE6()
3809 return -EFAULT; in SYSCALL_DEFINE6()
3811 return -EINVAL; in SYSCALL_DEFINE6()
3833 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3840 return -EFAULT; in compat_fetch_robust_entry()
3858 * Walk curr->robust_list (very carefully, it's a userspace list!)
3861 * We silently return on any sign of list-walking problem.
3865 struct compat_robust_list_head __user *head = curr->compat_robust_list; in compat_exit_robust_list()
3880 if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) in compat_exit_robust_list()
3885 if (get_user(futex_offset, &head->futex_offset)) in compat_exit_robust_list()
3888 * Fetch any possibly pending lock-add first, and handle it in compat_exit_robust_list()
3892 &head->list_op_pending, &pip)) in compat_exit_robust_list()
3896 while (entry != (struct robust_list __user *) &head->list) { in compat_exit_robust_list()
3902 (compat_uptr_t __user *)&entry->next, &next_pi); in compat_exit_robust_list()
3922 if (!--limit) in compat_exit_robust_list()
3939 return -ENOSYS; in COMPAT_SYSCALL_DEFINE2()
3942 return -EINVAL; in COMPAT_SYSCALL_DEFINE2()
3944 current->compat_robust_list = head; in COMPAT_SYSCALL_DEFINE2()
3958 return -ENOSYS; in COMPAT_SYSCALL_DEFINE3()
3962 ret = -ESRCH; in COMPAT_SYSCALL_DEFINE3()
3971 ret = -EPERM; in COMPAT_SYSCALL_DEFINE3()
3975 head = p->compat_robust_list; in COMPAT_SYSCALL_DEFINE3()
3979 return -EFAULT; in COMPAT_SYSCALL_DEFINE3()
4003 return -EFAULT; in SYSCALL_DEFINE6()
4005 return -EINVAL; in SYSCALL_DEFINE6()
4033 * guaranteed to fault and we get -EFAULT on functional in futex_detect_cmpxchg()
4034 * implementation, the non-functional ones will return in futex_detect_cmpxchg()
4035 * -ENOSYS. in futex_detect_cmpxchg()
4037 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) in futex_detect_cmpxchg()