xref: /linux/kernel/futex/pi.c (revision 7393febcb1b2082c0484952729cbebfe4dc508d5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 
3 #include <linux/slab.h>
4 #include <linux/sched/rt.h>
5 #include <linux/sched/task.h>
6 
7 #include "futex.h"
8 #include "../locking/rtmutex_common.h"
9 
10 /*
11  * PI code:
12  */
refill_pi_state_cache(void)13 int refill_pi_state_cache(void)
14 {
15 	struct futex_pi_state *pi_state;
16 
17 	if (likely(current->pi_state_cache))
18 		return 0;
19 
20 	pi_state = kzalloc_obj(*pi_state);
21 
22 	if (!pi_state)
23 		return -ENOMEM;
24 
25 	INIT_LIST_HEAD(&pi_state->list);
26 	/* pi_mutex gets initialized later */
27 	pi_state->owner = NULL;
28 	refcount_set(&pi_state->refcount, 1);
29 	pi_state->key = FUTEX_KEY_INIT;
30 
31 	current->pi_state_cache = pi_state;
32 
33 	return 0;
34 }
35 
alloc_pi_state(void)36 static struct futex_pi_state *alloc_pi_state(void)
37 {
38 	struct futex_pi_state *pi_state = current->pi_state_cache;
39 
40 	WARN_ON(!pi_state);
41 	current->pi_state_cache = NULL;
42 
43 	return pi_state;
44 }
45 
/*
 * Move @pi_state from the pi_state_list of its current owner (if any) to
 * the pi_state_list of @new_owner (if any) and update pi_state->owner.
 *
 * Must be called with pi_state->pi_mutex.wait_lock held. The per-task
 * pi_lock of the old/new owner is taken around the list updates to
 * serialize against the task exit paths walking pi_state_list.
 *
 * Note: when @new_owner is NULL, pi_state->owner is deliberately left
 * unchanged here; callers that need it cleared do so themselves.
 */
static void pi_state_update_owner(struct futex_pi_state *pi_state,
				  struct task_struct *new_owner)
{
	struct task_struct *old_owner = pi_state->owner;

	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);

	if (old_owner) {
		raw_spin_lock(&old_owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&old_owner->pi_lock);
	}

	if (new_owner) {
		raw_spin_lock(&new_owner->pi_lock);
		WARN_ON(!list_empty(&pi_state->list));
		list_add(&pi_state->list, &new_owner->pi_state_list);
		pi_state->owner = new_owner;
		raw_spin_unlock(&new_owner->pi_lock);
	}
}
68 
get_pi_state(struct futex_pi_state * pi_state)69 void get_pi_state(struct futex_pi_state *pi_state)
70 {
71 	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
72 }
73 
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		unsigned long flags;

		/*
		 * Unlink from the owner's pi_state_list and release the
		 * proxy-locked rtmutex; both require wait_lock.
		 */
		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		pi_state_update_owner(pi_state, NULL);
		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

	if (current->pi_state_cache) {
		/* Cache slot already occupied: really free this one. */
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
112 
113 /*
114  * We need to check the following states:
115  *
116  *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
117  *
118  * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
119  * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
120  *
121  * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
122  *
123  * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
124  * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
125  *
126  * [6]  Found  | Found    | task      | 0         | 1      | Valid
127  *
128  * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
129  *
130  * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
131  * [9]  Found  | Found    | task      | 0         | 0      | Invalid
132  * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
133  *
134  * [1]	Indicates that the kernel can acquire the futex atomically. We
135  *	came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
136  *
137  * [2]	Valid, if TID does not belong to a kernel thread. If no matching
138  *      thread is found then it indicates that the owner TID has died.
139  *
140  * [3]	Invalid. The waiter is queued on a non PI futex
141  *
142  * [4]	Valid state after exit_robust_list(), which sets the user space
143  *	value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
144  *
145  * [5]	The user space value got manipulated between exit_robust_list()
146  *	and exit_pi_state_list()
147  *
148  * [6]	Valid state after exit_pi_state_list() which sets the new owner in
149  *	the pi_state but cannot access the user space value.
150  *
151  * [7]	pi_state->owner can only be NULL when the OWNER_DIED bit is set.
152  *
153  * [8]	Owner and user space value match
154  *
155  * [9]	There is no transient state which sets the user space TID to 0
156  *	except exit_robust_list(), but this is indicated by the
157  *	FUTEX_OWNER_DIED bit. See [4]
158  *
159  * [10] There is no transient state which leaves owner and user space
160  *	TID out of sync. Except one error case where the kernel is denied
161  *	write access to the user address, see fixup_pi_state_owner().
162  *
163  *
164  * Serialization and lifetime rules:
165  *
166  * hb->lock:
167  *
168  *	hb -> futex_q, relation
169  *	futex_q -> pi_state, relation
170  *
171  *	(cannot be raw because hb can contain arbitrary amount
172  *	 of futex_q's)
173  *
174  * pi_mutex->wait_lock:
175  *
176  *	{uval, pi_state}
177  *
178  *	(and pi_mutex 'obviously')
179  *
180  * p->pi_lock:
181  *
182  *	p->pi_state_list -> pi_state->list, relation
183  *	pi_mutex->owner -> pi_state->owner, relation
184  *
185  * pi_state->refcount:
186  *
187  *	pi_state lifetime
188  *
189  *
190  * Lock order:
191  *
192  *   hb->lock
193  *     pi_mutex->wait_lock
194  *       p->pi_lock
195  *
196  */
197 
/*
 * Validate that the existing waiter has a pi_state and sanity check
 * the pi_state against the user space value. If correct, attach to
 * it.
 *
 * @uaddr:    the pi futex user space address
 * @uval:     the user space value read before (without wait_lock)
 * @pi_state: the pi_state of the top waiter on the hash bucket
 * @ps:       out parameter, set to @pi_state on success
 *
 * Return: 0 on success (reference taken, *ps set), or -EINVAL/-EAGAIN/
 * -EFAULT per the state table [1]-[10] above.
 */
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes [3]
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 *
	 * The waiter holding a reference on @pi_state also protects against
	 * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
	 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
	 * free pi_state before we can take a reference ourselves.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (futex_get_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state. [5]
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success. [4]
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success. [6]
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner. [7]
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID. [9/10]
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
320 
/*
 * Decide what to tell the caller when the alleged futex owner could not
 * be attached to because it is exiting (or is already gone, @tsk == NULL):
 *
 *  -EBUSY:  @tsk has not reached FUTEX_STATE_DEAD yet; caller must wait
 *           for the exit to complete and retry.
 *  -EFAULT: re-reading *uaddr faulted.
 *  -EAGAIN: the user space value changed under us; caller retries.
 *  -ESRCH:  the TID in *uaddr is genuinely stale/bogus.
 */
static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the following situation:
	 *
	 * CPU0				CPU1
	 *
	 * sys_exit()			sys_futex()
	 *  do_exit()			 futex_lock_pi()
	 *                                futex_lock_pi_atomic()
	 *   exit_signals(tsk)		    No waiters:
	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
	 *  mm_release(tsk)		    Set waiter bit
	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
	 *      Set owner died		    attach_to_pi_owner() {
	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
	 *   }				     if (!tsk->flags & PF_EXITING) {
	 *  ...				       attach();
	 *  tsk->futex_state =               } else {
	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
	 *					  FUTEX_STATE_DEAD)
	 *				         return -EAGAIN;
	 *				       return -ESRCH; <--- FAIL
	 *				     }
	 *
	 * Returning ESRCH unconditionally is wrong here because the
	 * user space value has been changed by the exiting task.
	 *
	 * The same logic applies to the case where the exiting task is
	 * already gone.
	 */
	if (futex_get_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, try again. */
	if (uval2 != uval)
		return -EAGAIN;

	/*
	 * The exiting task did not have a robust list, the robust list was
	 * corrupted or the user space value in *uaddr is simply bogus.
	 * Give up and tell user space.
	 */
	return -ESRCH;
}
376 
/*
 * Allocate a fresh pi_state, make @p the owner of its proxy-locked
 * rtmutex and queue it on @p's pi_state_list; *ps is set to the result.
 *
 * Both call sites hold p->pi_lock, which serializes the pi_state_list
 * update against the task exit path.
 */
static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
				 struct futex_pi_state **ps)
{
	/*
	 * No existing pi state. First waiter. [2]
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing can
	 * observe this state, wait_lock is irrelevant.
	 */
	struct futex_pi_state *pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	__assume_ctx_lock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;

	*ps = pi_state;
}
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 *
 * Return: 0 on success (*ps set to the new pi_state), -EAGAIN/-EPERM,
 * or a handle_exit_race() result. On -EBUSY, *exiting holds a counted
 * task reference the caller must drop after the exit completes.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0 [1]
	 *
	 * The !pid check is paranoid. None of the call sites should end up
	 * with pid == 0, but better safe than sorry. Let the caller retry
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	/* PI futexes must not be owned by kernel threads. */
	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	__attach_to_pi_owner(p, key, ps);
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	return 0;
}
475 
/*
 * Try to atomically move the futex word at @uaddr from @uval to @newval.
 *
 * Return: 0 on success, -EAGAIN when the user space value changed in
 * the meantime, -EFAULT on fault (or fault injection), or the error
 * from the cmpxchg helper.
 */
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 curval;
	int err;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
	if (unlikely(err))
		return err;

	/* If user space value changed, let the caller retry */
	if (curval != uval)
		return -EAGAIN;

	return 0;
}
491 
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock must be held by the caller.
 *
 * @exiting is only set when the return value is -EBUSY. If so, this holds
 * a refcount on the exiting task on return and the caller needs to drop it
 * after waiting for the exit to complete.
 */
int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
			 union futex_key *key,
			 struct futex_pi_state **ps,
			 struct task_struct *task,
			 struct task_struct **exiting,
			 int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (futex_get_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		if (ret)
			return ret;

		/*
		 * If the waiter bit was requested the caller also needs PI
		 * state attached to the new owner of the user space futex.
		 *
		 * @task is guaranteed to be alive and it cannot be exiting
		 * because it is either sleeping or waiting in
		 * futex_requeue_pi_wakeup_sync().
		 *
		 * No need to do the full attach_to_pi_owner() exercise
		 * because @task is known and valid.
		 */
		if (set_waiters) {
			raw_spin_lock_irq(&task->pi_lock);
			__attach_to_pi_owner(task, key, ps);
			raw_spin_unlock_irq(&task->pi_lock);
		}
		return 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
611 
/*
 * Hand the futex and the rtmutex over to @top_waiter's task.
 *
 * Caller must hold a reference on @pi_state and, per the annotations
 * below, enter with pi_mutex.wait_lock held; the lock is dropped before
 * returning.
 *
 * Return: 0 on success, -EFAULT/-EAGAIN/-EINVAL when the user space
 * value could not be updated consistently.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval,
			 struct futex_pi_state *pi_state,
			 struct rt_mutex_waiter *top_waiter)
	__must_hold(&pi_state->pi_mutex.wait_lock)
	__releases(&pi_state->pi_mutex.wait_lock)
{
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_RT_WAKE_Q(wqh);
	u32 curval, newval;
	int ret = 0;

	new_owner = top_waiter->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always kept
	 * enabled while there is PI state around. We cleanup the owner
	 * died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true))) {
		ret = -EFAULT;
		goto out_unlock;
	}

	ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
	if (!ret && (curval != uval)) {
		/*
		 * If a unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}

	if (!ret) {
		/*
		 * This is a point of no return; once we modified the uval
		 * there is no going back and subsequent operations must
		 * not fail.
		 */
		pi_state_update_owner(pi_state, new_owner);
		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
	}

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	/* Wakeups must happen after wait_lock is dropped. */
	if (postunlock)
		rt_mutex_postunlock(&wqh);

	return ret;
}
673 
/*
 * Bring pi_state->owner and the user space TID back in sync after a lock
 * steal or after @argowner acquired the rtmutex.
 *
 * @argowner == current: current owns the rtmutex; make it the pi_state
 *			 and user space owner.
 * @argowner == NULL:	 someone stole the lock from us; repoint uval and
 *			 pi_state at the actual rtmutex owner.
 *
 * Return: 1 if current ends up owning the lock, 0 if not, or a negative
 * error code when user space could not be made writable.
 */
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				  struct task_struct *argowner)
	__must_hold(&q->pi_state->pi_mutex.wait_lock)
	__must_hold(q->lock_ptr)
{
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner, *newowner;
	u32 uval, curval, newval, newtid;
	int err = 0;

	oldowner = pi_state->owner;

	/*
	 * We are here because either:
	 *
	 *  - we stole the lock and pi_state->owner needs updating to reflect
	 *    that (@argowner == current),
	 *
	 * or:
	 *
	 *  - someone stole our lock and we need to fix things to point to the
	 *    new owner (@argowner == NULL).
	 *
	 * Either way, we have to replace the TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would leave the
	 * pi_state in an inconsistent state when we fault here, because we
	 * need to drop the locks to handle the fault. This might be observed
	 * in the PID checks when attaching to PI state .
	 */
retry:
	if (!argowner) {
		if (oldowner != current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			return 0;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock. pi_state is correct. Tell caller. */
			return 1;
		}

		/*
		 * The trylock just failed, so either there is an owner or
		 * there is a higher priority waiter than this one.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);
		/*
		 * If the higher priority waiter has not yet taken over the
		 * rtmutex then newowner is NULL. We can't return here with
		 * that state because it's inconsistent vs. the user space
		 * state. So drop the locks and try again. It's a valid
		 * situation and not any different from the other retry
		 * conditions.
		 */
		if (unlikely(!newowner)) {
			err = -EAGAIN;
			goto handle_err;
		}
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			return 1;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = futex_get_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	/* cmpxchg loop: install newtid while preserving the owner died bit. */
	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	pi_state_update_owner(pi_state, newowner);

	return argowner == current;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the
	 * locks here. In the case of a fault, this gives the other task
	 * (either the highest priority waiter itself or the task which stole
	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
	 * are back from handling the fault we need to check the pi_state after
	 * reacquiring the locks and before trying to do another fixup. When
	 * the fixup has been done already we simply return.
	 *
	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
	 * drop hb->lock since the caller owns the hb -> futex_q relation.
	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		err = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		err = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	futex_q_lockptr_lock(q);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return argowner == current;

	/* Retry if err was -EAGAIN or the fault in succeeded */
	if (!err)
		goto retry;

	/*
	 * fault_in_user_writeable() failed so user state is immutable. At
	 * best we can make the kernel state consistent but user state will
	 * be most likely hosed and any subsequent unlock operation will be
	 * rejected due to PI futex rule [10].
	 *
	 * Ensure that the rtmutex owner is also the pi_state owner despite
	 * the user space value claiming something different. There is no
	 * point in unlocking the rtmutex if current is the owner as it
	 * would need to wait until the next waiter has taken the rtmutex
	 * to guarantee consistent state. Keep it simple. Userspace asked
	 * for this wreckaged state.
	 *
	 * The rtmutex has an owner - either current or some other
	 * task. See the EAGAIN loop above.
	 */
	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));

	return err;
}
847 
/*
 * Lock-taking wrapper around __fixup_pi_state_owner(): the caller holds
 * q->lock_ptr (asserted below); we supply pi_mutex.wait_lock.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	int ret;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	ret = __fixup_pi_state_owner(uaddr, q, argowner);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
861 
/**
 * fixup_pi_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  -  1 - success, lock taken;
 *  -  0 - success, lock not taken;
 *  - <0 - on error (-EFAULT)
 */
int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 *
		 * Speculative pi_state->owner read (we don't hold wait_lock);
		 * since we own the lock pi_state->owner == current is the
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			return fixup_pi_state_owner(uaddr, q, current);
		return 1;
	}

	/*
	 * If we didn't get the lock; check if anybody stole it from us. In
	 * that case, we need to fix up the uval to point to them instead of
	 * us, otherwise bad things happen. [10]
	 *
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current)
		return fixup_pi_state_owner(uaddr, q, NULL);

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex. Warn and establish consistent state.
	 */
	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
		return fixup_pi_state_owner(uaddr, q, current);

	return 0;
}
913 
914 /*
915  * Userspace tried a 0 -> TID atomic transition of the futex value
916  * and failed. The kernel side here does the whole locking operation:
917  * if there are waiters then it will block as a consequence of relying
918  * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
919  * a 0 value of the futex too.).
920  *
921  * Also serves as futex trylock_pi()'ing, and due semantics.
922  */
int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct task_struct *exiting;
	struct rt_mutex_waiter rt_waiter;
	struct futex_q q = futex_q_init;
	DEFINE_WAKE_Q(wake_q);
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	/*
	 * Pre-allocate a pi_state so the atomic lock attempt below can
	 * attach one without allocating while holding locks.
	 */
	if (refill_pi_state_cache())
		return -ENOMEM;

	to = futex_setup_timer(time, &timeout, flags, 0);

retry:
	exiting = NULL;
	ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	/*
	 * if (1) merely opens a scope so the CLASS(hb) hash-bucket
	 * reference is dropped before the common out: path below.
	 */
	if (1) {
		CLASS(hb, hb)(&q.key);

		futex_q_lock(&q, hb);

		ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
					   &exiting, 0);
		if (unlikely(ret)) {
			/*
			 * Atomic work succeeded and we got the lock,
			 * or failed. Either way, we do _not_ block.
			 */
			switch (ret) {
			case 1:
				/* We got the lock. */
				ret = 0;
				goto out_unlock_put_key;
			case -EFAULT:
				goto uaddr_faulted;
			case -EBUSY:
			case -EAGAIN:
				/*
				 * Two reasons for this:
				 * - EBUSY: Task is exiting and we just wait for the
				 *   exit to complete.
				 * - EAGAIN: The user space value changed.
				 */
				futex_q_unlock(hb);
				__release(q.lock_ptr);
				/*
				 * Handle the case where the owner is in the middle of
				 * exiting. Wait for the exit to complete otherwise
				 * this task might loop forever, aka. live lock.
				 */
				wait_for_owner_exiting(ret, exiting);
				cond_resched();
				goto retry;
			default:
				goto out_unlock_put_key;
			}
		}

		WARN_ON(!q.pi_state);

		/*
		 * Only actually queue now that the atomic ops are done:
		 */
		__futex_queue(&q, hb, current);

		if (trylock) {
			ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
			/* Fixup the trylock return value: */
			ret = ret ? 0 : -EWOULDBLOCK;
			goto no_block;
		}

		/*
		 * Caution; releasing @hb in-scope. The hb->lock is still locked
		 * while the reference is dropped. The reference can not be dropped
		 * after the unlock because if a user initiated resize is in progress
		 * then we might need to wake him. This can not be done after the
		 * rt_mutex_pre_schedule() invocation. The hb will remain valid because
		 * the thread, performing resize, will block on hb->lock during
		 * the requeue.
		 */
		futex_hash_put(no_free_ptr(hb));
		/*
		 * Must be done before we enqueue the waiter, here is unfortunately
		 * under the hb lock, but that *should* work because it does nothing.
		 */
		rt_mutex_pre_schedule();

		rt_mutex_init_waiter(&rt_waiter);

		/*
		 * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
		 * hold it while doing rt_mutex_start_proxy(), because then it will
		 * include hb->lock in the blocking chain, even through we'll not in
		 * fact hold it while blocking. This will lead it to report -EDEADLK
		 * and BUG when futex_unlock_pi() interleaves with this.
		 *
		 * Therefore acquire wait_lock while holding hb->lock, but drop the
		 * latter before calling __rt_mutex_start_proxy_lock(). This
		 * interleaves with futex_unlock_pi() -- which does a similar lock
		 * handoff -- such that the latter can observe the futex_q::pi_state
		 * before __rt_mutex_start_proxy_lock() is done.
		 */
		raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
		spin_unlock(q.lock_ptr);
		/*
		 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
		 * such that futex_unlock_pi() is guaranteed to observe the waiter when
		 * it sees the futex_q::pi_state.
		 */
		ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
		raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);

		if (ret) {
			/* ret == 1 means we acquired the rt_mutex right away. */
			if (ret == 1)
				ret = 0;
			goto cleanup;
		}

		if (unlikely(to))
			hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

		/* Block until the lock is acquired, a timeout expires or a signal arrives. */
		ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
		/*
		 * If we failed to acquire the lock (deadlock/signal/timeout), we must
		 * unwind the above, however we canont lock hb->lock because
		 * rt_mutex already has a waiter enqueued and hb->lock can itself try
		 * and enqueue an rt_waiter through rtlock.
		 *
		 * Doing the cleanup without holding hb->lock can cause inconsistent
		 * state between hb and pi_state, but only in the direction of not
		 * seeing a waiter that is leaving.
		 *
		 * See futex_unlock_pi(), it deals with this inconsistency.
		 *
		 * There be dragons here, since we must deal with the inconsistency on
		 * the way out (here), it is impossible to detect/warn about the race
		 * the other way around (missing an incoming waiter).
		 *
		 * What could possibly go wrong...
		 */
		if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
			ret = 0;

		/*
		 * Now that the rt_waiter has been dequeued, it is safe to use
		 * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
		 * the pi_state owner below.
		 */
		futex_q_lockptr_lock(&q);
		/*
		 * Waiter is unqueued.
		 */
		rt_mutex_post_schedule();
no_block:
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_pi_owner(uaddr, &q, !ret);
		/*
		 * If fixup_pi_owner() returned an error, propagate that.  If it acquired
		 * the lock, clear our -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Sparse annotation only; the real unlock is spin_unlock(q.lock_ptr) below. */
		__release(&hb->lock);
		futex_unqueue_pi(&q);
		spin_unlock(q.lock_ptr);
		if (q.drop_hb_ref) {
			CLASS(hb, hb)(&q.key);
			/* Additional reference from futex_unlock_pi() */
			futex_hash_put(hb);
		}
		goto out;

out_unlock_put_key:
		futex_q_unlock(hb);
		__release(q.lock_ptr);
		goto out;

uaddr_faulted:
		futex_q_unlock(hb);
		__release(q.lock_ptr);

		ret = fault_in_user_writeable(uaddr);
		if (ret)
			goto out;

		/*
		 * Private futexes keep a valid key across the fault; shared ones
		 * must redo get_futex_key() via the full retry path.
		 */
		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		goto retry;
	}

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	/*
	 * Translate -EINTR so the syscall is transparently restarted after
	 * signal handling.
	 */
	return ret != -EINTR ? ret : -ERESTARTNOINTR;
}
1136 
1137 /*
1138  * Userspace attempted a TID -> 0 atomic transition, and failed.
1139  * This is the in-kernel slowpath: we look up the PI state (if any),
1140  * and do the rt-mutex unlock.
1141  */
int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 curval, uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	CLASS(hb, hb)(&key);
	spin_lock(&hb->lock);
retry_hb:
	/* Re-entered with hb->lock still held after evicting a leaving waiter. */

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;
		struct rt_mutex_waiter *rt_waiter;

		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * If current does not own the pi_state then the futex is
		 * inconsistent and user space fiddled with the futex value.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		/*
		 * By taking wait_lock while still holding hb->lock, we ensure
		 * there is no point where we hold neither; and thereby
		 * wake_futex_pi() must observe any new waiters.
		 *
		 * Since the cleanup: case in futex_lock_pi() removes the
		 * rt_waiter without holding hb->lock, it is possible for
		 * wake_futex_pi() to not find a waiter while the above does,
		 * in this case the waiter is on the way out and it can be
		 * ignored.
		 *
		 * In particular; this forces __rt_mutex_start_proxy() to
		 * complete such that we're guaranteed to observe the
		 * rt_waiter.
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

		/*
		 * Futex vs rt_mutex waiter state -- if there are no rt_mutex
		 * waiters even though futex thinks there are, then the waiter
		 * is leaving. The entry needs to be removed from the list so a
		 * new futex_lock_pi() is not using this stale PI-state while
		 * the futex is available in user space again.
		 * There can be more than one task on its way out so it needs
		 * to retry.
		 */
		rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
		if (!rt_waiter) {
			/*
			 * Acquire a reference for the leaving waiter to ensure
			 * valid futex_q::lock_ptr.
			 */
			futex_hash_get(hb);
			top_waiter->drop_hb_ref = true;
			__futex_unqueue(top_waiter);
			raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
			goto retry_hb;
		}

		/* Pin pi_state across the hb->lock drop below. */
		get_pi_state(pi_state);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);

		put_pi_state(pi_state);

		/*
		 * Success, we're done! No tricky corner cases.
		 */
		if (!ret)
			return ret;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * An unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN)
			goto pi_retry;
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		return ret;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We preserve neither
	 * the WAITERS bit nor the OWNER_DIED one. We are the
	 * owner.
	 */
	if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			WARN_ON_ONCE(1);
			return ret;
		}
	}

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
	return ret;

pi_retry:
	cond_resched();
	goto retry;

pi_faulted:
	/* Fault the futex word back in, then redo the whole operation. */
	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
1304 
1305