/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock defines
 *
 * This file contains macro definitions and functions shared between different
 * qspinlock slow path implementations.
 */
#ifndef __LINUX_QSPINLOCK_H
#define __LINUX_QSPINLOCK_H

#include <asm-generic/percpu.h>
#include <linux/percpu-defs.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/mcs_spinlock.h>

#define _Q_MAX_NODES	4

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif
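
/*
 * Illustrative sketch (not part of this header): a slow path typically
 * bounds its wait for a "pending but not yet locked" lock word roughly
 * like this, giving up after _Q_PENDING_LOOPS re-reads:
 *
 *	if (val == _Q_PENDING_VAL) {
 *		int cnt = _Q_PENDING_LOOPS;
 *		val = atomic_cond_read_relaxed(&lock->val,
 *					       (VAL != _Q_PENDING_VAL) || !cnt--);
 *	}
 */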

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};
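
/*
 * Illustrative sketch (not part of this header): a slow path implementation
 * is expected to provide a per-CPU array of _Q_MAX_NODES qnodes that the
 * tail encoding below indexes into, e.g.:
 *
 *	static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
 */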

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}
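
/*
 * For example, encode_tail(0, 0) yields 1 << _Q_TAIL_CPU_OFFSET, which is
 * non-zero, so a queue whose tail is CPU 0 / node index 0 remains
 * distinguishable from the "no tail" value of 0.
 */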

static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						      struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}
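
/*
 * decode_tail() is the inverse of encode_tail():
 * decode_tail(encode_tail(cpu, idx), qnodes) evaluates to
 * per_cpu_ptr(&qnodes[idx].mcs, cpu), i.e. the MCS node that the tail
 * code word refers to.
 */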

static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}
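
/*
 * Illustrative sketch (not part of this header): a slow path typically
 * picks its MCS node by bumping the per-CPU nesting count and indexing
 * into the per-CPU qnodes array, roughly:
 *
 *	node = this_cpu_ptr(&qnodes[0].mcs);
 *	idx = node->count++;
 *	tail = encode_tail(smp_processor_id(), idx);
 *	node = grab_mcs_node(node, idx);
 */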

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
#endif /* _Q_PENDING_BITS == 8 */
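
/*
 * Illustrative sketch (not part of this header): whichever xchg_tail()
 * variant is built, a slow path publishes its node and links behind any
 * previous tail roughly like this:
 *
 *	old = xchg_tail(lock, tail);
 *	if (old & _Q_TAIL_MASK) {
 *		prev = decode_tail(old, qnodes);
 *		WRITE_ONCE(prev->next, node);
 *		arch_mcs_spin_lock_contended(&node->locked);
 *	}
 */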

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif
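
/*
 * Illustrative sketch (not part of this header): the pending-bit path of a
 * slow path implementation typically sets pending, and if anyone else is
 * already pending or queued, undoes its own pending bit and falls back to
 * queueing, roughly:
 *
 *	val = queued_fetch_set_pending_acquire(lock);
 *	if (unlikely(val & ~_Q_LOCKED_MASK)) {
 *		if (!(val & _Q_PENDING_MASK))
 *			clear_pending(lock);
 *		goto queue;
 *	}
 */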

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}

#endif /* __LINUX_QSPINLOCK_H */