/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock defines
 *
 * This file contains macro definitions and functions shared between different
 * qspinlock slow path implementations.
 */
#ifndef __LINUX_QSPINLOCK_H
#define __LINUX_QSPINLOCK_H

#include <asm-generic/percpu.h>
#include <linux/percpu-defs.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/mcs_spinlock.h>

#define _Q_MAX_NODES	4

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}

/*
 * decode_tail - map a tail code word back to the per-CPU MCS node it
 * names (the inverse of encode_tail()).
 */
static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						      struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}

/* Return the idx'th MCS node within this CPU's array of qnodes. */
static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/*
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
#endif /* _Q_PENDING_BITS == 8 */

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}

#endif /* __LINUX_QSPINLOCK_H */
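
/*
 * Worked example of the tail code word (illustrative comment only; the
 * exact field offsets come from asm-generic/qspinlock_types.h and are
 * shown here for the common configuration with fewer than 16K CPUs,
 * i.e. _Q_PENDING_BITS == 8, _Q_TAIL_IDX_OFFSET == 16 and
 * _Q_TAIL_CPU_OFFSET == 18):
 *
 *	encode_tail(5, 2) == ((5 + 1) << 18) | (2 << 16) == 0x1a0000
 *
 * decode_tail(0x1a0000, qnodes) inverts this:
 *
 *	cpu == (0x1a0000 >> 18) - 1 == 5
 *	idx == (0x1a0000 & _Q_TAIL_IDX_MASK) >> 16 == 2
 *
 * i.e. the address of qnodes[2].mcs on CPU 5. Because the CPU number is
 * stored biased by one, a tail code word of 0 unambiguously means "no
 * queue" rather than "CPU 0, node 0".
 */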