/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H

/**
 * DOC: SIX locks overview
 *
 * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
 * but with an additional state: read/shared, intent, exclusive/write
 *
 * The purpose of the intent state is to allow for greater concurrency on tree
 * structures without deadlocking. In general, a read can't be upgraded to a
 * write lock without deadlocking, so an operation that updates multiple nodes
 * will have to take write locks for the full duration of the operation.
 *
 * But by adding an intent state, which is exclusive with other intent locks but
 * not with readers, we can take intent locks at the start of the operation,
 * and then take write locks only for the actual update to each individual
 * node, without deadlocking.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 * (For brevity, the six_lock_*() calls in these examples omit the trailing
 * should_sleep_fn and callback-argument parameters.)
 *
 * An intent lock must be held before taking a write lock:
 *   six_lock_intent(&foo->lock);
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
 *   six_lock_downgrade()	convert from intent to read
 *   six_lock_tryupgrade()	attempt to convert from read to intent, may fail
 *
 * There are also interfaces that take the lock type as an enum:
 *
 *   six_lock_type(&foo->lock, SIX_LOCK_read);
 *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
 *   six_lock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
 *
 * Lock sequence numbers - unlock(), relock():
 *
 *   Locks embed sequence numbers, which are incremented on write lock/unlock.
 *   This allows locks to be dropped and then retaken iff the state they
 *   protect hasn't changed; this makes it much easier to avoid holding locks
 *   while e.g. doing IO or allocating memory.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     u32 seq = six_lock_seq(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *
 *     some_operation_that_may_block();
 *
 *     if (six_relock_read(&foo->lock, seq)) { ... }
 *
 *   If the relock operation succeeds, it is as if the lock was never unlocked.
 *
 * Reentrancy:
 *
 *   Six locks are not by themselves reentrant, but have counters for both the
 *   read and intent states that can be used to provide reentrancy by an upper
 *   layer that tracks held locks. If a lock is known to already be held in the
 *   read or intent state, six_lock_increment() can be used to bump the "lock
 *   held in this state" counter, increasing the number of unlock calls that
 *   will be required to fully unlock it.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     six_lock_increment(&foo->lock, SIX_LOCK_read);
 *     six_unlock_read(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *   foo->lock is now fully unlocked.
 *
 *   Since the intent state supersedes read, it's legal to increment the read
 *   counter when holding an intent lock, but not the reverse.
 *
 *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
 *   is not legal.
 *
 * should_sleep_fn:
 *
 *   There is a six_lock() variant that takes a function pointer that is called
 *   immediately prior to schedule() when blocking, and may return an error to
 *   abort.
 *
 *   One possible use for this feature is when objects being locked are part of
 *   a cache and may be reused, and lock ordering is based on a property of the
 *   object that will change when the object is reused - i.e. logical key order.
 *
 *   If looking up an object in the cache may race with object reuse, and lock
 *   ordering is required to prevent deadlock, object reuse may change the
 *   correct lock order for that object and cause a deadlock. should_sleep_fn
 *   can be used to check if the object is still the object we want and avoid
 *   this deadlock.
 *
 * Wait list entry interface:
 *
 *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
 *   wait list entry. By embedding six_lock_waiter into another object, and by
 *   traversing lock waitlists, it is then possible for an upper layer to
 *   implement full cycle detection for deadlock avoidance.
 *
 *   should_sleep_fn should be used for invoking the cycle detector, walking the
 *   graph of held locks to check for a deadlock. The upper layer must track
 *   held locks for each thread, and each thread's held locks must be reachable
 *   from its six_lock_waiter object.
 *
 *   six_lock_waiter() will add the wait object to the waitlist before re-trying
 *   taking the lock, and before calling should_sleep_fn, and the wait object
 *   will not be removed from the waitlist until either the lock has been
 *   successfully acquired, or we aborted because should_sleep_fn returned an
 *   error.
 *
 *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
 *   have timestamps in strictly ascending order - this is so the timestamp can
 *   be used as a cursor for lock graph traversal.
124 */ 125 126 #include <linux/lockdep.h> 127 #include <linux/sched.h> 128 #include <linux/types.h> 129 130 enum six_lock_type { 131 SIX_LOCK_read, 132 SIX_LOCK_intent, 133 SIX_LOCK_write, 134 }; 135 136 struct six_lock { 137 atomic_t state; 138 u32 seq; 139 unsigned intent_lock_recurse; 140 unsigned write_lock_recurse; 141 struct task_struct *owner; 142 unsigned __percpu *readers; 143 raw_spinlock_t wait_lock; 144 struct list_head wait_list; 145 #ifdef CONFIG_DEBUG_LOCK_ALLOC 146 struct lockdep_map dep_map; 147 #endif 148 }; 149 150 struct six_lock_waiter { 151 struct list_head list; 152 struct task_struct *task; 153 enum six_lock_type lock_want; 154 bool lock_acquired; 155 u64 start_time; 156 }; 157 158 typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); 159 160 void six_lock_exit(struct six_lock *lock); 161 162 enum six_lock_init_flags { 163 SIX_LOCK_INIT_PCPU = 1U << 0, 164 }; 165 166 void __six_lock_init(struct six_lock *lock, const char *name, 167 struct lock_class_key *key, enum six_lock_init_flags flags, 168 gfp_t gfp); 169 170 /** 171 * six_lock_init - initialize a six lock 172 * @lock: lock to initialize 173 * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU 174 */ 175 #define six_lock_init(lock, flags, gfp) \ 176 do { \ 177 static struct lock_class_key __key; \ 178 \ 179 __six_lock_init((lock), #lock, &__key, flags, gfp); \ 180 } while (0) 181 182 /** 183 * six_lock_seq - obtain current lock sequence number 184 * @lock: six_lock to obtain sequence number for 185 * 186 * @lock should be held for read or intent, and not write 187 * 188 * By saving the lock sequence number, we can unlock @lock and then (typically 189 * after some blocking operation) attempt to relock it: the relock will succeed 190 * if the sequence number hasn't changed, meaning no write locks have been taken 191 * and state corresponding to what @lock protects is still valid. 
192 */ 193 static inline u32 six_lock_seq(const struct six_lock *lock) 194 { 195 return lock->seq; 196 } 197 198 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); 199 200 /** 201 * six_trylock_type - attempt to take a six lock without blocking 202 * @lock: lock to take 203 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 204 * 205 * Return: true on success, false on failure. 206 */ 207 static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) 208 { 209 return six_trylock_ip(lock, type, _THIS_IP_); 210 } 211 212 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type, 213 struct six_lock_waiter *wait, 214 six_lock_should_sleep_fn should_sleep_fn, void *p, 215 unsigned long ip); 216 217 /** 218 * six_lock_waiter - take a lock, with full waitlist interface 219 * @lock: lock to take 220 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 221 * @wait: pointer to wait object, which will be added to lock's waitlist 222 * @should_sleep_fn: callback run after adding to waitlist, immediately prior 223 * to scheduling 224 * @p: passed through to @should_sleep_fn 225 * 226 * This is a convenience wrapper around six_lock_ip_waiter(), see that function 227 * for full documentation. 228 * 229 * Return: 0 on success, or the return code from @should_sleep_fn on failure. 230 */ 231 static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type, 232 struct six_lock_waiter *wait, 233 six_lock_should_sleep_fn should_sleep_fn, void *p) 234 { 235 return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_); 236 } 237 238 /** 239 * six_lock_ip - take a six lock lock 240 * @lock: lock to take 241 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 242 * @should_sleep_fn: callback run after adding to waitlist, immediately prior 243 * to scheduling 244 * @p: passed through to @should_sleep_fn 245 * @ip: ip parameter for lockdep/lockstat, i.e. 
_THIS_IP_ 246 * 247 * Return: 0 on success, or the return code from @should_sleep_fn on failure. 248 */ 249 static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type, 250 six_lock_should_sleep_fn should_sleep_fn, void *p, 251 unsigned long ip) 252 { 253 struct six_lock_waiter wait; 254 255 return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip); 256 } 257 258 /** 259 * six_lock_type - take a six lock lock 260 * @lock: lock to take 261 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 262 * @should_sleep_fn: callback run after adding to waitlist, immediately prior 263 * to scheduling 264 * @p: passed through to @should_sleep_fn 265 * 266 * Return: 0 on success, or the return code from @should_sleep_fn on failure. 267 */ 268 static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, 269 six_lock_should_sleep_fn should_sleep_fn, void *p) 270 { 271 struct six_lock_waiter wait; 272 273 return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_); 274 } 275 276 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, 277 unsigned seq, unsigned long ip); 278 279 /** 280 * six_relock_type - attempt to re-take a lock that was held previously 281 * @lock: lock to take 282 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 283 * @seq: lock sequence number obtained from six_lock_seq() while lock was 284 * held previously 285 * 286 * Return: true on success, false on failure. 
287 */ 288 static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, 289 unsigned seq) 290 { 291 return six_relock_ip(lock, type, seq, _THIS_IP_); 292 } 293 294 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); 295 296 /** 297 * six_unlock_type - drop a six lock 298 * @lock: lock to unlock 299 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write 300 * 301 * When a lock is held multiple times (because six_lock_incement()) was used), 302 * this decrements the 'lock held' counter by one. 303 * 304 * For example: 305 * six_lock_read(&foo->lock); read count 1 306 * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2 307 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1 308 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0 309 */ 310 static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) 311 { 312 six_unlock_ip(lock, type, _THIS_IP_); 313 } 314 315 #define __SIX_LOCK(type) \ 316 static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\ 317 { \ 318 return six_trylock_ip(lock, SIX_LOCK_##type, ip); \ 319 } \ 320 \ 321 static inline bool six_trylock_##type(struct six_lock *lock) \ 322 { \ 323 return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ 324 } \ 325 \ 326 static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \ 327 struct six_lock_waiter *wait, \ 328 six_lock_should_sleep_fn should_sleep_fn, void *p,\ 329 unsigned long ip) \ 330 { \ 331 return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\ 332 } \ 333 \ 334 static inline int six_lock_ip_##type(struct six_lock *lock, \ 335 six_lock_should_sleep_fn should_sleep_fn, void *p, \ 336 unsigned long ip) \ 337 { \ 338 return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\ 339 } \ 340 \ 341 static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\ 342 { \ 343 return six_relock_ip(lock, 
SIX_LOCK_##type, seq, ip); \ 344 } \ 345 \ 346 static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \ 347 { \ 348 return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \ 349 } \ 350 \ 351 static inline int six_lock_##type(struct six_lock *lock, \ 352 six_lock_should_sleep_fn fn, void *p)\ 353 { \ 354 return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \ 355 } \ 356 \ 357 static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \ 358 { \ 359 six_unlock_ip(lock, SIX_LOCK_##type, ip); \ 360 } \ 361 \ 362 static inline void six_unlock_##type(struct six_lock *lock) \ 363 { \ 364 six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ 365 } 366 367 __SIX_LOCK(read) 368 __SIX_LOCK(intent) 369 __SIX_LOCK(write) 370 #undef __SIX_LOCK 371 372 void six_lock_downgrade(struct six_lock *); 373 bool six_lock_tryupgrade(struct six_lock *); 374 bool six_trylock_convert(struct six_lock *, enum six_lock_type, 375 enum six_lock_type); 376 377 void six_lock_increment(struct six_lock *, enum six_lock_type); 378 379 void six_lock_wakeup_all(struct six_lock *); 380 381 struct six_lock_count { 382 unsigned n[3]; 383 }; 384 385 struct six_lock_count six_lock_counts(struct six_lock *); 386 void six_lock_readers_add(struct six_lock *, int); 387 388 #endif /* _LINUX_SIX_H */ 389