// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Resilient Queued Spin Lock
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2013-2014,2018 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates.
 *
 * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 *          Kumar Kartikeya Dwivedi <memxor@gmail.com>
 */

#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <linux/prefetch.h>
#include <asm/byteorder.h>
#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
#endif
#include <trace/events/lock.h>
#include <asm/rqspinlock.h>
#include <linux/timekeeping.h>

/*
 * Include queued spinlock definitions and statistics code
 */
#ifdef CONFIG_QUEUED_SPINLOCKS
#include "../locking/qspinlock.h"
#include "../locking/lock_events.h"
#include "rqspinlock.h"
#include "../locking/mcs_spinlock.h"
#endif

/*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
 * MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalable
 * Synchronization on Shared-Memory Multiprocessors" by Mellor-Crummey and
 * Scott) is available at
 *
 * https://bugzilla.kernel.org/show_bug.cgi?id=206115
 *
 * This queued spinlock implementation is based on the MCS lock, however to
 * make it fit the 4 bytes that we assume spinlock_t to be, and to preserve
 * its existing API, we must modify it somehow.
 *
 * In particular; where the traditional MCS lock consists of a tail pointer
 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
 * unlock the next pending (next->locked), we compress both these: {tail,
 * next->locked} into a single u32 value.
 *
 * A spinlock disables recursion of its own context and there is a limit to
 * the contexts that can nest (namely: task, softirq, hardirq, nmi). As there
 * are at most 4 nesting levels, the nesting level can be encoded in a 2-bit
 * number. We can then encode the tail by combining the 2-bit nesting level
 * with the cpu number. With one byte for the lock value and 3 bytes for the
 * tail, only a 32-bit word is now needed. Even though we only need 1 bit for
 * the lock, we extend it to a full byte to achieve better performance for
 * architectures that support atomic byte writes.
 *
 * We also change the first spinner to spin on the lock bit instead of its
 * node; thereby avoiding the need to carry a node from lock to unlock, and
 * preserving the existing lock API. This also makes the unlock code simpler
 * and faster.
 *
 * N.B. The current implementation only supports architectures that allow
 * atomic operations on smaller 8-bit and 16-bit data types.
 */
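/*
 * Illustrative sketch of the tail encoding described above, assuming the
 * layout used by the shared qspinlock code (encode_tail()/decode_tail() and
 * the _Q_TAIL_*_OFFSET constants in the qspinlock headers are the
 * authoritative definitions):
 *
 *	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;	// CPU number, biased by 1
 *	tail |= idx << _Q_TAIL_IDX_OFFSET;		// 2-bit nesting index
 *
 * The +1 bias makes an all-zero tail mean "no queued waiters", which is what
 * the (0,*,*) states in the transition diagram further below rely on.
 */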
struct rqspinlock_timeout {
	u64 timeout_end;
	u64 duration;
	u64 cur;
	u16 spin;
};

#define RES_TIMEOUT_VAL	2

DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
EXPORT_SYMBOL_GPL(rqspinlock_held_locks);

static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
{
	if (!(atomic_read_acquire(&lock->val) & (mask)))
		return true;
	return false;
}

static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
				      struct rqspinlock_timeout *ts)
{
	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
	int cnt = min(RES_NR_HELD, rqh->cnt);

	/*
	 * Return an error if we hold the lock we are attempting to acquire.
	 * We'll iterate over max 32 locks; no need to do is_lock_released.
	 */
	for (int i = 0; i < cnt - 1; i++) {
		if (rqh->locks[i] == lock)
			return -EDEADLK;
	}
	return 0;
}

/*
 * This focuses on the most common case of ABBA deadlocks (or ABBA involving
 * more locks, which reduce to ABBA). This is not exhaustive, and we rely on
 * timeouts as the final line of defense.
 */
static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
					struct rqspinlock_timeout *ts)
{
	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
	int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
	void *remote_lock;
	int cpu;

	/*
	 * Find the CPU holding the lock that we want to acquire. If there is a
	 * deadlock scenario, we will read a stable set on the remote CPU and
	 * find the target. This would be a constant time operation instead of
	 * O(NR_CPUS) if we could determine the owning CPU from a lock value, but
	 * that requires increasing the size of the lock word.
	 */
	for_each_possible_cpu(cpu) {
		struct rqspinlock_held *rqh_cpu = per_cpu_ptr(&rqspinlock_held_locks, cpu);
		int real_cnt = READ_ONCE(rqh_cpu->cnt);
		int cnt = min(RES_NR_HELD, real_cnt);

		/*
		 * Ensure that we break out of this loop if the lock becomes
		 * available for us to potentially acquire.
		 */
		if (is_lock_released(lock, mask, ts))
			return 0;

		/*
		 * Skip ourselves, and CPUs whose count is less than 2, as they need at
		 * least one held lock and one acquisition attempt (reflected as the
		 * topmost entry) to participate in an ABBA deadlock.
		 *
		 * If cnt is more than RES_NR_HELD, it means the current lock being
		 * acquired won't appear in the table, and other locks in the table are
		 * already held, so we can't determine ABBA.
		 */
		if (cpu == smp_processor_id() || real_cnt < 2 || real_cnt > RES_NR_HELD)
			continue;

		/*
		 * Obtain the entry at the top, this corresponds to the lock the
		 * remote CPU is attempting to acquire in a deadlock situation,
		 * and would be one of the locks we hold on the current CPU.
		 */
		remote_lock = READ_ONCE(rqh_cpu->locks[cnt - 1]);
		/*
		 * If it is NULL, we've raced and cannot determine a deadlock
		 * conclusively, skip this CPU.
		 */
		if (!remote_lock)
			continue;
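		/*
		 * Concretely, the search below looks for the following shape
		 * (lock names A and B are purely illustrative):
		 *
		 *   this CPU:   holds A, attempting B (== lock)
		 *               rqh->locks[]     = { ..., A, ..., B }
		 *   remote CPU: holds B, attempting A (== remote_lock)
		 *               rqh_cpu->locks[] = { ..., B, ..., A }
		 *
		 * i.e. the remote CPU holds the lock we want, and its topmost
		 * (attempted) entry is a lock we already hold. Neither side
		 * can make progress, so we report -EDEADLK and let the caller
		 * unwind.
		 */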
		/*
		 * Find if the lock we're attempting to acquire is held by this CPU.
		 * Don't consider the topmost entry, as that must be the latest lock
		 * being held or acquired. For a deadlock, the target CPU must also
		 * attempt to acquire a lock we hold, so for this search only 'cnt - 1'
		 * entries are important.
		 */
		for (int i = 0; i < cnt - 1; i++) {
			if (READ_ONCE(rqh_cpu->locks[i]) != lock)
				continue;
			/*
			 * We found our lock as held on the remote CPU. Is the
			 * acquisition attempt on the remote CPU for a lock held
			 * by us? If so, we have a deadlock situation, and need
			 * to recover.
			 */
			for (int i = 0; i < rqh_cnt - 1; i++) {
				if (rqh->locks[i] == remote_lock)
					return -EDEADLK;
			}
			/*
			 * Inconclusive; retry again later.
			 */
			return 0;
		}
	}
	return 0;
}

static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
				   struct rqspinlock_timeout *ts)
{
	int ret;

	ret = check_deadlock_AA(lock, mask, ts);
	if (ret)
		return ret;
	ret = check_deadlock_ABBA(lock, mask, ts);
	if (ret)
		return ret;

	return 0;
}

static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
				  struct rqspinlock_timeout *ts)
{
	u64 time = ktime_get_mono_fast_ns();
	u64 prev = ts->cur;

	if (!ts->timeout_end) {
		ts->cur = time;
		ts->timeout_end = time + ts->duration;
		return 0;
	}

	if (time > ts->timeout_end)
		return -ETIMEDOUT;

	/*
	 * A millisecond interval passed from last time? Trigger deadlock
	 * checks.
	 */
	if (prev + NSEC_PER_MSEC < time) {
		ts->cur = time;
		return check_deadlock(lock, mask, ts);
	}

	return 0;
}

/*
 * Do not amortize with spins when res_smp_cond_load_acquire is defined,
 * as the macro does internal amortization for us.
 */
#ifndef res_smp_cond_load_acquire
#define RES_CHECK_TIMEOUT(ts, ret, mask)			      \
	({							      \
		if (!(ts).spin++)				      \
			(ret) = check_timeout((lock), (mask), &(ts)); \
		(ret);						      \
	})
#else
#define RES_CHECK_TIMEOUT(ts, ret, mask)			      \
	({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif

/*
 * Initialize the 'spin' member.
 * Set spin member to 0 to trigger AA/ABBA checks immediately.
 */
#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 0; })

/*
 * We only need to reset 'timeout_end', 'spin' will just wrap around as necessary.
 * Duration is defined for each spin attempt, so set it here.
 */
#define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; })

/*
 * Provide a test-and-set fallback for cases when queued spin lock support is
 * absent from the architecture.
 */
int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
{
	struct rqspinlock_timeout ts;
	int val, ret = 0;

	RES_INIT_TIMEOUT(ts);
	grab_held_lock_entry(lock);

	/*
	 * The time spent in this waiting loop depends on the amount of
	 * contention, so a short timeout, unlike the rqspinlock waiting
	 * loops, isn't enough. Choose one second as the timeout value.
	 */
	RES_RESET_TIMEOUT(ts, NSEC_PER_SEC);
retry:
	val = atomic_read(&lock->val);

	if (val || !atomic_try_cmpxchg(&lock->val, &val, 1)) {
		if (RES_CHECK_TIMEOUT(ts, ret, ~0u))
			goto out;
		cpu_relax();
		goto retry;
	}

	return 0;
out:
	release_held_lock_entry();
	return ret;
}
EXPORT_SYMBOL_GPL(resilient_tas_spin_lock);
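/*
 * Unlike a plain spinlock, acquiring a resilient lock can fail, so callers
 * must check the return value and only unlock on success. A minimal usage
 * sketch (res_spin_lock()/res_spin_unlock() are the entry points also used
 * by the BPF kfunc wrappers at the bottom of this file, which additionally
 * disable preemption, and IRQs for the irqsave variants; the 'lock' variable
 * here is purely illustrative):
 *
 *	rqspinlock_t *lock = ...;	// e.g. embedded in a shared object
 *	int ret;
 *
 *	ret = res_spin_lock(lock);
 *	if (ret)		// -EDEADLK or -ETIMEDOUT: lock not taken,
 *		return ret;	// do not unlock
 *	...critical section...
 *	res_spin_unlock(lock);
 *
 * On failure, the error paths in this file have already dropped the
 * held-locks entry used for deadlock detection, so no further cleanup is
 * needed by the caller.
 */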
#ifdef CONFIG_QUEUED_SPINLOCKS

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
 * contexts: task, softirq, hardirq, nmi.
 *
 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
 */
static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);

#ifndef res_smp_cond_load_acquire
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c)
#endif

#define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c))

/**
 * resilient_queued_spin_lock_slowpath - acquire the queued spinlock
 * @lock: Pointer to queued spinlock structure
 * @val: Current value of the queued spinlock 32-bit word
 *
 * Return:
 * * 0		- Lock was acquired successfully.
 * * -EDEADLK	- Lock acquisition failed because of AA/ABBA deadlock.
 * * -ETIMEDOUT - Lock acquisition failed because of timeout.
 *
 * (queue tail, pending bit, lock value)
 *
 *              fast     :    slow                                  :    unlock
 *                       :                                          :
 * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
 *                       :       | ^--------.------.             /  :
 *                       :       v           \      \            |  :
 * pending               :    (0,1,1) +--> (0,1,0)   \            |  :
 *                       :       | ^--'              |            |  :
 *                       :       v                   |            |  :
 * uncontended           :    (n,x,y) +--> (n,0,0)   --'           |  :
 *   queue               :       | ^--'                            |  :
 *                       :       v                                 |  :
 * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
 *   queue               :         ^--'                            :
 */
int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
{
	struct mcs_spinlock *prev, *next, *node;
	struct rqspinlock_timeout ts;
	int idx, ret = 0;
	u32 old, tail;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	if (resilient_virt_spin_lock_enabled())
		return resilient_virt_spin_lock(lock);

	RES_INIT_TIMEOUT(ts);

	/*
	 * Wait for in-progress pending->locked hand-overs with a bounded
	 * number of spins so that we guarantee forward progress.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	if (val == _Q_PENDING_VAL) {
		int cnt = _Q_PENDING_LOOPS;
		val = atomic_cond_read_relaxed(&lock->val,
					       (VAL != _Q_PENDING_VAL) || !cnt--);
	}

	/*
	 * If we observe any contention; queue.
	 */
	if (val & ~_Q_LOCKED_MASK)
		goto queue;

	/*
	 * trylock || pending
	 *
	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
	 */
	val = queued_fetch_set_pending_acquire(lock);

	/*
	 * If we observe contention, there is a concurrent locker.
	 *
	 * Undo and queue; our setting of PENDING might have made the
	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
	 * on @next to become !NULL.
	 */
	if (unlikely(val & ~_Q_LOCKED_MASK)) {

		/* Undo PENDING if we set it. */
		if (!(val & _Q_PENDING_MASK))
			clear_pending(lock);

		goto queue;
	}

	/*
	 * Grab an entry in the held locks array, to enable deadlock detection.
	 */
	grab_held_lock_entry(lock);

	/*
	 * We're pending, wait for the owner to go away.
	 *
	 * 0,1,1 -> *,1,0
	 *
	 * this wait loop must be a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because not all
	 * clear_pending_set_locked() implementations imply full
	 * barriers.
	 */
	if (val & _Q_LOCKED_MASK) {
		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
		res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK));
	}
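	/*
	 * With the generic smp_cond_load_acquire() fallback, the wait above
	 * is roughly equivalent to the following sketch, followed by an
	 * acquire barrier (arches that override res_smp_cond_load_acquire(),
	 * e.g. with a WFE-style primitive, amortize the timeout check
	 * internally instead):
	 *
	 *	while (READ_ONCE(lock->locked)) {
	 *		if (RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK))
	 *			break;
	 *		cpu_relax();
	 *	}
	 *
	 * i.e. check_timeout() runs once every 2^16 spins (whenever the u16
	 * 'spin' counter wraps to zero), and deadlock checks run at most once
	 * per millisecond on top of that.
	 */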
	if (ret) {
		/*
		 * We waited for the locked bit to go back to 0, as the pending
		 * waiter, but timed out. We need to clear the pending bit since
		 * we own it. Once a stuck owner has been recovered, the lock
		 * must be restored to a valid state, hence removing the pending
		 * bit is necessary.
		 *
		 * *,1,* -> *,0,*
		 */
		clear_pending(lock);
		lockevent_inc(rqspinlock_lock_timeout);
		goto err_release_entry;
	}

	/*
	 * take ownership and clear the pending bit.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	clear_pending_set_locked(lock);
	lockevent_inc(lock_pending);
	return 0;

	/*
	 * End of pending bit optimistic spinning and beginning of MCS
	 * queuing.
	 */
queue:
	lockevent_inc(lock_slowpath);
	/*
	 * Grab deadlock detection entry for the queue path.
	 */
	grab_held_lock_entry(lock);

	node = this_cpu_ptr(&rqnodes[0].mcs);
	idx = node->count++;
	tail = encode_tail(smp_processor_id(), idx);

	trace_contention_begin(lock, LCB_F_SPIN);

	/*
	 * 4 nodes are allocated based on the assumption that there will
	 * not be nested NMIs taking spinlocks. That may not be true in
	 * some architectures even though the chance of needing more than
	 * 4 nodes will still be extremely unlikely. When that happens,
	 * we fall back to spinning on the lock directly without using
	 * any MCS node. This is not the most elegant solution, but is
	 * simple enough.
	 */
	if (unlikely(idx >= _Q_MAX_NODES)) {
		lockevent_inc(lock_no_node);
		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
		while (!queued_spin_trylock(lock)) {
			if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
				lockevent_inc(rqspinlock_lock_timeout);
				goto err_release_node;
			}
			cpu_relax();
		}
		goto release;
	}

	node = grab_mcs_node(node, idx);

	/*
	 * Keep counts of non-zero index values:
	 */
	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);

	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();

	node->locked = 0;
	node->next = NULL;

	/*
	 * We touched a (possibly) cold cacheline in the per-cpu queue node;
	 * attempt the trylock once more in the hope someone let go while we
	 * weren't watching.
	 */
	if (queued_spin_trylock(lock))
		goto release;

	/*
	 * Ensure that the initialisation of @node is complete before we
	 * publish the updated tail via xchg_tail() and potentially link
	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
	 */
	smp_wmb();

	/*
	 * Publish the updated tail.
	 * We have already touched the queueing cacheline; don't bother with
	 * pending stuff.
	 *
	 * p,*,* -> n,*,*
	 */
	old = xchg_tail(lock, tail);
	next = NULL;

	/*
	 * if there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_MASK) {
		int val;

		prev = decode_tail(old, rqnodes);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		val = arch_mcs_spin_lock_contended(&node->locked);
		if (val == RES_TIMEOUT_VAL) {
			ret = -EDEADLK;
			goto waitq_timeout;
		}

		/*
		 * While waiting for the MCS lock, the next pointer may have
		 * been set by another lock waiter. We optimistically load
		 * the next pointer & prefetch the cacheline for writing
		 * to reduce latency in the upcoming MCS unlock operation.
		 */
		next = READ_ONCE(node->next);
		if (next)
			prefetchw(next);
	}
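	/*
	 * Sketch of the wait queue shape at this point, with three waiters
	 * (the tail field of lock->val encodes the *last* node, while the
	 * ->next links run from the head towards the tail):
	 *
	 *	lock->val = (tail == C2, pending, locked)
	 *
	 *	us (head) --next--> C1 --next--> C2 (decoded from tail)
	 *
	 * Later waiters spin on their own node->locked (see
	 * arch_mcs_spin_lock_contended() above); as the head we spin on
	 * lock->val itself below.
	 */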
	/*
	 * we're at the head of the waitqueue, wait for the owner & pending to
	 * go away.
	 *
	 * *,x,y -> *,0,0
	 *
	 * this wait loop must use a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because the set_locked() function below
	 * does not imply a full barrier.
	 *
	 * We use RES_DEF_TIMEOUT * 2 as the duration, as RES_DEF_TIMEOUT is
	 * meant to span the maximum allowed time per critical section, and we
	 * may have both the owner of the lock and the pending bit waiter
	 * ahead of us.
	 */
	RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2);
	val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));

waitq_timeout:
	if (ret) {
		/*
		 * If the tail is still pointing to us, then we are the final waiter,
		 * and are responsible for resetting the tail back to 0. Otherwise, if
		 * the cmpxchg operation fails, we signal the next waiter to exit the
		 * queue and try the same. For a waiter with tail node 'n':
		 *
		 * n,*,* -> 0,*,*
		 *
		 * When performing cmpxchg for the whole word (NR_CPUS > 16k), it is
		 * possible locked/pending bits keep changing and we see failures even
		 * when we remain the head of the wait queue. However, eventually,
		 * the pending bit owner will unset the pending bit, and new waiters
		 * will queue behind us. This will leave the lock owner in
		 * charge, and it will eventually either set the locked bit to 0, or
		 * leave it as 1, allowing us to make progress.
		 *
		 * We terminate the whole wait queue for two reasons. Firstly,
		 * we eschew per-waiter timeouts with one applied at the head of
		 * the wait queue. This allows everyone to break out faster
		 * once we've seen the owner / pending waiter not responding for
		 * the timeout duration from the head. Secondly, it avoids
		 * complicated synchronization, because when not leaving in FIFO
		 * order, prev's next pointer needs to be fixed up etc.
		 */
		if (!try_cmpxchg_tail(lock, tail, 0)) {
			next = smp_cond_load_relaxed(&node->next, VAL);
			WRITE_ONCE(next->locked, RES_TIMEOUT_VAL);
		}
		lockevent_inc(rqspinlock_lock_timeout);
		goto err_release_node;
	}

	/*
	 * claim the lock:
	 *
	 * n,0,0 -> 0,0,1 : lock, uncontended
	 * *,*,0 -> *,*,1 : lock, contended
	 *
	 * If the queue head is the only one in the queue (lock value == tail)
	 * and nobody is pending, clear the tail code and grab the lock.
	 * Otherwise, we only need to grab the lock.
	 */

	/*
	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
	 * above wait condition, therefore any concurrent setting of
	 * PENDING will make the uncontended transition fail.
	 */
	if ((val & _Q_TAIL_MASK) == tail) {
		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
			goto release; /* No contention */
	}

	/*
	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
	 * which will then detect the remaining tail and queue behind us
	 * ensuring we'll see a @next.
	 */
	set_locked(lock);
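	/*
	 * The MCS handoff below pairs with arch_mcs_spin_lock_contended() in
	 * the queueing path above: the next waiter spins on its own
	 * node->locked until a non-zero value is stored there. A store of 1
	 * (done by arch_mcs_spin_unlock_contended()) promotes it to queue
	 * head, while the timeout path stores RES_TIMEOUT_VAL instead to make
	 * it (and, transitively, its successors) abandon the queue.
	 */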
	/*
	 * contended path; wait for next if not observed yet, release.
	 */
	if (!next)
		next = smp_cond_load_relaxed(&node->next, (VAL));

	arch_mcs_spin_unlock_contended(&next->locked);

release:
	trace_contention_end(lock, 0);

	/*
	 * release the node
	 */
	__this_cpu_dec(rqnodes[0].mcs.count);
	return ret;
err_release_node:
	trace_contention_end(lock, ret);
	__this_cpu_dec(rqnodes[0].mcs.count);
err_release_entry:
	release_held_lock_entry();
	return ret;
}
EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);

#endif /* CONFIG_QUEUED_SPINLOCKS */

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock)
{
	int ret;

	BUILD_BUG_ON(sizeof(rqspinlock_t) != sizeof(struct bpf_res_spin_lock));
	BUILD_BUG_ON(__alignof__(rqspinlock_t) != __alignof__(struct bpf_res_spin_lock));

	preempt_disable();
	ret = res_spin_lock((rqspinlock_t *)lock);
	if (unlikely(ret)) {
		preempt_enable();
		return ret;
	}
	return 0;
}

__bpf_kfunc void bpf_res_spin_unlock(struct bpf_res_spin_lock *lock)
{
	res_spin_unlock((rqspinlock_t *)lock);
	preempt_enable();
}

__bpf_kfunc int bpf_res_spin_lock_irqsave(struct bpf_res_spin_lock *lock, unsigned long *flags__irq_flag)
{
	u64 *ptr = (u64 *)flags__irq_flag;
	unsigned long flags;
	int ret;

	preempt_disable();
	local_irq_save(flags);
	ret = res_spin_lock((rqspinlock_t *)lock);
	if (unlikely(ret)) {
		local_irq_restore(flags);
		preempt_enable();
		return ret;
	}
	*ptr = flags;
	return 0;
}

__bpf_kfunc void bpf_res_spin_unlock_irqrestore(struct bpf_res_spin_lock *lock, unsigned long *flags__irq_flag)
{
	u64 *ptr = (u64 *)flags__irq_flag;
	unsigned long flags = *ptr;

	res_spin_unlock((rqspinlock_t *)lock);
	local_irq_restore(flags);
	preempt_enable();
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(rqspinlock_kfunc_ids)
BTF_ID_FLAGS(func, bpf_res_spin_lock, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_res_spin_unlock)
BTF_ID_FLAGS(func, bpf_res_spin_lock_irqsave, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_res_spin_unlock_irqrestore)
BTF_KFUNCS_END(rqspinlock_kfunc_ids)

static const struct btf_kfunc_id_set rqspinlock_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &rqspinlock_kfunc_ids,
};

static __init int rqspinlock_register_kfuncs(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &rqspinlock_kfunc_set);
}
late_initcall(rqspinlock_register_kfuncs);
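/*
 * BPF-side usage sketch for the kfuncs above (illustrative only; the map,
 * key, and value layout are hypothetical). Since acquisition can fail with
 * -EDEADLK or -ETIMEDOUT, the return value must be checked and the unlock
 * skipped on failure:
 *
 *	struct val_t {
 *		struct bpf_res_spin_lock lock;
 *		u64 counter;
 *	};
 *
 *	struct val_t *val = bpf_map_lookup_elem(&some_map, &key);
 *	if (!val)
 *		return 0;
 *	if (bpf_res_spin_lock(&val->lock))
 *		return 0;	// lock not taken; do not unlock
 *	val->counter++;
 *	bpf_res_spin_unlock(&val->lock);
 */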