// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>

#define MAX_NODES	4
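/*
 * Each CPU gets MAX_NODES queue nodes, one per context that can nest on
 * a spinlock (task, soft IRQ, hard IRQ, NMI), matching the generic
 * qspinlock implementation.
 */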

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	u8		sleepy; /* 1 if the previous vCPU was preempted or
				 * if the previous node was sleepy */
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;
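/*
 * Most of the tunables above can be adjusted at runtime through debugfs;
 * see spinlock_debugfs_init() at the end of this file.
 */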

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

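/* Has this CPU recently seen a preempted lock owner or a sleepy lock? */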
static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

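/*
 * The tail field stores the queueing CPU as cpu + 1, so that a tail of
 * zero always means "no queue".
 */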
static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail		\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5					\n"
"	bne	3f						\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6					\n"
"	cmpw	0,%1,%3						\n"
	/* Merge the new locked value */
"	or	%1,%1,%4					\n"
"	bne	2f						\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6					\n"
"2:	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t"	PPC_ACQUIRE_BARRIER "					\n"
"3:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t"	PPC_RELEASE_BARRIER "					\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu		\n"
"	andc	%1,%0,%4					\n"
"	or	%1,%1,%3					\n"
"	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

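/*
 * Set _Q_MUST_Q_VAL in the lock word; lock stealers that observe this
 * bit give up stealing and queue instead.
 */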
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq	\n"
"	or	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq	\n"
"	andc	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

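/*
 * Mark the lock "sleepy" after observing a preempted owner, so that the
 * steal and head spin limits are scaled up by pv_sleepy_lock_factor.
 */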
static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy	\n"
"	cmpw	0,%0,%2				\n"
"	bne-	2f				\n"
"	stwcx.	%3,0,%1				\n"
"	bne-	1b				\n"
"2:						\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(void)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		/* Don't set sleepy because we likely have a stale val */
	}
}

static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

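/*
 * Propagate the sleepy hint down the queue: if the lock owner's vCPU is
 * currently preempted, mark the next waiter's node sleepy as well.
 */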
static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_sleepy_owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (next->sleepy)
		return;

	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->sleepy = 1;
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
	u32 yield_count;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_sleepy_owner)
		goto yield_prev;

	/*
	 * If the previous waiter was preempted it might not be able to
	 * propagate sleepy to us, so check the lock in that case too.
	 */
	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
		u32 val = READ_ONCE(lock->val);

		if (val & _Q_LOCKED_VAL) {
			if (node->next && !node->next->sleepy) {
				/*
				 * Propagate sleepy to next waiter. Only if
				 * owner is preempted, which allows the queue
				 * to become "non-sleepy" if vCPU preemption
				 * ceases to occur, even if the lock remains
				 * highly contended.
				 */
				if (vcpu_is_preempted(get_owner_cpu(val)))
					node->next->sleepy = 1;
			}

			preempted = yield_to_locked_owner(lock, val, paravirt);
			if (preempted)
				return preempted;
		}
		node->sleepy = false;
	}

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node();

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

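/*
 * Should a would-be stealer give up and queue? Either the overall spin
 * budget is exhausted, or (with NUMA) the smaller remote budget is
 * exhausted while the lock owner runs on a different NUMA node.
 */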
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}

static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * With pv_spin_on_preempted_owner, don't increase
			 * iters while the owner is preempted -- we won't
			 * interfere with it by definition. This could
			 * introduce some latency if we continually observe
			 * preempted owners, but hopefully that's a rare
			 * corner case of a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

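/*
 * Queue as an MCS waiter: publish this CPU's qnode as the new lock tail,
 * spin on our own node until the previous waiter hands over, then, as
 * queue head, spin on the lock word until the lock is acquired.
 */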
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->sleepy = 0;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		int prev_cpu = decode_tail_cpu(old);
		struct qnode *prev = get_tail_qnode(lock, prev_cpu);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, prev_cpu, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_sleepy(node, val, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;

		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	qnodesp->count--; /* release the node */
}

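/*
 * Slow path entry point. Try stealing the lock first, then fall back to
 * MCS queueing. The paravirt variants are used when running as a
 * shared-processor guest, where vCPU preemption is possible.
 */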
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>
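/*
 * debugfs knobs for the tuning parameters above. They are created under
 * the arch debugfs directory (typically /sys/kernel/debug/powerpc).
 */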
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so we must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}


static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
	pv_yield_sleepy_owner = !!val;

	return 0;
}

static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
	*val = pv_yield_sleepy_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);