/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
#include "qemu/osdep.h"
#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

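/*
 * Typical use, as a minimal sketch ("xyz" and "xyz_lockcnt" are hypothetical
 * names used only for illustration):
 *
 *     // Reader: pin the shared object, then re-check that it still exists.
 *     qemu_lockcnt_inc(&xyz_lockcnt);
 *     if (xyz) {
 *         ...read-only access to xyz...
 *     }
 *     qemu_lockcnt_dec(&xyz_lockcnt);
 *
 * The matching writer-side pattern, which frees the object once the last
 * reader is gone, is sketched next to qemu_lockcnt_dec_and_lock below.
 */
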
#ifdef HAVE_FUTEX

/*
 * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the
 * counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    /* Nothing to do: the futex-based implementation holds no resources
     * beyond the counter word itself.
     */
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = qatomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}
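
/*
 * Illustrative summary of the transitions performed by the slow-path loop
 * above (state names refer to the low two bits of lockcnt->count):
 *
 *     FREE    --cmpxchg to new_if_free-->  done, return true
 *     LOCKED  --cmpxchg to WAITING------>  loop again
 *     WAITING --futex wait, reread------>  loop again
 */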

static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake_single(&lockcnt->count);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return true with the lock taken if it is
 * decremented to zero.  If the function returns true, it is impossible
 * for the counter to become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}
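
/*
 * A minimal sketch of the reclamation pattern this function enables ("xyz"
 * and "xyz_lockcnt" are hypothetical names used only for illustration): a
 * thread that earlier called qemu_lockcnt_inc drops its reference with
 * qemu_lockcnt_dec_and_lock instead of qemu_lockcnt_dec, and frees the
 * protected object if it was the last user:
 *
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {
 *         // Counter reached zero and the lock is held: no new reference
 *         // can appear until the matching qemu_lockcnt_unlock.
 *         g_free(xyz);
 *         xyz = NULL;
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */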

/* If the counter is one, decrement it and return true with the lock
 * taken.  Otherwise do nothing and return false.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = qatomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return true with the lock taken if it is
 * decremented to zero.  It is impossible for the counter to become
 * nonzero while the mutex is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    while (val > 1) {
        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* If the counter is one, decrement it and return true with the lock
 * taken.  Otherwise do nothing and return false.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = qatomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    qatomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count);
}
#endif