/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
#include "qemu/osdep.h"
#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef HAVE_FUTEX

/*
 * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the
 * counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2
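
/* A worked encoding example (illustration only, not part of the original
 * comments): a counter of 3 with the lock held and no waiters is stored as
 * (3 << QEMU_LOCKCNT_COUNT_SHIFT) | QEMU_LOCKCNT_STATE_LOCKED, i.e.
 * 12 | 1 = 13.  Incrementing the counter adds QEMU_LOCKCNT_COUNT_STEP and
 * leaves the two state bits alone.
 */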

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
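/* For example (a walk-through of the contract above, not extra behavior):
 * a caller that observes QEMU_LOCKCNT_STATE_LOCKED first CASes the state
 * to QEMU_LOCKCNT_STATE_WAITING, then futex-waits.  Once the lock holder
 * clears the state bits and wakes it, the loop below sees
 * QEMU_LOCKCNT_STATE_FREE and the function returns false with *waited
 * set, so the caller recomputes new_if_free and calls again.
 */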
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = qatomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake_single(&lockcnt->count);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}
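
/* A minimal reader-side usage sketch (hypothetical caller code; the
 * xyz_lockcnt, xyz_list and visit() names are illustrations, not part of
 * this file):
 *
 *     qemu_lockcnt_inc(&xyz_lockcnt);
 *     QLIST_FOREACH_RCU(e, &xyz_list, node) {
 *         visit(e);
 *     }
 *     qemu_lockcnt_dec(&xyz_lockcnt);
 *
 * The nonzero count keeps writers from freeing list elements while the
 * walk is in progress.
 */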

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock.  The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}
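
/* A minimal writer-side sketch (hypothetical caller code): freeing an
 * element is only safe once the count has dropped to zero and the lock
 * is held:
 *
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {
 *         QLIST_REMOVE(e, node);
 *         g_free(e);
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */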

/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock.  The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}
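
/* A hypothetical use from inside a read-side walk (sketch only): a reader
 * holding one count reference can try to upgrade it to the lock and
 * remove a dead element in place:
 *
 *     if (qemu_lockcnt_dec_if_lock(&xyz_lockcnt)) {
 *         QLIST_REMOVE(e, node);
 *         qemu_lockcnt_inc_and_unlock(&xyz_lockcnt);
 *     }
 *
 * qemu_lockcnt_inc_and_unlock restores the reader's reference, so a later
 * qemu_lockcnt_dec still balances whether or not the upgrade succeeded.
 */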

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}
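
/* For example (illustration only): with a counter of 5 and the lock free,
 * val is 5 << QEMU_LOCKCNT_COUNT_SHIFT = 20, and the cmpxchg above
 * installs 20 + QEMU_LOCKCNT_STATE_LOCKED = 21, i.e. the same counter
 * with the LOCKED state mixed into the low bits.
 */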

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}
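
/* Worked example (illustration only): starting from 13 (counter 3,
 * LOCKED), the update above computes (13 + 4) & ~3 = 16, i.e. counter 4
 * with the state bits cleared back to FREE in a single atomic step.
 */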

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
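/* Fallback when futexes are not available: the low bits cannot double as
 * a lock, so a QemuMutex provides the lock and count is a plain counter
 * (note that qemu_lockcnt_count below does not shift).
 */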
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = qatomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    while (val > 1) {
        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = qatomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    qatomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count);
}
#endif