xref: /qemu/util/qemu-thread-posix.c (revision aa3a285b5bc56a4208b3b57d4a55291e9c260107)
1 /*
2  * Wrappers around mutex/cond/thread functions
3  *
4  * Copyright Red Hat, Inc. 2009
5  *
6  * Author:
7  *  Marcelo Tosatti <mtosatti@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/thread.h"
15 #include "qemu/atomic.h"
16 #include "qemu/notify.h"
17 #include "qemu-thread-common.h"
18 #include "qemu/tsan.h"
19 #include "qemu/bitmap.h"
20 #include "qemu/clang-tsa.h"
21 
22 #ifdef CONFIG_PTHREAD_SET_NAME_NP
23 #include <pthread_np.h>
24 #endif
25 
/* Whether threads should be given their debug name when they start. */
static bool name_threads;

/*
 * Turn thread naming on or off.
 *
 * The flag is consulted by qemu_thread_start() when a new thread begins
 * running.  On hosts that provide none of the supported pthread naming
 * interfaces, asking to enable naming only prints a diagnostic.
 */
void qemu_thread_naming(bool enable)
{
    name_threads = enable;
    if (!enable) {
        return;
    }

#if !(defined CONFIG_PTHREAD_SETNAME_NP_W_TID || \
      defined CONFIG_PTHREAD_SETNAME_NP_WO_TID || \
      defined CONFIG_PTHREAD_SET_NAME_NP)
    /* This is a debugging option, not fatal */
    fprintf(stderr, "qemu: thread naming not supported on this host\n");
#endif
}
41 
/*
 * Report an unrecoverable threading error and abort the process.
 *
 * @err: errno-style code, rendered with strerror()
 * @msg: context string; callers in this file pass __func__
 */
static void error_exit(int err, const char *msg)
{
    const char *reason = strerror(err);

    fprintf(stderr, "qemu: %s: %s\n", msg, reason);
    abort();
}
47 
/*
 * Clock against which timed-wait deadlines are expressed.
 *
 * CLOCK_MONOTONIC is used when pthread_condattr_setclock() is available
 * (qemu_cond_init() then configures condition variables accordingly);
 * otherwise CLOCK_REALTIME is used.
 */
static inline clockid_t qemu_timedwait_clockid(void)
{
    clockid_t clk = CLOCK_REALTIME;

#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
    clk = CLOCK_MONOTONIC;
#endif
    return clk;
}
56 
/*
 * Fill @ts with the absolute time that lies @ms milliseconds from now,
 * measured on the clock returned by qemu_timedwait_clockid().
 *
 * The result is always a normalised timespec (0 <= tv_nsec < 1e9).  A
 * negative @ms therefore yields a deadline in the past rather than a
 * negative tv_nsec, which pthread_cond_timedwait() would reject with
 * EINVAL.
 */
static void compute_abs_deadline(struct timespec *ts, int ms)
{
    clock_gettime(qemu_timedwait_clockid(), ts);

    /* |ms % 1000| <= 999, so the product always fits in an int. */
    ts->tv_nsec += (ms % 1000) * 1000000;
    ts->tv_sec += ms / 1000;

    /* At most one carry/borrow is needed: the adjustment is below 1s. */
    if (ts->tv_nsec >= 1000000000) {
        ts->tv_sec++;
        ts->tv_nsec -= 1000000000;
    } else if (ts->tv_nsec < 0) {
        ts->tv_sec--;
        ts->tv_nsec += 1000000000;
    }
}
67 
68 void qemu_mutex_init(QemuMutex *mutex)
69 {
70     int err;
71 
72     err = pthread_mutex_init(&mutex->lock, NULL);
73     if (err)
74         error_exit(err, __func__);
75     qemu_mutex_post_init(mutex);
76 }
77 
78 void qemu_mutex_destroy(QemuMutex *mutex)
79 {
80     int err;
81 
82     assert(mutex->initialized);
83     mutex->initialized = false;
84     err = pthread_mutex_destroy(&mutex->lock);
85     if (err)
86         error_exit(err, __func__);
87 }
88 
89 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
90 {
91     int err;
92 
93     assert(mutex->initialized);
94     qemu_mutex_pre_lock(mutex, file, line);
95     err = pthread_mutex_lock(&mutex->lock);
96     if (err)
97         error_exit(err, __func__);
98     qemu_mutex_post_lock(mutex, file, line);
99 }
100 
101 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
102 {
103     int err;
104 
105     assert(mutex->initialized);
106     err = pthread_mutex_trylock(&mutex->lock);
107     if (err == 0) {
108         qemu_mutex_post_lock(mutex, file, line);
109         return 0;
110     }
111     if (err != EBUSY) {
112         error_exit(err, __func__);
113     }
114     return -EBUSY;
115 }
116 
117 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
118 {
119     int err;
120 
121     assert(mutex->initialized);
122     qemu_mutex_pre_unlock(mutex, file, line);
123     err = pthread_mutex_unlock(&mutex->lock);
124     if (err)
125         error_exit(err, __func__);
126 }
127 
128 void qemu_rec_mutex_init(QemuRecMutex *mutex)
129 {
130     int err;
131     pthread_mutexattr_t attr;
132 
133     pthread_mutexattr_init(&attr);
134     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
135     err = pthread_mutex_init(&mutex->m.lock, &attr);
136     pthread_mutexattr_destroy(&attr);
137     if (err) {
138         error_exit(err, __func__);
139     }
140     mutex->m.initialized = true;
141 }
142 
143 void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
144 {
145     qemu_mutex_destroy(&mutex->m);
146 }
147 
148 void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
149 {
150     qemu_mutex_lock_impl(&mutex->m, file, line);
151 }
152 
153 int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
154 {
155     return qemu_mutex_trylock_impl(&mutex->m, file, line);
156 }
157 
158 void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
159 {
160     qemu_mutex_unlock_impl(&mutex->m, file, line);
161 }
162 
163 void qemu_cond_init(QemuCond *cond)
164 {
165     pthread_condattr_t attr;
166     int err;
167 
168     err = pthread_condattr_init(&attr);
169     if (err) {
170         error_exit(err, __func__);
171     }
172 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
173     err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid());
174     if (err) {
175         error_exit(err, __func__);
176     }
177 #endif
178     err = pthread_cond_init(&cond->cond, &attr);
179     if (err) {
180         error_exit(err, __func__);
181     }
182     err = pthread_condattr_destroy(&attr);
183     if (err) {
184         error_exit(err, __func__);
185     }
186     cond->initialized = true;
187 }
188 
189 void qemu_cond_destroy(QemuCond *cond)
190 {
191     int err;
192 
193     assert(cond->initialized);
194     cond->initialized = false;
195     err = pthread_cond_destroy(&cond->cond);
196     if (err)
197         error_exit(err, __func__);
198 }
199 
200 void qemu_cond_signal(QemuCond *cond)
201 {
202     int err;
203 
204     assert(cond->initialized);
205     err = pthread_cond_signal(&cond->cond);
206     if (err)
207         error_exit(err, __func__);
208 }
209 
210 void qemu_cond_broadcast(QemuCond *cond)
211 {
212     int err;
213 
214     assert(cond->initialized);
215     err = pthread_cond_broadcast(&cond->cond);
216     if (err)
217         error_exit(err, __func__);
218 }
219 
220 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
221 {
222     int err;
223 
224     assert(cond->initialized);
225     qemu_mutex_pre_unlock(mutex, file, line);
226     err = pthread_cond_wait(&cond->cond, &mutex->lock);
227     qemu_mutex_post_lock(mutex, file, line);
228     if (err)
229         error_exit(err, __func__);
230 }
231 
232 static bool TSA_NO_TSA
233 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
234                        const char *file, const int line)
235 {
236     int err;
237 
238     assert(cond->initialized);
239     trace_qemu_mutex_unlock(mutex, file, line);
240     err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts);
241     trace_qemu_mutex_locked(mutex, file, line);
242     if (err && err != ETIMEDOUT) {
243         error_exit(err, __func__);
244     }
245     return err != ETIMEDOUT;
246 }
247 
248 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
249                               const char *file, const int line)
250 {
251     struct timespec ts;
252 
253     compute_abs_deadline(&ts, ms);
254     return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line);
255 }
256 
257 void qemu_sem_init(QemuSemaphore *sem, int init)
258 {
259     qemu_mutex_init(&sem->mutex);
260     qemu_cond_init(&sem->cond);
261 
262     if (init < 0) {
263         error_exit(EINVAL, __func__);
264     }
265     sem->count = init;
266 }
267 
268 void qemu_sem_destroy(QemuSemaphore *sem)
269 {
270     qemu_cond_destroy(&sem->cond);
271     qemu_mutex_destroy(&sem->mutex);
272 }
273 
274 void qemu_sem_post(QemuSemaphore *sem)
275 {
276     qemu_mutex_lock(&sem->mutex);
277     if (sem->count == UINT_MAX) {
278         error_exit(EINVAL, __func__);
279     } else {
280         sem->count++;
281         qemu_cond_signal(&sem->cond);
282     }
283     qemu_mutex_unlock(&sem->mutex);
284 }
285 
286 int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
287 {
288     bool rc = true;
289     struct timespec ts;
290 
291     compute_abs_deadline(&ts, ms);
292     qemu_mutex_lock(&sem->mutex);
293     while (sem->count == 0) {
294         if (ms == 0) {
295             rc = false;
296         } else {
297             rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts,
298                                         __FILE__, __LINE__);
299         }
300         if (!rc) { /* timeout */
301             break;
302         }
303     }
304     if (rc) {
305         --sem->count;
306     }
307     qemu_mutex_unlock(&sem->mutex);
308     return (rc ? 0 : -1);
309 }
310 
311 void qemu_sem_wait(QemuSemaphore *sem)
312 {
313     qemu_mutex_lock(&sem->mutex);
314     while (sem->count == 0) {
315         qemu_cond_wait(&sem->cond, &sem->mutex);
316     }
317     --sem->count;
318     qemu_mutex_unlock(&sem->mutex);
319 }
320 
321 #ifdef __linux__
322 #include "qemu/futex.h"
323 #else
324 static inline void qemu_futex_wake(QemuEvent *ev, int n)
325 {
326     assert(ev->initialized);
327     pthread_mutex_lock(&ev->lock);
328     if (n == 1) {
329         pthread_cond_signal(&ev->cond);
330     } else {
331         pthread_cond_broadcast(&ev->cond);
332     }
333     pthread_mutex_unlock(&ev->lock);
334 }
335 
336 static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
337 {
338     assert(ev->initialized);
339     pthread_mutex_lock(&ev->lock);
340     if (ev->value == val) {
341         pthread_cond_wait(&ev->cond, &ev->lock);
342     }
343     pthread_mutex_unlock(&ev->lock);
344 }
345 #endif
346 
347 /* Valid transitions:
348  * - free->set, when setting the event
349  * - busy->set, when setting the event, followed by qemu_futex_wake
350  * - set->free, when resetting the event
351  * - free->busy, when waiting
352  *
353  * set->busy does not happen (it can be observed from the outside but
354  * it really is set->free->busy).
355  *
356  * busy->free provably cannot happen; to enforce it, the set->free transition
357  * is done with an OR, which becomes a no-op if the event has concurrently
358  * transitioned to free or busy.
359  */
360 
/*
 * States stored in QemuEvent.value.
 *
 * The encoding is chosen so that OR-ing with EV_FREE turns EV_SET into
 * EV_FREE while leaving EV_FREE and EV_BUSY unchanged; qemu_event_reset()
 * relies on this.
 */
#define EV_SET         0      /* event is set */
#define EV_FREE        1      /* event is reset, nobody is waiting */
#define EV_BUSY       (-1)    /* event is reset and there are waiters */
364 
365 void qemu_event_init(QemuEvent *ev, bool init)
366 {
367 #ifndef __linux__
368     pthread_mutex_init(&ev->lock, NULL);
369     pthread_cond_init(&ev->cond, NULL);
370 #endif
371 
372     ev->value = (init ? EV_SET : EV_FREE);
373     ev->initialized = true;
374 }
375 
376 void qemu_event_destroy(QemuEvent *ev)
377 {
378     assert(ev->initialized);
379     ev->initialized = false;
380 #ifndef __linux__
381     pthread_mutex_destroy(&ev->lock);
382     pthread_cond_destroy(&ev->cond);
383 #endif
384 }
385 
386 void qemu_event_set(QemuEvent *ev)
387 {
388     assert(ev->initialized);
389 
390     /*
391      * Pairs with both qemu_event_reset() and qemu_event_wait().
392      *
393      * qemu_event_set has release semantics, but because it *loads*
394      * ev->value we need a full memory barrier here.
395      */
396     smp_mb();
397     if (qatomic_read(&ev->value) != EV_SET) {
398         int old = qatomic_xchg(&ev->value, EV_SET);
399 
400         /* Pairs with memory barrier in kernel futex_wait system call.  */
401         smp_mb__after_rmw();
402         if (old == EV_BUSY) {
403             /* There were waiters, wake them up.  */
404             qemu_futex_wake(ev, INT_MAX);
405         }
406     }
407 }
408 
409 void qemu_event_reset(QemuEvent *ev)
410 {
411     assert(ev->initialized);
412 
413     /*
414      * If there was a concurrent reset (or even reset+wait),
415      * do nothing.  Otherwise change EV_SET->EV_FREE.
416      */
417     qatomic_or(&ev->value, EV_FREE);
418 
419     /*
420      * Order reset before checking the condition in the caller.
421      * Pairs with the first memory barrier in qemu_event_set().
422      */
423     smp_mb__after_rmw();
424 }
425 
426 void qemu_event_wait(QemuEvent *ev)
427 {
428     unsigned value;
429 
430     assert(ev->initialized);
431 
432     /*
433      * qemu_event_wait must synchronize with qemu_event_set even if it does
434      * not go down the slow path, so this load-acquire is needed that
435      * synchronizes with the first memory barrier in qemu_event_set().
436      *
437      * If we do go down the slow path, there is no requirement at all: we
438      * might miss a qemu_event_set() here but ultimately the memory barrier in
439      * qemu_futex_wait() will ensure the check is done correctly.
440      */
441     value = qatomic_load_acquire(&ev->value);
442     if (value != EV_SET) {
443         if (value == EV_FREE) {
444             /*
445              * Leave the event reset and tell qemu_event_set that there are
446              * waiters.  No need to retry, because there cannot be a concurrent
447              * busy->free transition.  After the CAS, the event will be either
448              * set or busy.
449              *
450              * This cmpxchg doesn't have particular ordering requirements if it
451              * succeeds (moving the store earlier can only cause qemu_event_set()
452              * to issue _more_ wakeups), the failing case needs acquire semantics
453              * like the load above.
454              */
455             if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
456                 return;
457             }
458         }
459 
460         /*
461          * This is the final check for a concurrent set, so it does need
462          * a smp_mb() pairing with the second barrier of qemu_event_set().
463          * The barrier is inside the FUTEX_WAIT system call.
464          */
465         qemu_futex_wait(ev, EV_BUSY);
466     }
467 }
468 
469 static __thread NotifierList thread_exit;
470 
471 /*
472  * Note that in this implementation you can register a thread-exit
473  * notifier for the main thread, but it will never be called.
474  * This is OK because main thread exit can only happen when the
475  * entire process is exiting, and the API allows notifiers to not
476  * be called on process exit.
477  */
478 void qemu_thread_atexit_add(Notifier *notifier)
479 {
480     notifier_list_add(&thread_exit, notifier);
481 }
482 
483 void qemu_thread_atexit_remove(Notifier *notifier)
484 {
485     notifier_remove(notifier);
486 }
487 
488 static void qemu_thread_atexit_notify(void *arg)
489 {
490     /*
491      * Called when non-main thread exits (via qemu_thread_exit()
492      * or by returning from its start routine.)
493      */
494     notifier_list_notify(&thread_exit, NULL);
495 }
496 
/*
 * Start-up parameters handed from qemu_thread_create() to
 * qemu_thread_start(), which frees both @name and the structure itself.
 */
typedef struct QemuThreadArgs {
    void *(*start_routine)(void *);     /* function the new thread runs */
    void *arg;                          /* argument for @start_routine */
    char *name;                         /* heap copy of the thread name */
} QemuThreadArgs;
502 
503 static void *qemu_thread_start(void *args)
504 {
505     QemuThreadArgs *qemu_thread_args = args;
506     void *(*start_routine)(void *) = qemu_thread_args->start_routine;
507     void *arg = qemu_thread_args->arg;
508     void *r;
509 
510     /* Attempt to set the threads name; note that this is for debug, so
511      * we're not going to fail if we can't set it.
512      */
513     if (name_threads && qemu_thread_args->name) {
514 # if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
515         pthread_setname_np(pthread_self(), qemu_thread_args->name);
516 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
517         pthread_setname_np(qemu_thread_args->name);
518 # elif defined(CONFIG_PTHREAD_SET_NAME_NP)
519         pthread_set_name_np(pthread_self(), qemu_thread_args->name);
520 # endif
521     }
522     QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
523     g_free(qemu_thread_args->name);
524     g_free(qemu_thread_args);
525 
526     /*
527      * GCC 11 with glibc 2.17 on PowerPC reports
528      *
529      * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes
530      *   in a region of size 528 [-Werror=stringop-overflow=]
531      * 540 |     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
532      *     |     ^~~~~~~~~~~~~~~~~~~~
533      *
534      * which is clearly nonsense.
535      */
536 #pragma GCC diagnostic push
537 #ifndef __clang__
538 #pragma GCC diagnostic ignored "-Wstringop-overflow"
539 #endif
540 
541     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
542     r = start_routine(arg);
543     pthread_cleanup_pop(1);
544 
545 #pragma GCC diagnostic pop
546 
547     return r;
548 }
549 
550 void qemu_thread_create(QemuThread *thread, const char *name,
551                        void *(*start_routine)(void*),
552                        void *arg, int mode)
553 {
554     sigset_t set, oldset;
555     int err;
556     pthread_attr_t attr;
557     QemuThreadArgs *qemu_thread_args;
558 
559     err = pthread_attr_init(&attr);
560     if (err) {
561         error_exit(err, __func__);
562     }
563 
564     if (mode == QEMU_THREAD_DETACHED) {
565         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
566     }
567 
568     /* Leave signal handling to the iothread.  */
569     sigfillset(&set);
570     /* Blocking the signals can result in undefined behaviour. */
571     sigdelset(&set, SIGSEGV);
572     sigdelset(&set, SIGFPE);
573     sigdelset(&set, SIGILL);
574     /* TODO avoid SIGBUS loss on macOS */
575     pthread_sigmask(SIG_SETMASK, &set, &oldset);
576 
577     qemu_thread_args = g_new0(QemuThreadArgs, 1);
578     qemu_thread_args->name = g_strdup(name);
579     qemu_thread_args->start_routine = start_routine;
580     qemu_thread_args->arg = arg;
581 
582     err = pthread_create(&thread->thread, &attr,
583                          qemu_thread_start, qemu_thread_args);
584 
585     if (err)
586         error_exit(err, __func__);
587 
588     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
589 
590     pthread_attr_destroy(&attr);
591 }
592 
593 int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
594                              unsigned long nbits)
595 {
596 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
597     const size_t setsize = CPU_ALLOC_SIZE(nbits);
598     unsigned long value;
599     cpu_set_t *cpuset;
600     int err;
601 
602     cpuset = CPU_ALLOC(nbits);
603     g_assert(cpuset);
604 
605     CPU_ZERO_S(setsize, cpuset);
606     value = find_first_bit(host_cpus, nbits);
607     while (value < nbits) {
608         CPU_SET_S(value, setsize, cpuset);
609         value = find_next_bit(host_cpus, nbits, value + 1);
610     }
611 
612     err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
613     CPU_FREE(cpuset);
614     return err;
615 #else
616     return -ENOSYS;
617 #endif
618 }
619 
620 int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
621                              unsigned long *nbits)
622 {
623 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
624     unsigned long tmpbits;
625     cpu_set_t *cpuset;
626     size_t setsize;
627     int i, err;
628 
629     tmpbits = CPU_SETSIZE;
630     while (true) {
631         setsize = CPU_ALLOC_SIZE(tmpbits);
632         cpuset = CPU_ALLOC(tmpbits);
633         g_assert(cpuset);
634 
635         err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
636         if (err) {
637             CPU_FREE(cpuset);
638             if (err != -EINVAL) {
639                 return err;
640             }
641             tmpbits *= 2;
642         } else {
643             break;
644         }
645     }
646 
647     /* Convert the result into a proper bitmap. */
648     *nbits = tmpbits;
649     *host_cpus = bitmap_new(tmpbits);
650     for (i = 0; i < tmpbits; i++) {
651         if (CPU_ISSET(i, cpuset)) {
652             set_bit(i, *host_cpus);
653         }
654     }
655     CPU_FREE(cpuset);
656     return 0;
657 #else
658     return -ENOSYS;
659 #endif
660 }
661 
662 void qemu_thread_get_self(QemuThread *thread)
663 {
664     thread->thread = pthread_self();
665 }
666 
667 bool qemu_thread_is_self(QemuThread *thread)
668 {
669    return pthread_equal(pthread_self(), thread->thread);
670 }
671 
/*
 * Terminate the calling thread, making @retval available to a joiner.
 *
 * For threads started through qemu_thread_start(), the cleanup handler
 * installed there runs the thread-exit notifiers.
 */
void qemu_thread_exit(void *retval)
{
    void *result = retval;

    pthread_exit(result);
}
676 
677 void *qemu_thread_join(QemuThread *thread)
678 {
679     int err;
680     void *ret;
681 
682     err = pthread_join(thread->thread, &ret);
683     if (err) {
684         error_exit(err, __func__);
685     }
686     return ret;
687 }
688