1 /* 2 * Wrappers around mutex/cond/thread functions 3 * 4 * Copyright Red Hat, Inc. 2009 5 * 6 * Author: 7 * Marcelo Tosatti <mtosatti@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 #include "qemu/osdep.h" 14 #include "qemu/thread.h" 15 #include "qemu/atomic.h" 16 #include "qemu/notify.h" 17 #include "qemu-thread-common.h" 18 #include "qemu/tsan.h" 19 #include "qemu/bitmap.h" 20 #include "qemu/clang-tsa.h" 21 22 #ifdef CONFIG_PTHREAD_SET_NAME_NP 23 #include <pthread_np.h> 24 #endif 25 26 static bool name_threads; 27 28 void qemu_thread_naming(bool enable) 29 { 30 name_threads = enable; 31 32 #if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \ 33 !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \ 34 !defined CONFIG_PTHREAD_SET_NAME_NP 35 /* This is a debugging option, not fatal */ 36 if (enable) { 37 fprintf(stderr, "qemu: thread naming not supported on this host\n"); 38 } 39 #endif 40 } 41 42 static void error_exit(int err, const char *msg) 43 { 44 fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err)); 45 abort(); 46 } 47 48 static inline clockid_t qemu_timedwait_clockid(void) 49 { 50 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK 51 return CLOCK_MONOTONIC; 52 #else 53 return CLOCK_REALTIME; 54 #endif 55 } 56 57 static void compute_abs_deadline(struct timespec *ts, int ms) 58 { 59 clock_gettime(qemu_timedwait_clockid(), ts); 60 ts->tv_nsec += (ms % 1000) * 1000000; 61 ts->tv_sec += ms / 1000; 62 if (ts->tv_nsec >= 1000000000) { 63 ts->tv_sec++; 64 ts->tv_nsec -= 1000000000; 65 } 66 } 67 68 void qemu_mutex_init(QemuMutex *mutex) 69 { 70 int err; 71 72 err = pthread_mutex_init(&mutex->lock, NULL); 73 if (err) 74 error_exit(err, __func__); 75 qemu_mutex_post_init(mutex); 76 } 77 78 void qemu_mutex_destroy(QemuMutex *mutex) 79 { 80 int err; 81 82 assert(mutex->initialized); 83 mutex->initialized = false; 84 err = pthread_mutex_destroy(&mutex->lock); 85 if (err) 86 error_exit(err, __func__); 87 } 88 89 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line) 90 { 91 int err; 92 93 assert(mutex->initialized); 94 qemu_mutex_pre_lock(mutex, file, line); 95 err = pthread_mutex_lock(&mutex->lock); 96 if (err) 97 error_exit(err, __func__); 98 qemu_mutex_post_lock(mutex, file, line); 99 } 100 101 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line) 102 { 103 int err; 104 105 assert(mutex->initialized); 106 err = pthread_mutex_trylock(&mutex->lock); 107 if (err == 0) { 108 qemu_mutex_post_lock(mutex, file, line); 109 return 0; 110 } 111 if (err != EBUSY) { 112 error_exit(err, __func__); 113 } 114 return -EBUSY; 115 } 116 117 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line) 118 { 119 int err; 120 121 assert(mutex->initialized); 122 qemu_mutex_pre_unlock(mutex, file, line); 123 err = pthread_mutex_unlock(&mutex->lock); 124 if (err) 125 error_exit(err, __func__); 126 } 127 128 void qemu_rec_mutex_init(QemuRecMutex *mutex) 129 { 130 int err; 131 pthread_mutexattr_t attr; 132 133 pthread_mutexattr_init(&attr); 134 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); 135 err = pthread_mutex_init(&mutex->m.lock, &attr); 136 pthread_mutexattr_destroy(&attr); 137 if (err) { 138 error_exit(err, __func__); 139 } 140 mutex->m.initialized = true; 141 } 142 143 void qemu_rec_mutex_destroy(QemuRecMutex *mutex) 144 { 145 qemu_mutex_destroy(&mutex->m); 146 } 147 148 void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line) 149 { 150 qemu_mutex_lock_impl(&mutex->m, file, line); 151 } 152 153 int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line) 154 { 155 return qemu_mutex_trylock_impl(&mutex->m, file, line); 156 } 157 158 void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line) 159 { 160 qemu_mutex_unlock_impl(&mutex->m, file, line); 161 } 162 163 void qemu_cond_init(QemuCond *cond) 164 { 165 pthread_condattr_t attr; 166 int err; 167 168 err = pthread_condattr_init(&attr); 169 if (err) { 170 error_exit(err, __func__); 171 } 172 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK 173 err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid()); 174 if (err) { 175 error_exit(err, __func__); 176 } 177 #endif 178 err = pthread_cond_init(&cond->cond, &attr); 179 if (err) { 180 error_exit(err, __func__); 181 } 182 err = pthread_condattr_destroy(&attr); 183 if (err) { 184 error_exit(err, __func__); 185 } 186 cond->initialized = true; 187 } 188 189 void qemu_cond_destroy(QemuCond *cond) 190 { 191 int err; 192 193 assert(cond->initialized); 194 cond->initialized = false; 195 err = pthread_cond_destroy(&cond->cond); 196 if (err) 197 error_exit(err, __func__); 198 } 199 200 void qemu_cond_signal(QemuCond *cond) 201 { 202 int err; 203 204 assert(cond->initialized); 205 err = pthread_cond_signal(&cond->cond); 206 if (err) 207 error_exit(err, __func__); 208 } 209 210 void qemu_cond_broadcast(QemuCond *cond) 211 { 212 int err; 213 214 assert(cond->initialized); 215 err = pthread_cond_broadcast(&cond->cond); 216 if (err) 217 error_exit(err, __func__); 218 } 219 220 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line) 221 { 222 int err; 223 224 assert(cond->initialized); 225 qemu_mutex_pre_unlock(mutex, file, line); 226 err = pthread_cond_wait(&cond->cond, &mutex->lock); 227 qemu_mutex_post_lock(mutex, file, line); 228 if (err) 229 error_exit(err, __func__); 230 } 231 232 static bool TSA_NO_TSA 233 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts, 234 const char *file, const int line) 235 { 236 int err; 237 238 assert(cond->initialized); 239 trace_qemu_mutex_unlock(mutex, file, line); 240 err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts); 241 trace_qemu_mutex_locked(mutex, file, line); 242 if (err && err != ETIMEDOUT) { 243 error_exit(err, __func__); 244 } 245 return err != ETIMEDOUT; 246 } 247 248 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, 249 const char *file, const int line) 250 { 251 struct timespec ts; 252 253 compute_abs_deadline(&ts, ms); 254 return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line); 255 } 256 257 void qemu_sem_init(QemuSemaphore *sem, int init) 258 { 259 qemu_mutex_init(&sem->mutex); 260 qemu_cond_init(&sem->cond); 261 262 if (init < 0) { 263 error_exit(EINVAL, __func__); 264 } 265 sem->count = init; 266 } 267 268 void qemu_sem_destroy(QemuSemaphore *sem) 269 { 270 qemu_cond_destroy(&sem->cond); 271 qemu_mutex_destroy(&sem->mutex); 272 } 273 274 void qemu_sem_post(QemuSemaphore *sem) 275 { 276 qemu_mutex_lock(&sem->mutex); 277 if (sem->count == UINT_MAX) { 278 error_exit(EINVAL, __func__); 279 } else { 280 sem->count++; 281 qemu_cond_signal(&sem->cond); 282 } 283 qemu_mutex_unlock(&sem->mutex); 284 } 285 286 int qemu_sem_timedwait(QemuSemaphore *sem, int ms) 287 { 288 bool rc = true; 289 struct timespec ts; 290 291 compute_abs_deadline(&ts, ms); 292 qemu_mutex_lock(&sem->mutex); 293 while (sem->count == 0) { 294 if (ms == 0) { 295 rc = false; 296 } else { 297 rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts, 298 __FILE__, __LINE__); 299 } 300 if (!rc) { /* timeout */ 301 break; 302 } 303 } 304 if (rc) { 305 --sem->count; 306 } 307 qemu_mutex_unlock(&sem->mutex); 308 return (rc ? 0 : -1); 309 } 310 311 void qemu_sem_wait(QemuSemaphore *sem) 312 { 313 qemu_mutex_lock(&sem->mutex); 314 while (sem->count == 0) { 315 qemu_cond_wait(&sem->cond, &sem->mutex); 316 } 317 --sem->count; 318 qemu_mutex_unlock(&sem->mutex); 319 } 320 321 #ifdef __linux__ 322 #include "qemu/futex.h" 323 #else 324 static inline void qemu_futex_wake(QemuEvent *ev, int n) 325 { 326 assert(ev->initialized); 327 pthread_mutex_lock(&ev->lock); 328 if (n == 1) { 329 pthread_cond_signal(&ev->cond); 330 } else { 331 pthread_cond_broadcast(&ev->cond); 332 } 333 pthread_mutex_unlock(&ev->lock); 334 } 335 336 static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) 337 { 338 assert(ev->initialized); 339 pthread_mutex_lock(&ev->lock); 340 if (ev->value == val) { 341 pthread_cond_wait(&ev->cond, &ev->lock); 342 } 343 pthread_mutex_unlock(&ev->lock); 344 } 345 #endif 346 347 /* Valid transitions: 348 * - free->set, when setting the event 349 * - busy->set, when setting the event, followed by qemu_futex_wake 350 * - set->free, when resetting the event 351 * - free->busy, when waiting 352 * 353 * set->busy does not happen (it can be observed from the outside but 354 * it really is set->free->busy). 355 * 356 * busy->free provably cannot happen; to enforce it, the set->free transition 357 * is done with an OR, which becomes a no-op if the event has concurrently 358 * transitioned to free or busy. 359 */ 360 361 #define EV_SET 0 362 #define EV_FREE 1 363 #define EV_BUSY -1 364 365 void qemu_event_init(QemuEvent *ev, bool init) 366 { 367 #ifndef __linux__ 368 pthread_mutex_init(&ev->lock, NULL); 369 pthread_cond_init(&ev->cond, NULL); 370 #endif 371 372 ev->value = (init ? EV_SET : EV_FREE); 373 ev->initialized = true; 374 } 375 376 void qemu_event_destroy(QemuEvent *ev) 377 { 378 assert(ev->initialized); 379 ev->initialized = false; 380 #ifndef __linux__ 381 pthread_mutex_destroy(&ev->lock); 382 pthread_cond_destroy(&ev->cond); 383 #endif 384 } 385 386 void qemu_event_set(QemuEvent *ev) 387 { 388 assert(ev->initialized); 389 390 /* 391 * Pairs with both qemu_event_reset() and qemu_event_wait(). 392 * 393 * qemu_event_set has release semantics, but because it *loads* 394 * ev->value we need a full memory barrier here. 395 */ 396 smp_mb(); 397 if (qatomic_read(&ev->value) != EV_SET) { 398 int old = qatomic_xchg(&ev->value, EV_SET); 399 400 /* Pairs with memory barrier in kernel futex_wait system call. */ 401 smp_mb__after_rmw(); 402 if (old == EV_BUSY) { 403 /* There were waiters, wake them up. */ 404 qemu_futex_wake(ev, INT_MAX); 405 } 406 } 407 } 408 409 void qemu_event_reset(QemuEvent *ev) 410 { 411 assert(ev->initialized); 412 413 /* 414 * If there was a concurrent reset (or even reset+wait), 415 * do nothing. Otherwise change EV_SET->EV_FREE. 416 */ 417 qatomic_or(&ev->value, EV_FREE); 418 419 /* 420 * Order reset before checking the condition in the caller. 421 * Pairs with the first memory barrier in qemu_event_set(). 422 */ 423 smp_mb__after_rmw(); 424 } 425 426 void qemu_event_wait(QemuEvent *ev) 427 { 428 unsigned value; 429 430 assert(ev->initialized); 431 432 /* 433 * qemu_event_wait must synchronize with qemu_event_set even if it does 434 * not go down the slow path, so this load-acquire is needed that 435 * synchronizes with the first memory barrier in qemu_event_set(). 436 * 437 * If we do go down the slow path, there is no requirement at all: we 438 * might miss a qemu_event_set() here but ultimately the memory barrier in 439 * qemu_futex_wait() will ensure the check is done correctly. 440 */ 441 value = qatomic_load_acquire(&ev->value); 442 if (value != EV_SET) { 443 if (value == EV_FREE) { 444 /* 445 * Leave the event reset and tell qemu_event_set that there are 446 * waiters. No need to retry, because there cannot be a concurrent 447 * busy->free transition. After the CAS, the event will be either 448 * set or busy. 449 * 450 * This cmpxchg doesn't have particular ordering requirements if it 451 * succeeds (moving the store earlier can only cause qemu_event_set() 452 * to issue _more_ wakeups), the failing case needs acquire semantics 453 * like the load above. 454 */ 455 if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { 456 return; 457 } 458 } 459 460 /* 461 * This is the final check for a concurrent set, so it does need 462 * a smp_mb() pairing with the second barrier of qemu_event_set(). 463 * The barrier is inside the FUTEX_WAIT system call. 464 */ 465 qemu_futex_wait(ev, EV_BUSY); 466 } 467 } 468 469 static __thread NotifierList thread_exit; 470 471 /* 472 * Note that in this implementation you can register a thread-exit 473 * notifier for the main thread, but it will never be called. 474 * This is OK because main thread exit can only happen when the 475 * entire process is exiting, and the API allows notifiers to not 476 * be called on process exit. 477 */ 478 void qemu_thread_atexit_add(Notifier *notifier) 479 { 480 notifier_list_add(&thread_exit, notifier); 481 } 482 483 void qemu_thread_atexit_remove(Notifier *notifier) 484 { 485 notifier_remove(notifier); 486 } 487 488 static void qemu_thread_atexit_notify(void *arg) 489 { 490 /* 491 * Called when non-main thread exits (via qemu_thread_exit() 492 * or by returning from its start routine.) 493 */ 494 notifier_list_notify(&thread_exit, NULL); 495 } 496 497 typedef struct { 498 void *(*start_routine)(void *); 499 void *arg; 500 char *name; 501 } QemuThreadArgs; 502 503 static void *qemu_thread_start(void *args) 504 { 505 QemuThreadArgs *qemu_thread_args = args; 506 void *(*start_routine)(void *) = qemu_thread_args->start_routine; 507 void *arg = qemu_thread_args->arg; 508 void *r; 509 510 /* Attempt to set the threads name; note that this is for debug, so 511 * we're not going to fail if we can't set it. 512 */ 513 if (name_threads && qemu_thread_args->name) { 514 # if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID) 515 pthread_setname_np(pthread_self(), qemu_thread_args->name); 516 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID) 517 pthread_setname_np(qemu_thread_args->name); 518 # elif defined(CONFIG_PTHREAD_SET_NAME_NP) 519 pthread_set_name_np(pthread_self(), qemu_thread_args->name); 520 # endif 521 } 522 QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name); 523 g_free(qemu_thread_args->name); 524 g_free(qemu_thread_args); 525 526 /* 527 * GCC 11 with glibc 2.17 on PowerPC reports 528 * 529 * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes 530 * in a region of size 528 [-Werror=stringop-overflow=] 531 * 540 | pthread_cleanup_push(qemu_thread_atexit_notify, NULL); 532 * | ^~~~~~~~~~~~~~~~~~~~ 533 * 534 * which is clearly nonsense. 535 */ 536 #pragma GCC diagnostic push 537 #ifndef __clang__ 538 #pragma GCC diagnostic ignored "-Wstringop-overflow" 539 #endif 540 541 pthread_cleanup_push(qemu_thread_atexit_notify, NULL); 542 r = start_routine(arg); 543 pthread_cleanup_pop(1); 544 545 #pragma GCC diagnostic pop 546 547 return r; 548 } 549 550 void qemu_thread_create(QemuThread *thread, const char *name, 551 void *(*start_routine)(void*), 552 void *arg, int mode) 553 { 554 sigset_t set, oldset; 555 int err; 556 pthread_attr_t attr; 557 QemuThreadArgs *qemu_thread_args; 558 559 err = pthread_attr_init(&attr); 560 if (err) { 561 error_exit(err, __func__); 562 } 563 564 if (mode == QEMU_THREAD_DETACHED) { 565 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 566 } 567 568 /* Leave signal handling to the iothread. */ 569 sigfillset(&set); 570 /* Blocking the signals can result in undefined behaviour. */ 571 sigdelset(&set, SIGSEGV); 572 sigdelset(&set, SIGFPE); 573 sigdelset(&set, SIGILL); 574 /* TODO avoid SIGBUS loss on macOS */ 575 pthread_sigmask(SIG_SETMASK, &set, &oldset); 576 577 qemu_thread_args = g_new0(QemuThreadArgs, 1); 578 qemu_thread_args->name = g_strdup(name); 579 qemu_thread_args->start_routine = start_routine; 580 qemu_thread_args->arg = arg; 581 582 err = pthread_create(&thread->thread, &attr, 583 qemu_thread_start, qemu_thread_args); 584 585 if (err) 586 error_exit(err, __func__); 587 588 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 589 590 pthread_attr_destroy(&attr); 591 } 592 593 int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, 594 unsigned long nbits) 595 { 596 #if defined(CONFIG_PTHREAD_AFFINITY_NP) 597 const size_t setsize = CPU_ALLOC_SIZE(nbits); 598 unsigned long value; 599 cpu_set_t *cpuset; 600 int err; 601 602 cpuset = CPU_ALLOC(nbits); 603 g_assert(cpuset); 604 605 CPU_ZERO_S(setsize, cpuset); 606 value = find_first_bit(host_cpus, nbits); 607 while (value < nbits) { 608 CPU_SET_S(value, setsize, cpuset); 609 value = find_next_bit(host_cpus, nbits, value + 1); 610 } 611 612 err = pthread_setaffinity_np(thread->thread, setsize, cpuset); 613 CPU_FREE(cpuset); 614 return err; 615 #else 616 return -ENOSYS; 617 #endif 618 } 619 620 int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, 621 unsigned long *nbits) 622 { 623 #if defined(CONFIG_PTHREAD_AFFINITY_NP) 624 unsigned long tmpbits; 625 cpu_set_t *cpuset; 626 size_t setsize; 627 int i, err; 628 629 tmpbits = CPU_SETSIZE; 630 while (true) { 631 setsize = CPU_ALLOC_SIZE(tmpbits); 632 cpuset = CPU_ALLOC(tmpbits); 633 g_assert(cpuset); 634 635 err = pthread_getaffinity_np(thread->thread, setsize, cpuset); 636 if (err) { 637 CPU_FREE(cpuset); 638 if (err != -EINVAL) { 639 return err; 640 } 641 tmpbits *= 2; 642 } else { 643 break; 644 } 645 } 646 647 /* Convert the result into a proper bitmap. */ 648 *nbits = tmpbits; 649 *host_cpus = bitmap_new(tmpbits); 650 for (i = 0; i < tmpbits; i++) { 651 if (CPU_ISSET(i, cpuset)) { 652 set_bit(i, *host_cpus); 653 } 654 } 655 CPU_FREE(cpuset); 656 return 0; 657 #else 658 return -ENOSYS; 659 #endif 660 } 661 662 void qemu_thread_get_self(QemuThread *thread) 663 { 664 thread->thread = pthread_self(); 665 } 666 667 bool qemu_thread_is_self(QemuThread *thread) 668 { 669 return pthread_equal(pthread_self(), thread->thread); 670 } 671 672 void qemu_thread_exit(void *retval) 673 { 674 pthread_exit(retval); 675 } 676 677 void *qemu_thread_join(QemuThread *thread) 678 { 679 int err; 680 void *ret; 681 682 err = pthread_join(thread->thread, &ret); 683 if (err) { 684 error_exit(err, __func__); 685 } 686 return ret; 687 } 688