14f999d05SKevin Wolf /* 2c2b38b27SPaolo Bonzini * Data plane event loop 34f999d05SKevin Wolf * 44f999d05SKevin Wolf * Copyright (c) 2003-2008 Fabrice Bellard 5c2b38b27SPaolo Bonzini * Copyright (c) 2009-2017 QEMU contributors 64f999d05SKevin Wolf * 74f999d05SKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 84f999d05SKevin Wolf * of this software and associated documentation files (the "Software"), to deal 94f999d05SKevin Wolf * in the Software without restriction, including without limitation the rights 104f999d05SKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 114f999d05SKevin Wolf * copies of the Software, and to permit persons to whom the Software is 124f999d05SKevin Wolf * furnished to do so, subject to the following conditions: 134f999d05SKevin Wolf * 144f999d05SKevin Wolf * The above copyright notice and this permission notice shall be included in 154f999d05SKevin Wolf * all copies or substantial portions of the Software. 164f999d05SKevin Wolf * 174f999d05SKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 184f999d05SKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 194f999d05SKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 204f999d05SKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 214f999d05SKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 224f999d05SKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 234f999d05SKevin Wolf * THE SOFTWARE. 244f999d05SKevin Wolf */ 254f999d05SKevin Wolf 26d38ea87aSPeter Maydell #include "qemu/osdep.h" 27da34e65cSMarkus Armbruster #include "qapi/error.h" 284f999d05SKevin Wolf #include "qemu-common.h" 29737e150eSPaolo Bonzini #include "block/aio.h" 309b34277dSStefan Hajnoczi #include "block/thread-pool.h" 311de7afc9SPaolo Bonzini #include "qemu/main-loop.h" 320ceb849bSPaolo Bonzini #include "qemu/atomic.h" 330187f5c9SPaolo Bonzini #include "block/raw-aio.h" 340c330a73SPaolo Bonzini #include "qemu/coroutine_int.h" 350c330a73SPaolo Bonzini #include "trace.h" 369a1e9481SKevin Wolf 374f999d05SKevin Wolf /***********************************************************/ 384f999d05SKevin Wolf /* bottom halves (can be seen as timers which expire ASAP) */ 394f999d05SKevin Wolf 404f999d05SKevin Wolf struct QEMUBH { 412f4dc3c1SPaolo Bonzini AioContext *ctx; 424f999d05SKevin Wolf QEMUBHFunc *cb; 434f999d05SKevin Wolf void *opaque; 444f999d05SKevin Wolf QEMUBH *next; 459b47b17eSStefan Weil bool scheduled; 469b47b17eSStefan Weil bool idle; 479b47b17eSStefan Weil bool deleted; 484f999d05SKevin Wolf }; 494f999d05SKevin Wolf 505b8bb359SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) 515b8bb359SPaolo Bonzini { 525b8bb359SPaolo Bonzini QEMUBH *bh; 535b8bb359SPaolo Bonzini bh = g_new(QEMUBH, 1); 545b8bb359SPaolo Bonzini *bh = (QEMUBH){ 555b8bb359SPaolo Bonzini .ctx = ctx, 565b8bb359SPaolo Bonzini .cb = cb, 575b8bb359SPaolo Bonzini .opaque = opaque, 585b8bb359SPaolo Bonzini }; 59d7c99a12SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 605b8bb359SPaolo Bonzini bh->next = ctx->first_bh; 615b8bb359SPaolo Bonzini bh->scheduled = 1; 625b8bb359SPaolo Bonzini bh->deleted = 1; 635b8bb359SPaolo Bonzini /* Make sure that the members are ready before putting bh into list */ 645b8bb359SPaolo Bonzini smp_wmb(); 655b8bb359SPaolo Bonzini ctx->first_bh = bh; 66d7c99a12SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 67c9d1a561SPaolo Bonzini aio_notify(ctx); 685b8bb359SPaolo Bonzini } 695b8bb359SPaolo Bonzini 70f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) 714f999d05SKevin Wolf { 724f999d05SKevin Wolf QEMUBH *bh; 73ee82310fSPaolo Bonzini bh = g_new(QEMUBH, 1); 74ee82310fSPaolo Bonzini *bh = (QEMUBH){ 75ee82310fSPaolo Bonzini .ctx = ctx, 76ee82310fSPaolo Bonzini .cb = cb, 77ee82310fSPaolo Bonzini .opaque = opaque, 78ee82310fSPaolo Bonzini }; 79d7c99a12SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 80f627aab1SPaolo Bonzini bh->next = ctx->first_bh; 81dcc772e2SLiu Ping Fan /* Make sure that the members are ready before putting bh into list */ 82dcc772e2SLiu Ping Fan smp_wmb(); 83f627aab1SPaolo Bonzini ctx->first_bh = bh; 84d7c99a12SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 854f999d05SKevin Wolf return bh; 864f999d05SKevin Wolf } 874f999d05SKevin Wolf 88df281b80SPavel Dovgalyuk void aio_bh_call(QEMUBH *bh) 89df281b80SPavel Dovgalyuk { 90df281b80SPavel Dovgalyuk bh->cb(bh->opaque); 91df281b80SPavel Dovgalyuk } 92df281b80SPavel Dovgalyuk 93dcc772e2SLiu Ping Fan /* Multiple occurrences of aio_bh_poll cannot be called concurrently */ 94f627aab1SPaolo Bonzini int aio_bh_poll(AioContext *ctx) 954f999d05SKevin Wolf { 967887f620SKevin Wolf QEMUBH *bh, **bhp, *next; 974f999d05SKevin Wolf int ret; 987d506c90SPaolo Bonzini bool deleted = false; 99648fb0eaSKevin Wolf 100d7c99a12SPaolo Bonzini qemu_lockcnt_inc(&ctx->list_lock); 1014f999d05SKevin Wolf 1024f999d05SKevin Wolf ret = 0; 103d7c99a12SPaolo Bonzini for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) { 104d7c99a12SPaolo Bonzini next = atomic_rcu_read(&bh->next); 105e8d3b1a2SPaolo Bonzini /* The atomic_xchg is paired with the one in qemu_bh_schedule. The 106e8d3b1a2SPaolo Bonzini * implicit memory barrier ensures that the callback sees all writes 107e8d3b1a2SPaolo Bonzini * done by the scheduling thread. It also ensures that the scheduling 108e8d3b1a2SPaolo Bonzini * thread sees the zero before bh->cb has run, and thus will call 109e8d3b1a2SPaolo Bonzini * aio_notify again if necessary. 110dcc772e2SLiu Ping Fan */ 1115b8bb359SPaolo Bonzini if (atomic_xchg(&bh->scheduled, 0)) { 11265c1b5b6SPaolo Bonzini /* Idle BHs don't count as progress */ 11365c1b5b6SPaolo Bonzini if (!bh->idle) { 1144f999d05SKevin Wolf ret = 1; 115ca96ac44SStefan Hajnoczi } 1164f999d05SKevin Wolf bh->idle = 0; 117df281b80SPavel Dovgalyuk aio_bh_call(bh); 1184f999d05SKevin Wolf } 1197d506c90SPaolo Bonzini if (bh->deleted) { 1207d506c90SPaolo Bonzini deleted = true; 1217d506c90SPaolo Bonzini } 1224f999d05SKevin Wolf } 1234f999d05SKevin Wolf 1244f999d05SKevin Wolf /* remove deleted bhs */ 1257d506c90SPaolo Bonzini if (!deleted) { 1267d506c90SPaolo Bonzini qemu_lockcnt_dec(&ctx->list_lock); 1277d506c90SPaolo Bonzini return ret; 1287d506c90SPaolo Bonzini } 1297d506c90SPaolo Bonzini 130d7c99a12SPaolo Bonzini if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) { 131f627aab1SPaolo Bonzini bhp = &ctx->first_bh; 1324f999d05SKevin Wolf while (*bhp) { 1334f999d05SKevin Wolf bh = *bhp; 1345b8bb359SPaolo Bonzini if (bh->deleted && !bh->scheduled) { 1354f999d05SKevin Wolf *bhp = bh->next; 1367267c094SAnthony Liguori g_free(bh); 137648fb0eaSKevin Wolf } else { 1384f999d05SKevin Wolf bhp = &bh->next; 1394f999d05SKevin Wolf } 140648fb0eaSKevin Wolf } 141d7c99a12SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 142648fb0eaSKevin Wolf } 1434f999d05SKevin Wolf return ret; 1444f999d05SKevin Wolf } 1454f999d05SKevin Wolf 1464f999d05SKevin Wolf void qemu_bh_schedule_idle(QEMUBH *bh) 1474f999d05SKevin Wolf { 1484f999d05SKevin Wolf bh->idle = 1; 149dcc772e2SLiu Ping Fan /* Make sure that idle & any writes needed by the callback are done 150dcc772e2SLiu Ping Fan * before the locations are read in the aio_bh_poll. 151dcc772e2SLiu Ping Fan */ 152e8d3b1a2SPaolo Bonzini atomic_mb_set(&bh->scheduled, 1); 1534f999d05SKevin Wolf } 1544f999d05SKevin Wolf 1554f999d05SKevin Wolf void qemu_bh_schedule(QEMUBH *bh) 1564f999d05SKevin Wolf { 157924fe129SStefan Hajnoczi AioContext *ctx; 158924fe129SStefan Hajnoczi 159924fe129SStefan Hajnoczi ctx = bh->ctx; 1604f999d05SKevin Wolf bh->idle = 0; 161e8d3b1a2SPaolo Bonzini /* The memory barrier implicit in atomic_xchg makes sure that: 162924fe129SStefan Hajnoczi * 1. idle & any writes needed by the callback are done before the 163924fe129SStefan Hajnoczi * locations are read in the aio_bh_poll. 164924fe129SStefan Hajnoczi * 2. ctx is loaded before scheduled is set and the callback has a chance 165924fe129SStefan Hajnoczi * to execute. 166dcc772e2SLiu Ping Fan */ 167e8d3b1a2SPaolo Bonzini if (atomic_xchg(&bh->scheduled, 1) == 0) { 168924fe129SStefan Hajnoczi aio_notify(ctx); 1694f999d05SKevin Wolf } 170e8d3b1a2SPaolo Bonzini } 1714f999d05SKevin Wolf 172dcc772e2SLiu Ping Fan 173dcc772e2SLiu Ping Fan /* This func is async. 174dcc772e2SLiu Ping Fan */ 1754f999d05SKevin Wolf void qemu_bh_cancel(QEMUBH *bh) 1764f999d05SKevin Wolf { 1774f999d05SKevin Wolf bh->scheduled = 0; 1784f999d05SKevin Wolf } 1794f999d05SKevin Wolf 180dcc772e2SLiu Ping Fan /* This func is async.The bottom half will do the delete action at the finial 181dcc772e2SLiu Ping Fan * end. 182dcc772e2SLiu Ping Fan */ 1834f999d05SKevin Wolf void qemu_bh_delete(QEMUBH *bh) 1844f999d05SKevin Wolf { 1854f999d05SKevin Wolf bh->scheduled = 0; 1864f999d05SKevin Wolf bh->deleted = 1; 1874f999d05SKevin Wolf } 1884f999d05SKevin Wolf 189845ca10dSPaolo Bonzini int64_t 190845ca10dSPaolo Bonzini aio_compute_timeout(AioContext *ctx) 1914f999d05SKevin Wolf { 192845ca10dSPaolo Bonzini int64_t deadline; 193845ca10dSPaolo Bonzini int timeout = -1; 1944f999d05SKevin Wolf QEMUBH *bh; 1954f999d05SKevin Wolf 196d7c99a12SPaolo Bonzini for (bh = atomic_rcu_read(&ctx->first_bh); bh; 197d7c99a12SPaolo Bonzini bh = atomic_rcu_read(&bh->next)) { 1985b8bb359SPaolo Bonzini if (bh->scheduled) { 1994f999d05SKevin Wolf if (bh->idle) { 2004f999d05SKevin Wolf /* idle bottom halves will be polled at least 2014f999d05SKevin Wolf * every 10ms */ 202845ca10dSPaolo Bonzini timeout = 10000000; 2034f999d05SKevin Wolf } else { 2044f999d05SKevin Wolf /* non-idle bottom halves will be executed 2054f999d05SKevin Wolf * immediately */ 206845ca10dSPaolo Bonzini return 0; 2074f999d05SKevin Wolf } 2084f999d05SKevin Wolf } 2094f999d05SKevin Wolf } 2104f999d05SKevin Wolf 211845ca10dSPaolo Bonzini deadline = timerlistgroup_deadline_ns(&ctx->tlg); 212533a8cf3SAlex Bligh if (deadline == 0) { 213845ca10dSPaolo Bonzini return 0; 214533a8cf3SAlex Bligh } else { 215845ca10dSPaolo Bonzini return qemu_soonest_timeout(timeout, deadline); 216845ca10dSPaolo Bonzini } 217533a8cf3SAlex Bligh } 218533a8cf3SAlex Bligh 219845ca10dSPaolo Bonzini static gboolean 220845ca10dSPaolo Bonzini aio_ctx_prepare(GSource *source, gint *timeout) 221845ca10dSPaolo Bonzini { 222845ca10dSPaolo Bonzini AioContext *ctx = (AioContext *) source; 223845ca10dSPaolo Bonzini 224eabc9779SPaolo Bonzini atomic_or(&ctx->notify_me, 1); 225eabc9779SPaolo Bonzini 226845ca10dSPaolo Bonzini /* We assume there is no timeout already supplied */ 227845ca10dSPaolo Bonzini *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); 228a3462c65SPaolo Bonzini 229a3462c65SPaolo Bonzini if (aio_prepare(ctx)) { 230a3462c65SPaolo Bonzini *timeout = 0; 231a3462c65SPaolo Bonzini } 232a3462c65SPaolo Bonzini 233845ca10dSPaolo Bonzini return *timeout == 0; 234e3713e00SPaolo Bonzini } 235e3713e00SPaolo Bonzini 236e3713e00SPaolo Bonzini static gboolean 237e3713e00SPaolo Bonzini aio_ctx_check(GSource *source) 238e3713e00SPaolo Bonzini { 239e3713e00SPaolo Bonzini AioContext *ctx = (AioContext *) source; 240e3713e00SPaolo Bonzini QEMUBH *bh; 241e3713e00SPaolo Bonzini 242eabc9779SPaolo Bonzini atomic_and(&ctx->notify_me, ~1); 24305e514b1SPaolo Bonzini aio_notify_accept(ctx); 24421a03d17SPaolo Bonzini 245e3713e00SPaolo Bonzini for (bh = ctx->first_bh; bh; bh = bh->next) { 2465b8bb359SPaolo Bonzini if (bh->scheduled) { 247e3713e00SPaolo Bonzini return true; 248e3713e00SPaolo Bonzini } 249e3713e00SPaolo Bonzini } 250533a8cf3SAlex Bligh return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0); 251e3713e00SPaolo Bonzini } 252e3713e00SPaolo Bonzini 253e3713e00SPaolo Bonzini static gboolean 254e3713e00SPaolo Bonzini aio_ctx_dispatch(GSource *source, 255e3713e00SPaolo Bonzini GSourceFunc callback, 256e3713e00SPaolo Bonzini gpointer user_data) 257e3713e00SPaolo Bonzini { 258e3713e00SPaolo Bonzini AioContext *ctx = (AioContext *) source; 259e3713e00SPaolo Bonzini 260e3713e00SPaolo Bonzini assert(callback == NULL); 261721671adSStefan Hajnoczi aio_dispatch(ctx, true); 262e3713e00SPaolo Bonzini return true; 263e3713e00SPaolo Bonzini } 264e3713e00SPaolo Bonzini 2652f4dc3c1SPaolo Bonzini static void 2662f4dc3c1SPaolo Bonzini aio_ctx_finalize(GSource *source) 2672f4dc3c1SPaolo Bonzini { 2682f4dc3c1SPaolo Bonzini AioContext *ctx = (AioContext *) source; 2692f4dc3c1SPaolo Bonzini 2709b34277dSStefan Hajnoczi thread_pool_free(ctx->thread_pool); 271a076972aSStefan Hajnoczi 2720187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 2730187f5c9SPaolo Bonzini if (ctx->linux_aio) { 2740187f5c9SPaolo Bonzini laio_detach_aio_context(ctx->linux_aio, ctx); 2750187f5c9SPaolo Bonzini laio_cleanup(ctx->linux_aio); 2760187f5c9SPaolo Bonzini ctx->linux_aio = NULL; 2770187f5c9SPaolo Bonzini } 2780187f5c9SPaolo Bonzini #endif 2790187f5c9SPaolo Bonzini 2800c330a73SPaolo Bonzini assert(QSLIST_EMPTY(&ctx->scheduled_coroutines)); 2810c330a73SPaolo Bonzini qemu_bh_delete(ctx->co_schedule_bh); 2820c330a73SPaolo Bonzini 283d7c99a12SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 284d7c99a12SPaolo Bonzini assert(!qemu_lockcnt_count(&ctx->list_lock)); 285a076972aSStefan Hajnoczi while (ctx->first_bh) { 286a076972aSStefan Hajnoczi QEMUBH *next = ctx->first_bh->next; 287a076972aSStefan Hajnoczi 288a076972aSStefan Hajnoczi /* qemu_bh_delete() must have been called on BHs in this AioContext */ 289a076972aSStefan Hajnoczi assert(ctx->first_bh->deleted); 290a076972aSStefan Hajnoczi 291a076972aSStefan Hajnoczi g_free(ctx->first_bh); 292a076972aSStefan Hajnoczi ctx->first_bh = next; 293a076972aSStefan Hajnoczi } 294d7c99a12SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 295a076972aSStefan Hajnoczi 296f6a51c84SStefan Hajnoczi aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL); 2972f4dc3c1SPaolo Bonzini event_notifier_cleanup(&ctx->notifier); 2983fe71223SPaolo Bonzini qemu_rec_mutex_destroy(&ctx->lock); 299d7c99a12SPaolo Bonzini qemu_lockcnt_destroy(&ctx->list_lock); 300dae21b98SAlex Bligh timerlistgroup_deinit(&ctx->tlg); 3012f4dc3c1SPaolo Bonzini } 3022f4dc3c1SPaolo Bonzini 303e3713e00SPaolo Bonzini static GSourceFuncs aio_source_funcs = { 304e3713e00SPaolo Bonzini aio_ctx_prepare, 305e3713e00SPaolo Bonzini aio_ctx_check, 306e3713e00SPaolo Bonzini aio_ctx_dispatch, 3072f4dc3c1SPaolo Bonzini aio_ctx_finalize 308e3713e00SPaolo Bonzini }; 309e3713e00SPaolo Bonzini 310e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx) 311e3713e00SPaolo Bonzini { 312e3713e00SPaolo Bonzini g_source_ref(&ctx->source); 313e3713e00SPaolo Bonzini return &ctx->source; 314e3713e00SPaolo Bonzini } 315a915f4bcSPaolo Bonzini 3169b34277dSStefan Hajnoczi ThreadPool *aio_get_thread_pool(AioContext *ctx) 3179b34277dSStefan Hajnoczi { 3189b34277dSStefan Hajnoczi if (!ctx->thread_pool) { 3199b34277dSStefan Hajnoczi ctx->thread_pool = thread_pool_new(ctx); 3209b34277dSStefan Hajnoczi } 3219b34277dSStefan Hajnoczi return ctx->thread_pool; 3229b34277dSStefan Hajnoczi } 3239b34277dSStefan Hajnoczi 3240187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 3250187f5c9SPaolo Bonzini LinuxAioState *aio_get_linux_aio(AioContext *ctx) 3260187f5c9SPaolo Bonzini { 3270187f5c9SPaolo Bonzini if (!ctx->linux_aio) { 3280187f5c9SPaolo Bonzini ctx->linux_aio = laio_init(); 3290187f5c9SPaolo Bonzini laio_attach_aio_context(ctx->linux_aio, ctx); 3300187f5c9SPaolo Bonzini } 3310187f5c9SPaolo Bonzini return ctx->linux_aio; 3320187f5c9SPaolo Bonzini } 3330187f5c9SPaolo Bonzini #endif 3340187f5c9SPaolo Bonzini 3352f4dc3c1SPaolo Bonzini void aio_notify(AioContext *ctx) 3362f4dc3c1SPaolo Bonzini { 337eabc9779SPaolo Bonzini /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs 338eabc9779SPaolo Bonzini * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll. 339eabc9779SPaolo Bonzini */ 3400ceb849bSPaolo Bonzini smp_mb(); 341eabc9779SPaolo Bonzini if (ctx->notify_me) { 3422f4dc3c1SPaolo Bonzini event_notifier_set(&ctx->notifier); 34305e514b1SPaolo Bonzini atomic_mb_set(&ctx->notified, true); 34405e514b1SPaolo Bonzini } 34505e514b1SPaolo Bonzini } 34605e514b1SPaolo Bonzini 34705e514b1SPaolo Bonzini void aio_notify_accept(AioContext *ctx) 34805e514b1SPaolo Bonzini { 34905e514b1SPaolo Bonzini if (atomic_xchg(&ctx->notified, false)) { 35005e514b1SPaolo Bonzini event_notifier_test_and_clear(&ctx->notifier); 3512f4dc3c1SPaolo Bonzini } 3520ceb849bSPaolo Bonzini } 3532f4dc3c1SPaolo Bonzini 354d5541d86SAlex Bligh static void aio_timerlist_notify(void *opaque) 355d5541d86SAlex Bligh { 356d5541d86SAlex Bligh aio_notify(opaque); 357d5541d86SAlex Bligh } 358d5541d86SAlex Bligh 35921a03d17SPaolo Bonzini static void event_notifier_dummy_cb(EventNotifier *e) 36021a03d17SPaolo Bonzini { 36121a03d17SPaolo Bonzini } 36221a03d17SPaolo Bonzini 3634a1cba38SStefan Hajnoczi /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */ 3644a1cba38SStefan Hajnoczi static bool event_notifier_poll(void *opaque) 3654a1cba38SStefan Hajnoczi { 3664a1cba38SStefan Hajnoczi EventNotifier *e = opaque; 3674a1cba38SStefan Hajnoczi AioContext *ctx = container_of(e, AioContext, notifier); 3684a1cba38SStefan Hajnoczi 3694a1cba38SStefan Hajnoczi return atomic_read(&ctx->notified); 3704a1cba38SStefan Hajnoczi } 3714a1cba38SStefan Hajnoczi 3720c330a73SPaolo Bonzini static void co_schedule_bh_cb(void *opaque) 3730c330a73SPaolo Bonzini { 3740c330a73SPaolo Bonzini AioContext *ctx = opaque; 3750c330a73SPaolo Bonzini QSLIST_HEAD(, Coroutine) straight, reversed; 3760c330a73SPaolo Bonzini 3770c330a73SPaolo Bonzini QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines); 3780c330a73SPaolo Bonzini QSLIST_INIT(&straight); 3790c330a73SPaolo Bonzini 3800c330a73SPaolo Bonzini while (!QSLIST_EMPTY(&reversed)) { 3810c330a73SPaolo Bonzini Coroutine *co = QSLIST_FIRST(&reversed); 3820c330a73SPaolo Bonzini QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next); 3830c330a73SPaolo Bonzini QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next); 3840c330a73SPaolo Bonzini } 3850c330a73SPaolo Bonzini 3860c330a73SPaolo Bonzini while (!QSLIST_EMPTY(&straight)) { 3870c330a73SPaolo Bonzini Coroutine *co = QSLIST_FIRST(&straight); 3880c330a73SPaolo Bonzini QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); 3890c330a73SPaolo Bonzini trace_aio_co_schedule_bh_cb(ctx, co); 390*1919631eSPaolo Bonzini aio_context_acquire(ctx); 3910c330a73SPaolo Bonzini qemu_coroutine_enter(co); 392*1919631eSPaolo Bonzini aio_context_release(ctx); 3930c330a73SPaolo Bonzini } 3940c330a73SPaolo Bonzini } 3950c330a73SPaolo Bonzini 3962f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp) 397f627aab1SPaolo Bonzini { 3982f78e491SChrysostomos Nanakos int ret; 3992f4dc3c1SPaolo Bonzini AioContext *ctx; 40037fcee5dSFam Zheng 4012f4dc3c1SPaolo Bonzini ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); 4027e003465SCao jin aio_context_setup(ctx); 4037e003465SCao jin 4042f78e491SChrysostomos Nanakos ret = event_notifier_init(&ctx->notifier, false); 4052f78e491SChrysostomos Nanakos if (ret < 0) { 4062f78e491SChrysostomos Nanakos error_setg_errno(errp, -ret, "Failed to initialize event notifier"); 40737fcee5dSFam Zheng goto fail; 4082f78e491SChrysostomos Nanakos } 409fcf5def1SPaolo Bonzini g_source_set_can_recurse(&ctx->source, true); 410d7c99a12SPaolo Bonzini qemu_lockcnt_init(&ctx->list_lock); 4110c330a73SPaolo Bonzini 4120c330a73SPaolo Bonzini ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx); 4130c330a73SPaolo Bonzini QSLIST_INIT(&ctx->scheduled_coroutines); 4140c330a73SPaolo Bonzini 4152f78e491SChrysostomos Nanakos aio_set_event_notifier(ctx, &ctx->notifier, 416dca21ef2SFam Zheng false, 4172f78e491SChrysostomos Nanakos (EventNotifierHandler *) 418f6a51c84SStefan Hajnoczi event_notifier_dummy_cb, 4194a1cba38SStefan Hajnoczi event_notifier_poll); 4200187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 4210187f5c9SPaolo Bonzini ctx->linux_aio = NULL; 4220187f5c9SPaolo Bonzini #endif 4239b34277dSStefan Hajnoczi ctx->thread_pool = NULL; 4243fe71223SPaolo Bonzini qemu_rec_mutex_init(&ctx->lock); 425d5541d86SAlex Bligh timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); 4262f4dc3c1SPaolo Bonzini 42782a41186SStefan Hajnoczi ctx->poll_ns = 0; 4284a1cba38SStefan Hajnoczi ctx->poll_max_ns = 0; 42982a41186SStefan Hajnoczi ctx->poll_grow = 0; 43082a41186SStefan Hajnoczi ctx->poll_shrink = 0; 4314a1cba38SStefan Hajnoczi 4322f4dc3c1SPaolo Bonzini return ctx; 43337fcee5dSFam Zheng fail: 43437fcee5dSFam Zheng g_source_destroy(&ctx->source); 43537fcee5dSFam Zheng return NULL; 436e3713e00SPaolo Bonzini } 437e3713e00SPaolo Bonzini 4380c330a73SPaolo Bonzini void aio_co_schedule(AioContext *ctx, Coroutine *co) 4390c330a73SPaolo Bonzini { 4400c330a73SPaolo Bonzini trace_aio_co_schedule(ctx, co); 4410c330a73SPaolo Bonzini QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, 4420c330a73SPaolo Bonzini co, co_scheduled_next); 4430c330a73SPaolo Bonzini qemu_bh_schedule(ctx->co_schedule_bh); 4440c330a73SPaolo Bonzini } 4450c330a73SPaolo Bonzini 4460c330a73SPaolo Bonzini void aio_co_wake(struct Coroutine *co) 4470c330a73SPaolo Bonzini { 4480c330a73SPaolo Bonzini AioContext *ctx; 4490c330a73SPaolo Bonzini 4500c330a73SPaolo Bonzini /* Read coroutine before co->ctx. Matches smp_wmb in 4510c330a73SPaolo Bonzini * qemu_coroutine_enter. 4520c330a73SPaolo Bonzini */ 4530c330a73SPaolo Bonzini smp_read_barrier_depends(); 4540c330a73SPaolo Bonzini ctx = atomic_read(&co->ctx); 4550c330a73SPaolo Bonzini 4560c330a73SPaolo Bonzini if (ctx != qemu_get_current_aio_context()) { 4570c330a73SPaolo Bonzini aio_co_schedule(ctx, co); 4580c330a73SPaolo Bonzini return; 4590c330a73SPaolo Bonzini } 4600c330a73SPaolo Bonzini 4610c330a73SPaolo Bonzini if (qemu_in_coroutine()) { 4620c330a73SPaolo Bonzini Coroutine *self = qemu_coroutine_self(); 4630c330a73SPaolo Bonzini assert(self != co); 4640c330a73SPaolo Bonzini QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); 4650c330a73SPaolo Bonzini } else { 4660c330a73SPaolo Bonzini aio_context_acquire(ctx); 4670c330a73SPaolo Bonzini qemu_coroutine_enter(co); 4680c330a73SPaolo Bonzini aio_context_release(ctx); 4690c330a73SPaolo Bonzini } 4700c330a73SPaolo Bonzini } 4710c330a73SPaolo Bonzini 472e3713e00SPaolo Bonzini void aio_context_ref(AioContext *ctx) 473e3713e00SPaolo Bonzini { 474e3713e00SPaolo Bonzini g_source_ref(&ctx->source); 475e3713e00SPaolo Bonzini } 476e3713e00SPaolo Bonzini 477e3713e00SPaolo Bonzini void aio_context_unref(AioContext *ctx) 478e3713e00SPaolo Bonzini { 479e3713e00SPaolo Bonzini g_source_unref(&ctx->source); 480f627aab1SPaolo Bonzini } 48198563fc3SStefan Hajnoczi 48298563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx) 48398563fc3SStefan Hajnoczi { 4843fe71223SPaolo Bonzini qemu_rec_mutex_lock(&ctx->lock); 48598563fc3SStefan Hajnoczi } 48698563fc3SStefan Hajnoczi 48798563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx) 48898563fc3SStefan Hajnoczi { 4893fe71223SPaolo Bonzini qemu_rec_mutex_unlock(&ctx->lock); 49098563fc3SStefan Hajnoczi } 491