xref: /qemu/util/async.c (revision 1919631e6b5562e474690853eca3c35610201e16)
14f999d05SKevin Wolf /*
2c2b38b27SPaolo Bonzini  * Data plane event loop
34f999d05SKevin Wolf  *
44f999d05SKevin Wolf  * Copyright (c) 2003-2008 Fabrice Bellard
5c2b38b27SPaolo Bonzini  * Copyright (c) 2009-2017 QEMU contributors
64f999d05SKevin Wolf  *
74f999d05SKevin Wolf  * Permission is hereby granted, free of charge, to any person obtaining a copy
84f999d05SKevin Wolf  * of this software and associated documentation files (the "Software"), to deal
94f999d05SKevin Wolf  * in the Software without restriction, including without limitation the rights
104f999d05SKevin Wolf  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
114f999d05SKevin Wolf  * copies of the Software, and to permit persons to whom the Software is
124f999d05SKevin Wolf  * furnished to do so, subject to the following conditions:
134f999d05SKevin Wolf  *
144f999d05SKevin Wolf  * The above copyright notice and this permission notice shall be included in
154f999d05SKevin Wolf  * all copies or substantial portions of the Software.
164f999d05SKevin Wolf  *
174f999d05SKevin Wolf  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
184f999d05SKevin Wolf  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
194f999d05SKevin Wolf  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
204f999d05SKevin Wolf  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
214f999d05SKevin Wolf  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
224f999d05SKevin Wolf  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
234f999d05SKevin Wolf  * THE SOFTWARE.
244f999d05SKevin Wolf  */
254f999d05SKevin Wolf 
26d38ea87aSPeter Maydell #include "qemu/osdep.h"
27da34e65cSMarkus Armbruster #include "qapi/error.h"
284f999d05SKevin Wolf #include "qemu-common.h"
29737e150eSPaolo Bonzini #include "block/aio.h"
309b34277dSStefan Hajnoczi #include "block/thread-pool.h"
311de7afc9SPaolo Bonzini #include "qemu/main-loop.h"
320ceb849bSPaolo Bonzini #include "qemu/atomic.h"
330187f5c9SPaolo Bonzini #include "block/raw-aio.h"
340c330a73SPaolo Bonzini #include "qemu/coroutine_int.h"
350c330a73SPaolo Bonzini #include "trace.h"
369a1e9481SKevin Wolf 
374f999d05SKevin Wolf /***********************************************************/
384f999d05SKevin Wolf /* bottom halves (can be seen as timers which expire ASAP) */
394f999d05SKevin Wolf 
/* A bottom half: a callback that the owning AioContext's event loop runs
 * "as soon as possible".  BHs form a singly linked list hanging off
 * AioContext::first_bh, inserted under ctx->list_lock and walked with
 * RCU-style atomic reads (see aio_bh_poll, aio_compute_timeout).
 */
struct QEMUBH {
    AioContext *ctx;     /* owning event loop */
    QEMUBHFunc *cb;      /* callback run by aio_bh_call() */
    void *opaque;        /* argument passed to cb */
    QEMUBH *next;        /* next BH in ctx->first_bh list */
    bool scheduled;      /* pending flag; toggled with atomic_xchg */
    bool idle;           /* idle BHs only force a poll every ~10ms */
    bool deleted;        /* freeing is deferred to aio_bh_poll() */
};
494f999d05SKevin Wolf 
505b8bb359SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
515b8bb359SPaolo Bonzini {
525b8bb359SPaolo Bonzini     QEMUBH *bh;
535b8bb359SPaolo Bonzini     bh = g_new(QEMUBH, 1);
545b8bb359SPaolo Bonzini     *bh = (QEMUBH){
555b8bb359SPaolo Bonzini         .ctx = ctx,
565b8bb359SPaolo Bonzini         .cb = cb,
575b8bb359SPaolo Bonzini         .opaque = opaque,
585b8bb359SPaolo Bonzini     };
59d7c99a12SPaolo Bonzini     qemu_lockcnt_lock(&ctx->list_lock);
605b8bb359SPaolo Bonzini     bh->next = ctx->first_bh;
615b8bb359SPaolo Bonzini     bh->scheduled = 1;
625b8bb359SPaolo Bonzini     bh->deleted = 1;
635b8bb359SPaolo Bonzini     /* Make sure that the members are ready before putting bh into list */
645b8bb359SPaolo Bonzini     smp_wmb();
655b8bb359SPaolo Bonzini     ctx->first_bh = bh;
66d7c99a12SPaolo Bonzini     qemu_lockcnt_unlock(&ctx->list_lock);
67c9d1a561SPaolo Bonzini     aio_notify(ctx);
685b8bb359SPaolo Bonzini }
695b8bb359SPaolo Bonzini 
70f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
714f999d05SKevin Wolf {
724f999d05SKevin Wolf     QEMUBH *bh;
73ee82310fSPaolo Bonzini     bh = g_new(QEMUBH, 1);
74ee82310fSPaolo Bonzini     *bh = (QEMUBH){
75ee82310fSPaolo Bonzini         .ctx = ctx,
76ee82310fSPaolo Bonzini         .cb = cb,
77ee82310fSPaolo Bonzini         .opaque = opaque,
78ee82310fSPaolo Bonzini     };
79d7c99a12SPaolo Bonzini     qemu_lockcnt_lock(&ctx->list_lock);
80f627aab1SPaolo Bonzini     bh->next = ctx->first_bh;
81dcc772e2SLiu Ping Fan     /* Make sure that the members are ready before putting bh into list */
82dcc772e2SLiu Ping Fan     smp_wmb();
83f627aab1SPaolo Bonzini     ctx->first_bh = bh;
84d7c99a12SPaolo Bonzini     qemu_lockcnt_unlock(&ctx->list_lock);
854f999d05SKevin Wolf     return bh;
864f999d05SKevin Wolf }
874f999d05SKevin Wolf 
88df281b80SPavel Dovgalyuk void aio_bh_call(QEMUBH *bh)
89df281b80SPavel Dovgalyuk {
90df281b80SPavel Dovgalyuk     bh->cb(bh->opaque);
91df281b80SPavel Dovgalyuk }
92df281b80SPavel Dovgalyuk 
/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
 *
 * Walk the BH list once, running every scheduled BH, then garbage-collect
 * deleted BHs if this walker is the last one out.  Returns nonzero if a
 * non-idle BH was executed (i.e. real progress was made).
 */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    /* Take a "reader" count on list_lock: concurrent aio_bh_new may still
     * insert, but nodes cannot be freed while we hold it.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        /* Load next before running cb: the BH may delete itself.  */
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        if (bh->deleted) {
            deleted = true;
        }
    }

    /* remove deleted bhs */
    if (!deleted) {
        qemu_lockcnt_dec(&ctx->list_lock);
        return ret;
    }

    /* qemu_lockcnt_dec_and_lock only succeeds for the last reader; if the
     * count stayed nonzero, a later aio_bh_poll will do the cleanup.
     */
    if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted && !bh->scheduled) {
                *bhp = bh->next;
                g_free(bh);
            } else {
                bhp = &bh->next;
            }
        }
        qemu_lockcnt_unlock(&ctx->list_lock);
    }
    return ret;
}
1454f999d05SKevin Wolf 
/* Schedule @bh as an "idle" BH: it will run, but only guarantees being
 * polled roughly every 10ms (see aio_compute_timeout) and does not count
 * as progress in aio_bh_poll.
 */
void qemu_bh_schedule_idle(QEMUBH *bh)
{
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
    atomic_mb_set(&bh->scheduled, 1);
}
1544f999d05SKevin Wolf 
/* Schedule @bh to run on the next iteration of its AioContext's event
 * loop.  A BH scheduled again before it has run still runs only once
 * (the atomic_xchg returns nonzero and no second notify is sent).
 */
void qemu_bh_schedule(QEMUBH *bh)
{
    AioContext *ctx;

    ctx = bh->ctx;
    bh->idle = 0;
    /* The memory barrier implicit in atomic_xchg makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
    if (atomic_xchg(&bh->scheduled, 1) == 0) {
        aio_notify(ctx);
    }
}
1714f999d05SKevin Wolf 
172dcc772e2SLiu Ping Fan 
173dcc772e2SLiu Ping Fan /* This func is async.
174dcc772e2SLiu Ping Fan  */
1754f999d05SKevin Wolf void qemu_bh_cancel(QEMUBH *bh)
1764f999d05SKevin Wolf {
1774f999d05SKevin Wolf     bh->scheduled = 0;
1784f999d05SKevin Wolf }
1794f999d05SKevin Wolf 
/* Asynchronously delete a BH: mark it unscheduled and deleted.  The
 * actual unlinking and g_free happen later inside aio_bh_poll, once no
 * list walker can still hold a reference to the node.
 */
void qemu_bh_delete(QEMUBH *bh)
{
    bh->scheduled = 0;
    bh->deleted = 1;
}
1884f999d05SKevin Wolf 
189845ca10dSPaolo Bonzini int64_t
190845ca10dSPaolo Bonzini aio_compute_timeout(AioContext *ctx)
1914f999d05SKevin Wolf {
192845ca10dSPaolo Bonzini     int64_t deadline;
193845ca10dSPaolo Bonzini     int timeout = -1;
1944f999d05SKevin Wolf     QEMUBH *bh;
1954f999d05SKevin Wolf 
196d7c99a12SPaolo Bonzini     for (bh = atomic_rcu_read(&ctx->first_bh); bh;
197d7c99a12SPaolo Bonzini          bh = atomic_rcu_read(&bh->next)) {
1985b8bb359SPaolo Bonzini         if (bh->scheduled) {
1994f999d05SKevin Wolf             if (bh->idle) {
2004f999d05SKevin Wolf                 /* idle bottom halves will be polled at least
2014f999d05SKevin Wolf                  * every 10ms */
202845ca10dSPaolo Bonzini                 timeout = 10000000;
2034f999d05SKevin Wolf             } else {
2044f999d05SKevin Wolf                 /* non-idle bottom halves will be executed
2054f999d05SKevin Wolf                  * immediately */
206845ca10dSPaolo Bonzini                 return 0;
2074f999d05SKevin Wolf             }
2084f999d05SKevin Wolf         }
2094f999d05SKevin Wolf     }
2104f999d05SKevin Wolf 
211845ca10dSPaolo Bonzini     deadline = timerlistgroup_deadline_ns(&ctx->tlg);
212533a8cf3SAlex Bligh     if (deadline == 0) {
213845ca10dSPaolo Bonzini         return 0;
214533a8cf3SAlex Bligh     } else {
215845ca10dSPaolo Bonzini         return qemu_soonest_timeout(timeout, deadline);
216845ca10dSPaolo Bonzini     }
217533a8cf3SAlex Bligh }
218533a8cf3SAlex Bligh 
/* GSource prepare callback: advertise that we are about to block
 * (notify_me) and compute the poll timeout.  Returns TRUE when the
 * source is already ready and the main loop should not block.
 */
static gboolean
aio_ctx_prepare(GSource *source, gint    *timeout)
{
    AioContext *ctx = (AioContext *) source;

    /* Set before reading BH/timer state below; pairs with the smp_mb in
     * aio_notify so schedulers know they must kick the event notifier.
     */
    atomic_or(&ctx->notify_me, 1);

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

    if (aio_prepare(ctx)) {
        *timeout = 0;
    }

    return *timeout == 0;
}
235e3713e00SPaolo Bonzini 
/* GSource check callback: withdraw interest in aio_notify, consume any
 * pending notification, then report whether there is work to dispatch
 * (a scheduled BH, pending fd handlers, or an expired timer).
 */
static gboolean
aio_ctx_check(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

    /* Done blocking: aio_notify no longer needs to set the event
     * notifier on our behalf.
     */
    atomic_and(&ctx->notify_me, ~1);
    aio_notify_accept(ctx);

    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            return true;
        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}
252e3713e00SPaolo Bonzini 
253e3713e00SPaolo Bonzini static gboolean
254e3713e00SPaolo Bonzini aio_ctx_dispatch(GSource     *source,
255e3713e00SPaolo Bonzini                  GSourceFunc  callback,
256e3713e00SPaolo Bonzini                  gpointer     user_data)
257e3713e00SPaolo Bonzini {
258e3713e00SPaolo Bonzini     AioContext *ctx = (AioContext *) source;
259e3713e00SPaolo Bonzini 
260e3713e00SPaolo Bonzini     assert(callback == NULL);
261721671adSStefan Hajnoczi     aio_dispatch(ctx, true);
262e3713e00SPaolo Bonzini     return true;
263e3713e00SPaolo Bonzini }
264e3713e00SPaolo Bonzini 
/* GSource finalize callback: tear down everything the AioContext owns.
 * Runs when the last reference to the source is dropped; by then no
 * other thread may still be using the context.
 */
static void
aio_ctx_finalize(GSource     *source)
{
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

    /* No coroutine may still be queued on this context.  */
    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
    qemu_bh_delete(ctx->co_schedule_bh);

    /* Free the BH list; list_lock must have no残remaining readers.  */
    qemu_lockcnt_lock(&ctx->list_lock);
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(ctx->first_bh->deleted);

        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);

    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
}
3022f4dc3c1SPaolo Bonzini 
303e3713e00SPaolo Bonzini static GSourceFuncs aio_source_funcs = {
304e3713e00SPaolo Bonzini     aio_ctx_prepare,
305e3713e00SPaolo Bonzini     aio_ctx_check,
306e3713e00SPaolo Bonzini     aio_ctx_dispatch,
3072f4dc3c1SPaolo Bonzini     aio_ctx_finalize
308e3713e00SPaolo Bonzini };
309e3713e00SPaolo Bonzini 
310e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx)
311e3713e00SPaolo Bonzini {
312e3713e00SPaolo Bonzini     g_source_ref(&ctx->source);
313e3713e00SPaolo Bonzini     return &ctx->source;
314e3713e00SPaolo Bonzini }
315a915f4bcSPaolo Bonzini 
3169b34277dSStefan Hajnoczi ThreadPool *aio_get_thread_pool(AioContext *ctx)
3179b34277dSStefan Hajnoczi {
3189b34277dSStefan Hajnoczi     if (!ctx->thread_pool) {
3199b34277dSStefan Hajnoczi         ctx->thread_pool = thread_pool_new(ctx);
3209b34277dSStefan Hajnoczi     }
3219b34277dSStefan Hajnoczi     return ctx->thread_pool;
3229b34277dSStefan Hajnoczi }
3239b34277dSStefan Hajnoczi 
#ifdef CONFIG_LINUX_AIO
/* Lazily create the Linux AIO state for this context and attach it.  */
LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
    if (ctx->linux_aio == NULL) {
        ctx->linux_aio = laio_init();
        laio_attach_aio_context(ctx->linux_aio, ctx);
    }
    return ctx->linux_aio;
}
#endif
3340187f5c9SPaolo Bonzini 
/* Wake up an event loop so it re-evaluates BHs, handlers and timers.
 * The event notifier is only kicked when some thread has advertised
 * interest through ctx->notify_me (i.e. it may be blocked in poll).
 */
void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}
34605e514b1SPaolo Bonzini 
/* Consume a pending aio_notify: clear the notified flag and, if it was
 * set, drain the event notifier so the loop does not wake up again for
 * a notification it has already serviced.
 */
void aio_notify_accept(AioContext *ctx)
{
    if (atomic_xchg(&ctx->notified, false)) {
        event_notifier_test_and_clear(&ctx->notifier);
    }
}
3532f4dc3c1SPaolo Bonzini 
354d5541d86SAlex Bligh static void aio_timerlist_notify(void *opaque)
355d5541d86SAlex Bligh {
356d5541d86SAlex Bligh     aio_notify(opaque);
357d5541d86SAlex Bligh }
358d5541d86SAlex Bligh 
/* No-op read handler for ctx->notifier: the wakeup itself is the whole
 * point, there is no payload to process.
 */
static void event_notifier_dummy_cb(EventNotifier *e)
{
}
36221a03d17SPaolo Bonzini 
3634a1cba38SStefan Hajnoczi /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
3644a1cba38SStefan Hajnoczi static bool event_notifier_poll(void *opaque)
3654a1cba38SStefan Hajnoczi {
3664a1cba38SStefan Hajnoczi     EventNotifier *e = opaque;
3674a1cba38SStefan Hajnoczi     AioContext *ctx = container_of(e, AioContext, notifier);
3684a1cba38SStefan Hajnoczi 
3694a1cba38SStefan Hajnoczi     return atomic_read(&ctx->notified);
3704a1cba38SStefan Hajnoczi }
3714a1cba38SStefan Hajnoczi 
/* BH callback that enters coroutines queued via aio_co_schedule.
 *
 * scheduled_coroutines is a LIFO (insertions go at the head), so after
 * atomically stealing the whole list we reverse it onto a local list to
 * enter coroutines in FIFO order.
 */
static void co_schedule_bh_cb(void *opaque)
{
    AioContext *ctx = opaque;
    QSLIST_HEAD(, Coroutine) straight, reversed;

    /* Steal every pending coroutine in one atomic step; concurrent
     * aio_co_schedule calls land on the now-empty list and re-arm the BH.
     */
    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
    QSLIST_INIT(&straight);

    /* Reverse LIFO -> FIFO.  */
    while (!QSLIST_EMPTY(&reversed)) {
        Coroutine *co = QSLIST_FIRST(&reversed);
        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
    }

    while (!QSLIST_EMPTY(&straight)) {
        Coroutine *co = QSLIST_FIRST(&straight);
        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
        trace_aio_co_schedule_bh_cb(ctx, co);
        /* Enter each coroutine with the context lock held.  */
        aio_context_acquire(ctx);
        qemu_coroutine_enter(co);
        aio_context_release(ctx);
    }
}
3950c330a73SPaolo Bonzini 
/* Create a new AioContext (allocated as a GSource so its lifetime is
 * GSource-refcounted).  On failure sets @errp and returns NULL.
 */
AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);

    /* Internal BH used by aio_co_schedule to enter queued coroutines.  */
    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
    QSLIST_INIT(&ctx->scheduled_coroutines);

    /* The dummy handler just consumes wakeups; event_notifier_poll lets
     * aio_poll detect a notification without a syscall.
     */
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
#endif
    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    /* Adaptive-polling parameters all start at zero.  */
    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    /* NOTE(review): this finalizes a partially initialized context
     * (list_lock, lock and tlg are not set up yet on this path) — verify
     * aio_ctx_finalize tolerates that.
     */
    g_source_destroy(&ctx->source);
    return NULL;
}
437e3713e00SPaolo Bonzini 
/* Queue @co to be entered from @ctx's event loop.  Thread-safe: the
 * atomic list insert happens before the BH is scheduled, so
 * co_schedule_bh_cb is guaranteed to observe the coroutine.
 */
void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
    trace_aio_co_schedule(ctx, co);
    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                              co, co_scheduled_next);
    qemu_bh_schedule(ctx->co_schedule_bh);
}
4450c330a73SPaolo Bonzini 
/* Resume @co in its home AioContext.
 *
 * Three cases: a different context gets the coroutine via
 * aio_co_schedule; from inside another coroutine of the same context it
 * is appended to the current coroutine's wakeup queue; otherwise it is
 * entered directly under the context lock.
 */
void aio_co_wake(struct Coroutine *co)
{
    AioContext *ctx;

    /* Read coroutine before co->ctx.  Matches smp_wmb in
     * qemu_coroutine_enter.
     */
    smp_read_barrier_depends();
    ctx = atomic_read(&co->ctx);

    if (ctx != qemu_get_current_aio_context()) {
        aio_co_schedule(ctx, co);
        return;
    }

    if (qemu_in_coroutine()) {
        Coroutine *self = qemu_coroutine_self();
        /* A coroutine cannot wake itself.  */
        assert(self != co);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
    } else {
        aio_context_acquire(ctx);
        qemu_coroutine_enter(co);
        aio_context_release(ctx);
    }
}
4710c330a73SPaolo Bonzini 
/* Take a reference on the context (backed by its GSource refcount).  */
void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}
476e3713e00SPaolo Bonzini 
/* Drop a reference; the last unref triggers aio_ctx_finalize.  */
void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}
48198563fc3SStefan Hajnoczi 
/* Lock the context.  The mutex is recursive, so nested acquisition from
 * the same thread is allowed.
 */
void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}
48698563fc3SStefan Hajnoczi 
/* Release one level of the recursive context lock.  */
void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}
491