1a76bab49Saliguori /* 2a76bab49Saliguori * QEMU aio implementation 3a76bab49Saliguori * 4a76bab49Saliguori * Copyright IBM, Corp. 2008 5a76bab49Saliguori * 6a76bab49Saliguori * Authors: 7a76bab49Saliguori * Anthony Liguori <aliguori@us.ibm.com> 8a76bab49Saliguori * 9a76bab49Saliguori * This work is licensed under the terms of the GNU GPL, version 2. See 10a76bab49Saliguori * the COPYING file in the top-level directory. 11a76bab49Saliguori * 12a76bab49Saliguori */ 13a76bab49Saliguori 14a76bab49Saliguori #ifndef QEMU_AIO_H 15a76bab49Saliguori #define QEMU_AIO_H 16a76bab49Saliguori 17*73fd282eSStefan Hajnoczi #ifdef CONFIG_LINUX_IO_URING 18*73fd282eSStefan Hajnoczi #include <liburing.h> 19*73fd282eSStefan Hajnoczi #endif 201de7afc9SPaolo Bonzini #include "qemu/queue.h" 211de7afc9SPaolo Bonzini #include "qemu/event_notifier.h" 22dcc772e2SLiu Ping Fan #include "qemu/thread.h" 23dae21b98SAlex Bligh #include "qemu/timer.h" 24a76bab49Saliguori 257c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB; 26097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret); 2785e8dab1SPaolo Bonzini 28d7331bedSStefan Hajnoczi typedef struct AIOCBInfo { 297c84b1b8SMarkus Armbruster void (*cancel_async)(BlockAIOCB *acb); 307c84b1b8SMarkus Armbruster AioContext *(*get_aio_context)(BlockAIOCB *acb); 318c82e9a4SStefan Hajnoczi size_t aiocb_size; 32d7331bedSStefan Hajnoczi } AIOCBInfo; 3385e8dab1SPaolo Bonzini 347c84b1b8SMarkus Armbruster struct BlockAIOCB { 35d7331bedSStefan Hajnoczi const AIOCBInfo *aiocb_info; 3685e8dab1SPaolo Bonzini BlockDriverState *bs; 37097310b5SMarkus Armbruster BlockCompletionFunc *cb; 3885e8dab1SPaolo Bonzini void *opaque; 39f197fe2bSFam Zheng int refcnt; 4085e8dab1SPaolo Bonzini }; 4185e8dab1SPaolo Bonzini 42d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 43097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 448007429aSFam Zheng void qemu_aio_unref(void *p); 45f197fe2bSFam Zheng void qemu_aio_ref(void *p); 4685e8dab1SPaolo Bonzini 47f627aab1SPaolo Bonzini typedef struct AioHandler AioHandler; 484749079cSStefan Hajnoczi typedef QLIST_HEAD(, AioHandler) AioHandlerList; 49f627aab1SPaolo Bonzini typedef void QEMUBHFunc(void *opaque); 50f6a51c84SStefan Hajnoczi typedef bool AioPollFn(void *opaque); 51f627aab1SPaolo Bonzini typedef void IOHandler(void *opaque); 52f627aab1SPaolo Bonzini 530c330a73SPaolo Bonzini struct Coroutine; 540187f5c9SPaolo Bonzini struct ThreadPool; 550187f5c9SPaolo Bonzini struct LinuxAioState; 566663a0a3SAarushi Mehta struct LuringState; 570187f5c9SPaolo Bonzini 581f050a46SStefan Hajnoczi /* Callbacks for file descriptor monitoring implementations */ 591f050a46SStefan Hajnoczi typedef struct { 601f050a46SStefan Hajnoczi /* 611f050a46SStefan Hajnoczi * update: 621f050a46SStefan Hajnoczi * @ctx: the AioContext 63b321051cSStefan Hajnoczi * @old_node: the existing handler or NULL if this file descriptor is being 64b321051cSStefan Hajnoczi * monitored for the first time 65b321051cSStefan Hajnoczi * @new_node: the new handler or NULL if this file descriptor is being 66b321051cSStefan Hajnoczi * removed 671f050a46SStefan Hajnoczi * 68b321051cSStefan Hajnoczi * Add/remove/modify a monitored file descriptor. 691f050a46SStefan Hajnoczi * 701f050a46SStefan Hajnoczi * Called with ctx->list_lock acquired. 711f050a46SStefan Hajnoczi */ 72b321051cSStefan Hajnoczi void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node); 731f050a46SStefan Hajnoczi 741f050a46SStefan Hajnoczi /* 751f050a46SStefan Hajnoczi * wait: 761f050a46SStefan Hajnoczi * @ctx: the AioContext 771f050a46SStefan Hajnoczi * @ready_list: list for handlers that become ready 781f050a46SStefan Hajnoczi * @timeout: maximum duration to wait, in nanoseconds 791f050a46SStefan Hajnoczi * 801f050a46SStefan Hajnoczi * Wait for file descriptors to become ready and place them on ready_list. 811f050a46SStefan Hajnoczi * 821f050a46SStefan Hajnoczi * Called with ctx->list_lock incremented but not locked. 831f050a46SStefan Hajnoczi * 841f050a46SStefan Hajnoczi * Returns: number of ready file descriptors. 851f050a46SStefan Hajnoczi */ 861f050a46SStefan Hajnoczi int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout); 871f050a46SStefan Hajnoczi } FDMonOps; 881f050a46SStefan Hajnoczi 898c6b0356SStefan Hajnoczi /* 908c6b0356SStefan Hajnoczi * Each aio_bh_poll() call carves off a slice of the BH list, so that newly 918c6b0356SStefan Hajnoczi * scheduled BHs are not processed until the next aio_bh_poll() call. All 928c6b0356SStefan Hajnoczi * active aio_bh_poll() calls chain their slices together in a list, so that 938c6b0356SStefan Hajnoczi * nested aio_bh_poll() calls process all scheduled bottom halves. 948c6b0356SStefan Hajnoczi */ 958c6b0356SStefan Hajnoczi typedef QSLIST_HEAD(, QEMUBH) BHList; 968c6b0356SStefan Hajnoczi typedef struct BHListSlice BHListSlice; 978c6b0356SStefan Hajnoczi struct BHListSlice { 988c6b0356SStefan Hajnoczi BHList bh_list; 998c6b0356SStefan Hajnoczi QSIMPLEQ_ENTRY(BHListSlice) next; 1008c6b0356SStefan Hajnoczi }; 1018c6b0356SStefan Hajnoczi 102*73fd282eSStefan Hajnoczi typedef QSLIST_HEAD(, AioHandler) AioHandlerSList; 103*73fd282eSStefan Hajnoczi 1046a1751b7SAlex Bligh struct AioContext { 105e3713e00SPaolo Bonzini GSource source; 106e3713e00SPaolo Bonzini 1077c690fd1SPaolo Bonzini /* Used by AioContext users to protect from multi-threaded access. */ 1083fe71223SPaolo Bonzini QemuRecMutex lock; 10998563fc3SStefan Hajnoczi 1107c690fd1SPaolo Bonzini /* The list of registered AIO handlers. Protected by ctx->list_lock. */ 1114749079cSStefan Hajnoczi AioHandlerList aio_handlers; 1124749079cSStefan Hajnoczi 1134749079cSStefan Hajnoczi /* The list of AIO handlers to be deleted. Protected by ctx->list_lock. */ 1144749079cSStefan Hajnoczi AioHandlerList deleted_aio_handlers; 115a915f4bcSPaolo Bonzini 116eabc9779SPaolo Bonzini /* Used to avoid unnecessary event_notifier_set calls in aio_notify; 117eabc9779SPaolo Bonzini * accessed with atomic primitives. If this field is 0, everything 118eabc9779SPaolo Bonzini * (file descriptors, bottom halves, timers) will be re-evaluated 119eabc9779SPaolo Bonzini * before the next blocking poll(), thus the event_notifier_set call 120eabc9779SPaolo Bonzini * can be skipped. If it is non-zero, you may need to wake up a 121eabc9779SPaolo Bonzini * concurrent aio_poll or the glib main event loop, making 122eabc9779SPaolo Bonzini * event_notifier_set necessary. 123eabc9779SPaolo Bonzini * 124eabc9779SPaolo Bonzini * Bit 0 is reserved for GSource usage of the AioContext, and is 1 12554a16a63SCao jin * between a call to aio_ctx_prepare and the next call to aio_ctx_check. 126eabc9779SPaolo Bonzini * Bits 1-31 simply count the number of active calls to aio_poll 127eabc9779SPaolo Bonzini * that are in the prepare or poll phase. 128eabc9779SPaolo Bonzini * 129eabc9779SPaolo Bonzini * The GSource and aio_poll must use a different mechanism because 130eabc9779SPaolo Bonzini * there is no certainty that a call to GSource's prepare callback 131eabc9779SPaolo Bonzini * (via g_main_context_prepare) is indeed followed by check and 132eabc9779SPaolo Bonzini * dispatch. It's not clear whether this would be a bug, but let's 133eabc9779SPaolo Bonzini * play safe and allow it---it will just cause extra calls to 134eabc9779SPaolo Bonzini * event_notifier_set until the next call to dispatch. 135eabc9779SPaolo Bonzini * 136eabc9779SPaolo Bonzini * Instead, the aio_poll calls include both the prepare and the 137eabc9779SPaolo Bonzini * dispatch phase, hence a simple counter is enough for them. 1380ceb849bSPaolo Bonzini */ 139eabc9779SPaolo Bonzini uint32_t notify_me; 1400ceb849bSPaolo Bonzini 1417c690fd1SPaolo Bonzini /* A lock to protect between QEMUBH and AioHandler adders and deleter, 1427c690fd1SPaolo Bonzini * and to ensure that no callbacks are removed while we're walking and 1437c690fd1SPaolo Bonzini * dispatching them. 144d7c99a12SPaolo Bonzini */ 145d7c99a12SPaolo Bonzini QemuLockCnt list_lock; 1460ceb849bSPaolo Bonzini 1478c6b0356SStefan Hajnoczi /* Bottom Halves pending aio_bh_poll() processing */ 1488c6b0356SStefan Hajnoczi BHList bh_list; 1498c6b0356SStefan Hajnoczi 1508c6b0356SStefan Hajnoczi /* Chained BH list slices for each nested aio_bh_poll() call */ 1518c6b0356SStefan Hajnoczi QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list; 152f627aab1SPaolo Bonzini 15305e514b1SPaolo Bonzini /* Used by aio_notify. 15405e514b1SPaolo Bonzini * 15505e514b1SPaolo Bonzini * "notified" is used to avoid expensive event_notifier_test_and_clear 15605e514b1SPaolo Bonzini * calls. When it is clear, the EventNotifier is clear, or one thread 15705e514b1SPaolo Bonzini * is going to clear "notified" before processing more events. False 15805e514b1SPaolo Bonzini * positives are possible, i.e. "notified" could be set even though the 15905e514b1SPaolo Bonzini * EventNotifier is clear. 16005e514b1SPaolo Bonzini * 16105e514b1SPaolo Bonzini * Note that event_notifier_set *cannot* be optimized the same way. For 16205e514b1SPaolo Bonzini * more information on the problem that would result, see "#ifdef BUG2" 16305e514b1SPaolo Bonzini * in the docs/aio_notify_accept.promela formal model. 16405e514b1SPaolo Bonzini */ 16505e514b1SPaolo Bonzini bool notified; 1662f4dc3c1SPaolo Bonzini EventNotifier notifier; 1676b5f8762SStefan Hajnoczi 1680c330a73SPaolo Bonzini QSLIST_HEAD(, Coroutine) scheduled_coroutines; 1690c330a73SPaolo Bonzini QEMUBH *co_schedule_bh; 1700c330a73SPaolo Bonzini 1717c690fd1SPaolo Bonzini /* Thread pool for performing work and receiving completion callbacks. 1727c690fd1SPaolo Bonzini * Has its own locking. 1737c690fd1SPaolo Bonzini */ 1749b34277dSStefan Hajnoczi struct ThreadPool *thread_pool; 175dae21b98SAlex Bligh 1760187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 1776663a0a3SAarushi Mehta /* 1786663a0a3SAarushi Mehta * State for native Linux AIO. Uses aio_context_acquire/release for 1790187f5c9SPaolo Bonzini * locking. 1800187f5c9SPaolo Bonzini */ 1810187f5c9SPaolo Bonzini struct LinuxAioState *linux_aio; 1820187f5c9SPaolo Bonzini #endif 1836663a0a3SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING 1846663a0a3SAarushi Mehta /* 1856663a0a3SAarushi Mehta * State for Linux io_uring. Uses aio_context_acquire/release for 1866663a0a3SAarushi Mehta * locking. 1876663a0a3SAarushi Mehta */ 1886663a0a3SAarushi Mehta struct LuringState *linux_io_uring; 189*73fd282eSStefan Hajnoczi 190*73fd282eSStefan Hajnoczi /* State for file descriptor monitoring using Linux io_uring */ 191*73fd282eSStefan Hajnoczi struct io_uring fdmon_io_uring; 192*73fd282eSStefan Hajnoczi AioHandlerSList submit_list; 1936663a0a3SAarushi Mehta #endif 1940187f5c9SPaolo Bonzini 1957c690fd1SPaolo Bonzini /* TimerLists for calling timers - one per clock type. Has its own 1967c690fd1SPaolo Bonzini * locking. 1977c690fd1SPaolo Bonzini */ 198dae21b98SAlex Bligh QEMUTimerListGroup tlg; 199c1e1e5faSFam Zheng 200c1e1e5faSFam Zheng int external_disable_cnt; 201fbe3fc5cSFam Zheng 2024a1cba38SStefan Hajnoczi /* Number of AioHandlers without .io_poll() */ 2034a1cba38SStefan Hajnoczi int poll_disable_cnt; 2044a1cba38SStefan Hajnoczi 20582a41186SStefan Hajnoczi /* Polling mode parameters */ 20682a41186SStefan Hajnoczi int64_t poll_ns; /* current polling time in nanoseconds */ 20782a41186SStefan Hajnoczi int64_t poll_max_ns; /* maximum polling time in nanoseconds */ 20882a41186SStefan Hajnoczi int64_t poll_grow; /* polling time growth factor */ 20982a41186SStefan Hajnoczi int64_t poll_shrink; /* polling time shrink factor */ 2104a1cba38SStefan Hajnoczi 211684e508cSStefan Hajnoczi /* Are we in polling mode or monitoring file descriptors? */ 212684e508cSStefan Hajnoczi bool poll_started; 213684e508cSStefan Hajnoczi 214fbe3fc5cSFam Zheng /* epoll(7) state used when built with CONFIG_EPOLL */ 215fbe3fc5cSFam Zheng int epollfd; 2161f050a46SStefan Hajnoczi 2171f050a46SStefan Hajnoczi const FDMonOps *fdmon_ops; 2186a1751b7SAlex Bligh }; 219f627aab1SPaolo Bonzini 220f627aab1SPaolo Bonzini /** 221f627aab1SPaolo Bonzini * aio_context_new: Allocate a new AioContext. 222f627aab1SPaolo Bonzini * 223f627aab1SPaolo Bonzini * AioContext provide a mini event-loop that can be waited on synchronously. 224f627aab1SPaolo Bonzini * They also provide bottom halves, a service to execute a piece of code 225f627aab1SPaolo Bonzini * as soon as possible. 226f627aab1SPaolo Bonzini */ 2272f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp); 228f627aab1SPaolo Bonzini 229f627aab1SPaolo Bonzini /** 230e3713e00SPaolo Bonzini * aio_context_ref: 231e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 232e3713e00SPaolo Bonzini * 233e3713e00SPaolo Bonzini * Add a reference to an AioContext. 234e3713e00SPaolo Bonzini */ 235e3713e00SPaolo Bonzini void aio_context_ref(AioContext *ctx); 236e3713e00SPaolo Bonzini 237e3713e00SPaolo Bonzini /** 238e3713e00SPaolo Bonzini * aio_context_unref: 239e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 240e3713e00SPaolo Bonzini * 241e3713e00SPaolo Bonzini * Drop a reference to an AioContext. 242e3713e00SPaolo Bonzini */ 243e3713e00SPaolo Bonzini void aio_context_unref(AioContext *ctx); 244e3713e00SPaolo Bonzini 24598563fc3SStefan Hajnoczi /* Take ownership of the AioContext. If the AioContext will be shared between 24649110174SPaolo Bonzini * threads, and a thread does not want to be interrupted, it will have to 24749110174SPaolo Bonzini * take ownership around calls to aio_poll(). Otherwise, aio_poll() 24849110174SPaolo Bonzini * automatically takes care of calling aio_context_acquire and 24949110174SPaolo Bonzini * aio_context_release. 25098563fc3SStefan Hajnoczi * 2517c690fd1SPaolo Bonzini * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A 2527c690fd1SPaolo Bonzini * thread still has to call those to avoid being interrupted by the guest. 2537c690fd1SPaolo Bonzini * 2547c690fd1SPaolo Bonzini * Bottom halves, timers and callbacks can be created or removed without 2557c690fd1SPaolo Bonzini * acquiring the AioContext. 25698563fc3SStefan Hajnoczi */ 25798563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx); 25898563fc3SStefan Hajnoczi 25998563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */ 26098563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx); 26198563fc3SStefan Hajnoczi 262e3713e00SPaolo Bonzini /** 2635b8bb359SPaolo Bonzini * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run 2645b8bb359SPaolo Bonzini * only once and as soon as possible. 2655b8bb359SPaolo Bonzini */ 2665b8bb359SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 2675b8bb359SPaolo Bonzini 2685b8bb359SPaolo Bonzini /** 269f627aab1SPaolo Bonzini * aio_bh_new: Allocate a new bottom half structure. 270f627aab1SPaolo Bonzini * 271f627aab1SPaolo Bonzini * Bottom halves are lightweight callbacks whose invocation is guaranteed 272f627aab1SPaolo Bonzini * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure 273f627aab1SPaolo Bonzini * is opaque and must be allocated prior to its use. 274f627aab1SPaolo Bonzini */ 275f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 276f627aab1SPaolo Bonzini 277f627aab1SPaolo Bonzini /** 2782f4dc3c1SPaolo Bonzini * aio_notify: Force processing of pending events. 2792f4dc3c1SPaolo Bonzini * 2802f4dc3c1SPaolo Bonzini * Similar to signaling a condition variable, aio_notify forces 281722f8d90SYaowei Bai * aio_poll to exit, so that the next call will re-examine pending events. 282722f8d90SYaowei Bai * The caller of aio_notify will usually call aio_poll again very soon, 2832f4dc3c1SPaolo Bonzini * or go through another iteration of the GLib main loop. Hence, aio_notify 2842f4dc3c1SPaolo Bonzini * also has the side effect of recalculating the sets of file descriptors 2852f4dc3c1SPaolo Bonzini * that the main loop waits for. 2862f4dc3c1SPaolo Bonzini * 2872f4dc3c1SPaolo Bonzini * Calling aio_notify is rarely necessary, because for example scheduling 2882f4dc3c1SPaolo Bonzini * a bottom half calls it already. 2892f4dc3c1SPaolo Bonzini */ 2902f4dc3c1SPaolo Bonzini void aio_notify(AioContext *ctx); 2912f4dc3c1SPaolo Bonzini 2922f4dc3c1SPaolo Bonzini /** 29305e514b1SPaolo Bonzini * aio_notify_accept: Acknowledge receiving an aio_notify. 29405e514b1SPaolo Bonzini * 29505e514b1SPaolo Bonzini * aio_notify() uses an EventNotifier in order to wake up a sleeping 29605e514b1SPaolo Bonzini * aio_poll() or g_main_context_iteration(). Calls to aio_notify() are 29705e514b1SPaolo Bonzini * usually rare, but the AioContext has to clear the EventNotifier on 29805e514b1SPaolo Bonzini * every aio_poll() or g_main_context_iteration() in order to avoid 29905e514b1SPaolo Bonzini * busy waiting. This event_notifier_test_and_clear() cannot be done 30005e514b1SPaolo Bonzini * using the usual aio_context_set_event_notifier(), because it must 30105e514b1SPaolo Bonzini * be done before processing all events (file descriptors, bottom halves, 30205e514b1SPaolo Bonzini * timers). 30305e514b1SPaolo Bonzini * 30405e514b1SPaolo Bonzini * aio_notify_accept() is an optimized event_notifier_test_and_clear() 30505e514b1SPaolo Bonzini * that is specific to an AioContext's notifier; it is used internally 30605e514b1SPaolo Bonzini * to clear the EventNotifier only if aio_notify() had been called. 30705e514b1SPaolo Bonzini */ 30805e514b1SPaolo Bonzini void aio_notify_accept(AioContext *ctx); 30905e514b1SPaolo Bonzini 31005e514b1SPaolo Bonzini /** 311df281b80SPavel Dovgalyuk * aio_bh_call: Executes callback function of the specified BH. 312df281b80SPavel Dovgalyuk */ 313df281b80SPavel Dovgalyuk void aio_bh_call(QEMUBH *bh); 314df281b80SPavel Dovgalyuk 315df281b80SPavel Dovgalyuk /** 316f627aab1SPaolo Bonzini * aio_bh_poll: Poll bottom halves for an AioContext. 317f627aab1SPaolo Bonzini * 318f627aab1SPaolo Bonzini * These are internal functions used by the QEMU main loop. 319dcc772e2SLiu Ping Fan * And notice that multiple occurrences of aio_bh_poll cannot 320dcc772e2SLiu Ping Fan * be called concurrently 321f627aab1SPaolo Bonzini */ 322f627aab1SPaolo Bonzini int aio_bh_poll(AioContext *ctx); 323f627aab1SPaolo Bonzini 324f627aab1SPaolo Bonzini /** 325f627aab1SPaolo Bonzini * qemu_bh_schedule: Schedule a bottom half. 326f627aab1SPaolo Bonzini * 327f627aab1SPaolo Bonzini * Scheduling a bottom half interrupts the main loop and causes the 328f627aab1SPaolo Bonzini * execution of the callback that was passed to qemu_bh_new. 329f627aab1SPaolo Bonzini * 330f627aab1SPaolo Bonzini * Bottom halves that are scheduled from a bottom half handler are instantly 331f627aab1SPaolo Bonzini * invoked. This can create an infinite loop if a bottom half handler 332f627aab1SPaolo Bonzini * schedules itself. 333f627aab1SPaolo Bonzini * 334f627aab1SPaolo Bonzini * @bh: The bottom half to be scheduled. 335f627aab1SPaolo Bonzini */ 336f627aab1SPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh); 337f627aab1SPaolo Bonzini 338f627aab1SPaolo Bonzini /** 339f627aab1SPaolo Bonzini * qemu_bh_cancel: Cancel execution of a bottom half. 340f627aab1SPaolo Bonzini * 341f627aab1SPaolo Bonzini * Canceling execution of a bottom half undoes the effect of calls to 342f627aab1SPaolo Bonzini * qemu_bh_schedule without freeing its resources yet. While cancellation 343f627aab1SPaolo Bonzini * itself is also wait-free and thread-safe, it can of course race with the 344f627aab1SPaolo Bonzini * loop that executes bottom halves unless you are holding the iothread 345f627aab1SPaolo Bonzini * mutex. This makes it mostly useless if you are not holding the mutex. 346f627aab1SPaolo Bonzini * 347f627aab1SPaolo Bonzini * @bh: The bottom half to be canceled. 348f627aab1SPaolo Bonzini */ 349f627aab1SPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh); 350f627aab1SPaolo Bonzini 351f627aab1SPaolo Bonzini /** 352f627aab1SPaolo Bonzini *qemu_bh_delete: Cancel execution of a bottom half and free its resources. 353f627aab1SPaolo Bonzini * 354f627aab1SPaolo Bonzini * Deleting a bottom half frees the memory that was allocated for it by 355f627aab1SPaolo Bonzini * qemu_bh_new. It also implies canceling the bottom half if it was 356f627aab1SPaolo Bonzini * scheduled. 357dcc772e2SLiu Ping Fan * This func is async. The bottom half will do the delete action at the finial 358dcc772e2SLiu Ping Fan * end. 359f627aab1SPaolo Bonzini * 360f627aab1SPaolo Bonzini * @bh: The bottom half to be deleted. 361f627aab1SPaolo Bonzini */ 362f627aab1SPaolo Bonzini void qemu_bh_delete(QEMUBH *bh); 363f627aab1SPaolo Bonzini 364cd9ba1ebSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 365a3462c65SPaolo Bonzini * attached to the AioContext, before g_poll is invoked. 366a3462c65SPaolo Bonzini * 367a3462c65SPaolo Bonzini * This is used internally in the implementation of the GSource. 368a3462c65SPaolo Bonzini */ 369a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx); 370a3462c65SPaolo Bonzini 371a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 372a3462c65SPaolo Bonzini * attached to the AioContext, after g_poll is invoked. 373cd9ba1ebSPaolo Bonzini * 374cd9ba1ebSPaolo Bonzini * This is used internally in the implementation of the GSource. 375cd9ba1ebSPaolo Bonzini */ 376cd9ba1ebSPaolo Bonzini bool aio_pending(AioContext *ctx); 377cd9ba1ebSPaolo Bonzini 378e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext. 379e4c7e2d1SPaolo Bonzini * 380e4c7e2d1SPaolo Bonzini * This is used internally in the implementation of the GSource. 381e4c7e2d1SPaolo Bonzini */ 382a153bf52SPaolo Bonzini void aio_dispatch(AioContext *ctx); 383e4c7e2d1SPaolo Bonzini 3847c0628b2SPaolo Bonzini /* Progress in completing AIO work to occur. This can issue new pending 3857c0628b2SPaolo Bonzini * aio as a result of executing I/O completion or bh callbacks. 386bcdc1857SPaolo Bonzini * 387acfb23adSPaolo Bonzini * Return whether any progress was made by executing AIO or bottom half 388acfb23adSPaolo Bonzini * handlers. If @blocking == true, this should always be true except 389acfb23adSPaolo Bonzini * if someone called aio_notify. 3907c0628b2SPaolo Bonzini * 3917c0628b2SPaolo Bonzini * If there are no pending bottom halves, but there are pending AIO 3927c0628b2SPaolo Bonzini * operations, it may not be possible to make any progress without 3937c0628b2SPaolo Bonzini * blocking. If @blocking is true, this function will wait until one 3947c0628b2SPaolo Bonzini * or more AIO events have completed, to ensure something has moved 3957c0628b2SPaolo Bonzini * before returning. 3967c0628b2SPaolo Bonzini */ 3977c0628b2SPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking); 398a76bab49Saliguori 399a76bab49Saliguori /* Register a file descriptor and associated callbacks. Behaves very similarly 4006484e422SFam Zheng * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will 40187f68d31SPaolo Bonzini * be invoked when using aio_poll(). 402a76bab49Saliguori * 403a76bab49Saliguori * Code that invokes AIO completion functions should rely on this function 404a76bab49Saliguori * instead of qemu_set_fd_handler[2]. 405a76bab49Saliguori */ 406a915f4bcSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx, 407a915f4bcSPaolo Bonzini int fd, 408dca21ef2SFam Zheng bool is_external, 409a76bab49Saliguori IOHandler *io_read, 410a76bab49Saliguori IOHandler *io_write, 411f6a51c84SStefan Hajnoczi AioPollFn *io_poll, 412a76bab49Saliguori void *opaque); 4139958c351SPaolo Bonzini 414684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for a file descriptor that has already been 415684e508cSStefan Hajnoczi * registered with aio_set_fd_handler. Do nothing if the file descriptor is 416684e508cSStefan Hajnoczi * not registered. 417684e508cSStefan Hajnoczi */ 418684e508cSStefan Hajnoczi void aio_set_fd_poll(AioContext *ctx, int fd, 419684e508cSStefan Hajnoczi IOHandler *io_poll_begin, 420684e508cSStefan Hajnoczi IOHandler *io_poll_end); 421684e508cSStefan Hajnoczi 4229958c351SPaolo Bonzini /* Register an event notifier and associated callbacks. Behaves very similarly 4239958c351SPaolo Bonzini * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks 42487f68d31SPaolo Bonzini * will be invoked when using aio_poll(). 4259958c351SPaolo Bonzini * 4269958c351SPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 4279958c351SPaolo Bonzini * instead of event_notifier_set_handler. 4289958c351SPaolo Bonzini */ 429a915f4bcSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx, 430a915f4bcSPaolo Bonzini EventNotifier *notifier, 431dca21ef2SFam Zheng bool is_external, 432f6a51c84SStefan Hajnoczi EventNotifierHandler *io_read, 433f6a51c84SStefan Hajnoczi AioPollFn *io_poll); 434a915f4bcSPaolo Bonzini 435684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for an event notifier that has already been 436684e508cSStefan Hajnoczi * registered with aio_set_event_notifier. Do nothing if the event notifier is 437684e508cSStefan Hajnoczi * not registered. 438684e508cSStefan Hajnoczi */ 439684e508cSStefan Hajnoczi void aio_set_event_notifier_poll(AioContext *ctx, 440684e508cSStefan Hajnoczi EventNotifier *notifier, 441684e508cSStefan Hajnoczi EventNotifierHandler *io_poll_begin, 442684e508cSStefan Hajnoczi EventNotifierHandler *io_poll_end); 443684e508cSStefan Hajnoczi 444e3713e00SPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached 445e3713e00SPaolo Bonzini * to this AioContext. 446e3713e00SPaolo Bonzini */ 447e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx); 448e3713e00SPaolo Bonzini 4499b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */ 4509b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx); 4519b34277dSStefan Hajnoczi 452ed6e2161SNishanth Aravamudan /* Setup the LinuxAioState bound to this AioContext */ 453ed6e2161SNishanth Aravamudan struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); 454ed6e2161SNishanth Aravamudan 4550187f5c9SPaolo Bonzini /* Return the LinuxAioState bound to this AioContext */ 4560187f5c9SPaolo Bonzini struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); 4570187f5c9SPaolo Bonzini 4586663a0a3SAarushi Mehta /* Setup the LuringState bound to this AioContext */ 4596663a0a3SAarushi Mehta struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); 4606663a0a3SAarushi Mehta 4616663a0a3SAarushi Mehta /* Return the LuringState bound to this AioContext */ 4626663a0a3SAarushi Mehta struct LuringState *aio_get_linux_io_uring(AioContext *ctx); 4634e29e831SAlex Bligh /** 46489a603a0SArtem Pisarenko * aio_timer_new_with_attrs: 46589a603a0SArtem Pisarenko * @ctx: the aio context 46689a603a0SArtem Pisarenko * @type: the clock type 46789a603a0SArtem Pisarenko * @scale: the scale 46889a603a0SArtem Pisarenko * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values 46989a603a0SArtem Pisarenko * to assign 47089a603a0SArtem Pisarenko * @cb: the callback to call on timer expiry 47189a603a0SArtem Pisarenko * @opaque: the opaque pointer to pass to the callback 47289a603a0SArtem Pisarenko * 47389a603a0SArtem Pisarenko * Allocate a new timer (with attributes) attached to the context @ctx. 47489a603a0SArtem Pisarenko * The function is responsible for memory allocation. 47589a603a0SArtem Pisarenko * 47689a603a0SArtem Pisarenko * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. 47789a603a0SArtem Pisarenko * Use that unless you really need dynamic memory allocation. 47889a603a0SArtem Pisarenko * 47989a603a0SArtem Pisarenko * Returns: a pointer to the new timer 48089a603a0SArtem Pisarenko */ 48189a603a0SArtem Pisarenko static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, 48289a603a0SArtem Pisarenko QEMUClockType type, 48389a603a0SArtem Pisarenko int scale, int attributes, 48489a603a0SArtem Pisarenko QEMUTimerCB *cb, void *opaque) 48589a603a0SArtem Pisarenko { 48689a603a0SArtem Pisarenko return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); 48789a603a0SArtem Pisarenko } 48889a603a0SArtem Pisarenko 48989a603a0SArtem Pisarenko /** 4904e29e831SAlex Bligh * aio_timer_new: 4914e29e831SAlex Bligh * @ctx: the aio context 4924e29e831SAlex Bligh * @type: the clock type 4934e29e831SAlex Bligh * @scale: the scale 4944e29e831SAlex Bligh * @cb: the callback to call on timer expiry 4954e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 4964e29e831SAlex Bligh * 4974e29e831SAlex Bligh * Allocate a new timer attached to the context @ctx. 49889a603a0SArtem Pisarenko * See aio_timer_new_with_attrs for details. 4994e29e831SAlex Bligh * 5004e29e831SAlex Bligh * Returns: a pointer to the new timer 5014e29e831SAlex Bligh */ 5024e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, 5034e29e831SAlex Bligh int scale, 5044e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 5054e29e831SAlex Bligh { 50689a603a0SArtem Pisarenko return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); 50789a603a0SArtem Pisarenko } 50889a603a0SArtem Pisarenko 50989a603a0SArtem Pisarenko /** 51089a603a0SArtem Pisarenko * aio_timer_init_with_attrs: 51189a603a0SArtem Pisarenko * @ctx: the aio context 51289a603a0SArtem Pisarenko * @ts: the timer 51389a603a0SArtem Pisarenko * @type: the clock type 51489a603a0SArtem Pisarenko * @scale: the scale 51589a603a0SArtem Pisarenko * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values 51689a603a0SArtem Pisarenko * to assign 51789a603a0SArtem Pisarenko * @cb: the callback to call on timer expiry 51889a603a0SArtem Pisarenko * @opaque: the opaque pointer to pass to the callback 51989a603a0SArtem Pisarenko * 52089a603a0SArtem Pisarenko * Initialise a new timer (with attributes) attached to the context @ctx. 52189a603a0SArtem Pisarenko * The caller is responsible for memory allocation. 52289a603a0SArtem Pisarenko */ 52389a603a0SArtem Pisarenko static inline void aio_timer_init_with_attrs(AioContext *ctx, 52489a603a0SArtem Pisarenko QEMUTimer *ts, QEMUClockType type, 52589a603a0SArtem Pisarenko int scale, int attributes, 52689a603a0SArtem Pisarenko QEMUTimerCB *cb, void *opaque) 52789a603a0SArtem Pisarenko { 52889a603a0SArtem Pisarenko timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); 5294e29e831SAlex Bligh } 5304e29e831SAlex Bligh 5314e29e831SAlex Bligh /** 5324e29e831SAlex Bligh * aio_timer_init: 5334e29e831SAlex Bligh * @ctx: the aio context 5344e29e831SAlex Bligh * @ts: the timer 5354e29e831SAlex Bligh * @type: the clock type 5364e29e831SAlex Bligh * @scale: the scale 5374e29e831SAlex Bligh * @cb: the callback to call on timer expiry 5384e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 5394e29e831SAlex Bligh * 5404e29e831SAlex Bligh * Initialise a new timer attached to the context @ctx. 54189a603a0SArtem Pisarenko * See aio_timer_init_with_attrs for details. 5424e29e831SAlex Bligh */ 5434e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx, 5444e29e831SAlex Bligh QEMUTimer *ts, QEMUClockType type, 5454e29e831SAlex Bligh int scale, 5464e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 5474e29e831SAlex Bligh { 54889a603a0SArtem Pisarenko timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); 5494e29e831SAlex Bligh } 5504e29e831SAlex Bligh 551845ca10dSPaolo Bonzini /** 552845ca10dSPaolo Bonzini * aio_compute_timeout: 553845ca10dSPaolo Bonzini * @ctx: the aio context 554845ca10dSPaolo Bonzini * 555845ca10dSPaolo Bonzini * Compute the timeout that a blocking aio_poll should use. 556845ca10dSPaolo Bonzini */ 557845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx); 558845ca10dSPaolo Bonzini 559c1e1e5faSFam Zheng /** 560c1e1e5faSFam Zheng * aio_disable_external: 561c1e1e5faSFam Zheng * @ctx: the aio context 562c1e1e5faSFam Zheng * 563c1e1e5faSFam Zheng * Disable the further processing of external clients. 564c1e1e5faSFam Zheng */ 565c1e1e5faSFam Zheng static inline void aio_disable_external(AioContext *ctx) 566c1e1e5faSFam Zheng { 567c1e1e5faSFam Zheng atomic_inc(&ctx->external_disable_cnt); 568c1e1e5faSFam Zheng } 569c1e1e5faSFam Zheng 570c1e1e5faSFam Zheng /** 571c1e1e5faSFam Zheng * aio_enable_external: 572c1e1e5faSFam Zheng * @ctx: the aio context 573c1e1e5faSFam Zheng * 574c1e1e5faSFam Zheng * Enable the processing of external clients. 575c1e1e5faSFam Zheng */ 576c1e1e5faSFam Zheng static inline void aio_enable_external(AioContext *ctx) 577c1e1e5faSFam Zheng { 578321d1dbaSStefan Hajnoczi int old; 579321d1dbaSStefan Hajnoczi 580321d1dbaSStefan Hajnoczi old = atomic_fetch_dec(&ctx->external_disable_cnt); 581321d1dbaSStefan Hajnoczi assert(old > 0); 582321d1dbaSStefan Hajnoczi if (old == 1) { 583321d1dbaSStefan Hajnoczi /* Kick event loop so it re-arms file descriptors */ 584321d1dbaSStefan Hajnoczi aio_notify(ctx); 585321d1dbaSStefan Hajnoczi } 586c1e1e5faSFam Zheng } 587c1e1e5faSFam Zheng 588c1e1e5faSFam Zheng /** 5895ceb9e39SFam Zheng * aio_external_disabled: 5905ceb9e39SFam Zheng * @ctx: the aio context 5915ceb9e39SFam Zheng * 5925ceb9e39SFam Zheng * Return true if the external clients are disabled. 5935ceb9e39SFam Zheng */ 5945ceb9e39SFam Zheng static inline bool aio_external_disabled(AioContext *ctx) 5955ceb9e39SFam Zheng { 5965ceb9e39SFam Zheng return atomic_read(&ctx->external_disable_cnt); 5975ceb9e39SFam Zheng } 5985ceb9e39SFam Zheng 5995ceb9e39SFam Zheng /** 600c1e1e5faSFam Zheng * aio_node_check: 601c1e1e5faSFam Zheng * @ctx: the aio context 602c1e1e5faSFam Zheng * @is_external: Whether or not the checked node is an external event source. 603c1e1e5faSFam Zheng * 604c1e1e5faSFam Zheng * Check if the node's is_external flag is okay to be polled by the ctx at this 605c1e1e5faSFam Zheng * moment. True means green light. 606c1e1e5faSFam Zheng */ 607c1e1e5faSFam Zheng static inline bool aio_node_check(AioContext *ctx, bool is_external) 608c1e1e5faSFam Zheng { 609c1e1e5faSFam Zheng return !is_external || !atomic_read(&ctx->external_disable_cnt); 610c1e1e5faSFam Zheng } 611c1e1e5faSFam Zheng 61237fcee5dSFam Zheng /** 6130c330a73SPaolo Bonzini * aio_co_schedule: 6140c330a73SPaolo Bonzini * @ctx: the aio context 6150c330a73SPaolo Bonzini * @co: the coroutine 6160c330a73SPaolo Bonzini * 6170c330a73SPaolo Bonzini * Start a coroutine on a remote AioContext. 6180c330a73SPaolo Bonzini * 6190c330a73SPaolo Bonzini * The coroutine must not be entered by anyone else while aio_co_schedule() 6200c330a73SPaolo Bonzini * is active. In addition the coroutine must have yielded unless ctx 6210c330a73SPaolo Bonzini * is the context in which the coroutine is running (i.e. the value of 6220c330a73SPaolo Bonzini * qemu_get_current_aio_context() from the coroutine itself). 6230c330a73SPaolo Bonzini */ 6240c330a73SPaolo Bonzini void aio_co_schedule(AioContext *ctx, struct Coroutine *co); 6250c330a73SPaolo Bonzini 6260c330a73SPaolo Bonzini /** 6270c330a73SPaolo Bonzini * aio_co_wake: 6280c330a73SPaolo Bonzini * @co: the coroutine 6290c330a73SPaolo Bonzini * 6300c330a73SPaolo Bonzini * Restart a coroutine on the AioContext where it was running last, thus 6310c330a73SPaolo Bonzini * preventing coroutines from jumping from one context to another when they 6320c330a73SPaolo Bonzini * go to sleep. 6330c330a73SPaolo Bonzini * 6340c330a73SPaolo Bonzini * aio_co_wake may be executed either in coroutine or non-coroutine 6350c330a73SPaolo Bonzini * context. The coroutine must not be entered by anyone else while 6360c330a73SPaolo Bonzini * aio_co_wake() is active. 6370c330a73SPaolo Bonzini */ 6380c330a73SPaolo Bonzini void aio_co_wake(struct Coroutine *co); 6390c330a73SPaolo Bonzini 6400c330a73SPaolo Bonzini /** 6418865852eSFam Zheng * aio_co_enter: 6428865852eSFam Zheng * @ctx: the context to run the coroutine 6438865852eSFam Zheng * @co: the coroutine to run 6448865852eSFam Zheng * 6458865852eSFam Zheng * Enter a coroutine in the specified AioContext. 6468865852eSFam Zheng */ 6478865852eSFam Zheng void aio_co_enter(AioContext *ctx, struct Coroutine *co); 6488865852eSFam Zheng 6498865852eSFam Zheng /** 650e4370165SPaolo Bonzini * Return the AioContext whose event loop runs in the current thread. 651e4370165SPaolo Bonzini * 652e4370165SPaolo Bonzini * If called from an IOThread this will be the IOThread's AioContext. If 653e4370165SPaolo Bonzini * called from another thread it will be the main loop AioContext. 654e4370165SPaolo Bonzini */ 655e4370165SPaolo Bonzini AioContext *qemu_get_current_aio_context(void); 656e4370165SPaolo Bonzini 657e4370165SPaolo Bonzini /** 658d2b63ba8SStefan Hajnoczi * in_aio_context_home_thread: 659e4370165SPaolo Bonzini * @ctx: the aio context 660e4370165SPaolo Bonzini * 661d2b63ba8SStefan Hajnoczi * Return whether we are running in the thread that normally runs @ctx. Note 662d2b63ba8SStefan Hajnoczi * that acquiring/releasing ctx does not affect the outcome, each AioContext 663d2b63ba8SStefan Hajnoczi * still only has one home thread that is responsible for running it. 664e4370165SPaolo Bonzini */ 665d2b63ba8SStefan Hajnoczi static inline bool in_aio_context_home_thread(AioContext *ctx) 666e4370165SPaolo Bonzini { 667e4370165SPaolo Bonzini return ctx == qemu_get_current_aio_context(); 668e4370165SPaolo Bonzini } 669e4370165SPaolo Bonzini 670e4370165SPaolo Bonzini /** 67137fcee5dSFam Zheng * aio_context_setup: 67237fcee5dSFam Zheng * @ctx: the aio context 67337fcee5dSFam Zheng * 67437fcee5dSFam Zheng * Initialize the aio context. 67537fcee5dSFam Zheng */ 6767e003465SCao jin void aio_context_setup(AioContext *ctx); 67737fcee5dSFam Zheng 6784a1cba38SStefan Hajnoczi /** 679cd0a6d2bSJie Wang * aio_context_destroy: 680cd0a6d2bSJie Wang * @ctx: the aio context 681cd0a6d2bSJie Wang * 682cd0a6d2bSJie Wang * Destroy the aio context. 683cd0a6d2bSJie Wang */ 684cd0a6d2bSJie Wang void aio_context_destroy(AioContext *ctx); 685cd0a6d2bSJie Wang 686cd0a6d2bSJie Wang /** 6874a1cba38SStefan Hajnoczi * aio_context_set_poll_params: 6884a1cba38SStefan Hajnoczi * @ctx: the aio context 6894a1cba38SStefan Hajnoczi * @max_ns: how long to busy poll for, in nanoseconds 69082a41186SStefan Hajnoczi * @grow: polling time growth factor 69182a41186SStefan Hajnoczi * @shrink: polling time shrink factor 6924a1cba38SStefan Hajnoczi * 6934a1cba38SStefan Hajnoczi * Poll mode can be disabled by setting poll_max_ns to 0. 6944a1cba38SStefan Hajnoczi */ 6954a1cba38SStefan Hajnoczi void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, 69682a41186SStefan Hajnoczi int64_t grow, int64_t shrink, 6974a1cba38SStefan Hajnoczi Error **errp); 6984a1cba38SStefan Hajnoczi 699a76bab49Saliguori #endif 700