/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#include "qemu/typedefs.h"
#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
#include "qemu/rfifolock.h"
#include "qemu/timer.h"

typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);

typedef struct AIOCBInfo {
    void (*cancel_async)(BlockAIOCB *acb);
    AioContext *(*get_aio_context)(BlockAIOCB *acb);
    size_t aiocb_size;
} AIOCBInfo;

struct BlockAIOCB {
    const AIOCBInfo *aiocb_info;
    BlockDriverState *bs;
    BlockCompletionFunc *cb;
    void *opaque;
    int refcnt;
};

void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque);
void qemu_aio_unref(void *p);
void qemu_aio_ref(void *p);
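
/* Example (an illustrative sketch, not part of the API): a hypothetical
 * AIOCB type embedding BlockAIOCB as its first member, which is the layout
 * qemu_aio_get expects when it allocates aiocb_size bytes.  "MyAIOCB" and
 * "my_aiocb_info" are made-up names used only for illustration:
 *
 *     typedef struct MyAIOCB {
 *         BlockAIOCB common;        first member, so the cast is valid
 *         int my_state;
 *     } MyAIOCB;
 *
 *     static const AIOCBInfo my_aiocb_info = {
 *         .aiocb_size = sizeof(MyAIOCB),
 *     };
 *
 *     MyAIOCB *acb = qemu_aio_get(&my_aiocb_info, bs, cb, opaque);
 *     ...
 *     qemu_aio_unref(acb);
 */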
typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
typedef void IOHandler(void *opaque);

struct AioContext {
    GSource source;

    /* Protects all fields from multi-threaded access */
    RFifoLock lock;

    /* The list of registered AIO handlers */
    QLIST_HEAD(, AioHandler) aio_handlers;

    /* This is a simple lock used to protect the aio_handlers list.
     * Specifically, it's used to ensure that no callbacks are removed while
     * we're walking and dispatching callbacks.
     */
    int walking_handlers;

    /* Used to avoid unnecessary event_notifier_set calls in aio_notify.
     * Writes protected by lock or BQL, reads are lockless.
     */
    bool dispatching;

    /* Lock protecting concurrent bottom half adders and the deleter */
    QemuMutex bh_lock;

    /* Anchor of the list of Bottom Halves belonging to the context */
    struct QEMUBH *first_bh;

    /* A simple lock used to protect the first_bh list, and ensure that
     * no callbacks are removed while we're walking and dispatching callbacks.
     */
    int walking_bh;

    /* Used for aio_notify. */
    EventNotifier notifier;

    /* Thread pool for performing work and receiving completion callbacks */
    struct ThreadPool *thread_pool;

    /* TimerLists for calling timers - one per clock type */
    QEMUTimerListGroup tlg;
};

/* Used internally to synchronize aio_poll against qemu_bh_schedule.  */
void aio_set_dispatching(AioContext *ctx, bool dispatching);

/**
 * aio_context_new: Allocate a new AioContext.
 *
 * AioContexts provide a mini event-loop that can be waited on synchronously.
 * They also provide bottom halves, a service to execute a piece of code
 * as soon as possible.
 */
AioContext *aio_context_new(Error **errp);

/**
 * aio_context_ref:
 * @ctx: The AioContext to operate on.
 *
 * Add a reference to an AioContext.
 */
void aio_context_ref(AioContext *ctx);

/**
 * aio_context_unref:
 * @ctx: The AioContext to operate on.
 *
 * Drop a reference to an AioContext.
 */
void aio_context_unref(AioContext *ctx);

/* Take ownership of the AioContext.  If the AioContext will be shared between
 * threads, and a thread does not want to be interrupted, it will have to
 * take ownership around calls to aio_poll().  Otherwise, aio_poll()
 * automatically takes care of calling aio_context_acquire and
 * aio_context_release.
 *
 * Access to timers and BHs from a thread that has not acquired AioContext
 * is possible.  Access to callbacks for now must be done while the AioContext
 * is owned by the thread (FIXME).
 */
void aio_context_acquire(AioContext *ctx);

/* Relinquish ownership of the AioContext. */
void aio_context_release(AioContext *ctx);
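
/* Example (an illustrative sketch, not part of the API): a dedicated thread
 * owning a context while it runs the event loop, following the ownership
 * rules described above.  The "running" flag and the use of &error_abort
 * are assumptions made for this sketch:
 *
 *     AioContext *ctx = aio_context_new(&error_abort);
 *
 *     aio_context_acquire(ctx);
 *     while (running) {
 *         aio_poll(ctx, true);
 *     }
 *     aio_context_release(ctx);
 *     aio_context_unref(ctx);
 */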
/**
 * aio_bh_new: Allocate a new bottom half structure.
 *
 * Bottom halves are lightweight callbacks whose invocation is guaranteed
 * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
 * is opaque and must be allocated prior to its use.
 */
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);

/**
 * aio_notify: Force processing of pending events.
 *
 * Similar to signaling a condition variable, aio_notify forces
 * aio_wait to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_wait again very soon,
 * or go through another iteration of the GLib main loop.  Hence, aio_notify
 * also has the side effect of recalculating the sets of file descriptors
 * that the main loop waits for.
 *
 * Calling aio_notify is rarely necessary, because for example scheduling
 * a bottom half calls it already.
 */
void aio_notify(AioContext *ctx);

/**
 * aio_bh_poll: Poll bottom halves for an AioContext.
 *
 * This is an internal function used by the QEMU main loop.
 * Note that aio_bh_poll must not be called concurrently
 * from multiple threads.
 */
int aio_bh_poll(AioContext *ctx);

/**
 * qemu_bh_schedule: Schedule a bottom half.
 *
 * Scheduling a bottom half interrupts the main loop and causes the
 * execution of the callback that was passed to qemu_bh_new.
 *
 * Bottom halves that are scheduled from a bottom half handler are instantly
 * invoked.  This can create an infinite loop if a bottom half handler
 * schedules itself.
 *
 * @bh: The bottom half to be scheduled.
 */
void qemu_bh_schedule(QEMUBH *bh);
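
/* Example (an illustrative sketch, not part of the API): deferring work to
 * the event loop with a bottom half.  "my_bh_cb" and "MyState" are
 * hypothetical names used only for illustration:
 *
 *     static void my_bh_cb(void *opaque)
 *     {
 *         MyState *s = opaque;
 *         ... perform the deferred work ...
 *     }
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_bh_cb, s);
 *     qemu_bh_schedule(bh);
 *     ...
 *     qemu_bh_delete(bh);
 */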
/**
 * qemu_bh_cancel: Cancel execution of a bottom half.
 *
 * Canceling execution of a bottom half undoes the effect of calls to
 * qemu_bh_schedule without freeing its resources yet.  While cancellation
 * itself is also wait-free and thread-safe, it can of course race with the
 * loop that executes bottom halves unless you are holding the iothread
 * mutex.  This makes it mostly useless if you are not holding the mutex.
 *
 * @bh: The bottom half to be canceled.
 */
void qemu_bh_cancel(QEMUBH *bh);

/**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
 *
 * Deleting a bottom half frees the memory that was allocated for it by
 * qemu_bh_new.  It also implies canceling the bottom half if it was
 * scheduled.
 * This function is asynchronous: the bottom half is only freed once the
 * event loop has finished with it.
 *
 * @bh: The bottom half to be deleted.
 */
void qemu_bh_delete(QEMUBH *bh);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, before g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_prepare(AioContext *ctx);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, after g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_pending(AioContext *ctx);

/* Dispatch any pending callbacks from the GSource attached to the AioContext.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_dispatch(AioContext *ctx);

/* Make progress on pending AIO work.  This can issue new AIO requests as
 * a result of executing I/O completion or bottom half callbacks.
 *
 * Return whether any progress was made by executing AIO or bottom half
 * handlers.  If @blocking == true, this should always be true except
 * if someone called aio_notify.
 *
 * If there are no pending bottom halves, but there are pending AIO
 * operations, it may not be possible to make any progress without
 * blocking.  If @blocking is true, this function will wait until one
 * or more AIO events have completed, to ensure something has moved
 * before returning.
 */
bool aio_poll(AioContext *ctx, bool blocking);

/* Register a file descriptor and associated callbacks.  Behaves very similarly
 * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
 * be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of qemu_set_fd_handler[2].
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque);
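
/* Example (an illustrative sketch, not part of the API): watching a file
 * descriptor for readability, then unregistering it.  "my_read_cb" and
 * "my_fd" are hypothetical names; passing NULL callbacks removes the
 * handler:
 *
 *     static void my_read_cb(void *opaque)
 *     {
 *         ... the descriptor is readable, consume the data ...
 *     }
 *
 *     aio_set_fd_handler(ctx, my_fd, my_read_cb, NULL, opaque);
 *     ...
 *     aio_set_fd_handler(ctx, my_fd, NULL, NULL, NULL);
 */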
/* Register an event notifier and associated callbacks.  Behaves very
 * similarly to event_notifier_set_handler.  Unlike event_notifier_set_handler,
 * these callbacks will be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of event_notifier_set_handler.
 */
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read);

/* Return a GSource that lets the main loop poll the file descriptors attached
 * to this AioContext.
 */
GSource *aio_get_g_source(AioContext *ctx);

/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);

/**
 * aio_timer_new:
 * @ctx: the aio context
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Allocate a new timer attached to the context @ctx.
 * The function is responsible for memory allocation.
 *
 * The preferred interface is aio_timer_init.  Use that
 * unless you really need dynamic memory allocation.
 *
 * Returns: a pointer to the new timer
 */
static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
                                       int scale,
                                       QEMUTimerCB *cb, void *opaque)
{
    return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
}
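
/* Example (an illustrative sketch, not part of the API): arming a one-shot
 * timer on the context.  "my_timer_cb" is a hypothetical callback and the
 * 100 ms delay is arbitrary:
 *
 *     static void my_timer_cb(void *opaque)
 *     {
 *         ... the timer has expired ...
 *     }
 *
 *     QEMUTimer *t = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_MS,
 *                                  my_timer_cb, opaque);
 *     timer_mod(t, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 100);
 *     ...
 *     timer_del(t);
 *     timer_free(t);
 */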
/**
 * aio_timer_init:
 * @ctx: the aio context
 * @ts: the timer
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Initialise a new timer attached to the context @ctx.
 * The caller is responsible for memory allocation.
 */
static inline void aio_timer_init(AioContext *ctx,
                                  QEMUTimer *ts, QEMUClockType type,
                                  int scale,
                                  QEMUTimerCB *cb, void *opaque)
{
    timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
}

/**
 * aio_compute_timeout:
 * @ctx: the aio context
 *
 * Compute the timeout that a blocking aio_poll should use.
 */
int64_t aio_compute_timeout(AioContext *ctx);

#endif