1a76bab49Saliguori /* 2a76bab49Saliguori * QEMU aio implementation 3a76bab49Saliguori * 4a76bab49Saliguori * Copyright IBM, Corp. 2008 5a76bab49Saliguori * 6a76bab49Saliguori * Authors: 7a76bab49Saliguori * Anthony Liguori <aliguori@us.ibm.com> 8a76bab49Saliguori * 9a76bab49Saliguori * This work is licensed under the terms of the GNU GPL, version 2. See 10a76bab49Saliguori * the COPYING file in the top-level directory. 11a76bab49Saliguori * 12a76bab49Saliguori */ 13a76bab49Saliguori 14a76bab49Saliguori #ifndef QEMU_AIO_H 15a76bab49Saliguori #define QEMU_AIO_H 16a76bab49Saliguori 176a1751b7SAlex Bligh #include "qemu/typedefs.h" 18a76bab49Saliguori #include "qemu-common.h" 191de7afc9SPaolo Bonzini #include "qemu/queue.h" 201de7afc9SPaolo Bonzini #include "qemu/event_notifier.h" 21dcc772e2SLiu Ping Fan #include "qemu/thread.h" 2298563fc3SStefan Hajnoczi #include "qemu/rfifolock.h" 23dae21b98SAlex Bligh #include "qemu/timer.h" 24a76bab49Saliguori 2585e8dab1SPaolo Bonzini typedef struct BlockDriverAIOCB BlockDriverAIOCB; 2685e8dab1SPaolo Bonzini typedef void BlockDriverCompletionFunc(void *opaque, int ret); 2785e8dab1SPaolo Bonzini 28d7331bedSStefan Hajnoczi typedef struct AIOCBInfo { 2985e8dab1SPaolo Bonzini void (*cancel)(BlockDriverAIOCB *acb); 308c82e9a4SStefan Hajnoczi size_t aiocb_size; 31d7331bedSStefan Hajnoczi } AIOCBInfo; 3285e8dab1SPaolo Bonzini 3385e8dab1SPaolo Bonzini struct BlockDriverAIOCB { 34d7331bedSStefan Hajnoczi const AIOCBInfo *aiocb_info; 3585e8dab1SPaolo Bonzini BlockDriverState *bs; 3685e8dab1SPaolo Bonzini BlockDriverCompletionFunc *cb; 3785e8dab1SPaolo Bonzini void *opaque; 3885e8dab1SPaolo Bonzini }; 3985e8dab1SPaolo Bonzini 40d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 4185e8dab1SPaolo Bonzini BlockDriverCompletionFunc *cb, void *opaque); 4285e8dab1SPaolo Bonzini void qemu_aio_release(void *p); 4385e8dab1SPaolo Bonzini 44f627aab1SPaolo Bonzini typedef struct AioHandler AioHandler; 45f627aab1SPaolo Bonzini typedef void QEMUBHFunc(void *opaque); 46f627aab1SPaolo Bonzini typedef void IOHandler(void *opaque); 47f627aab1SPaolo Bonzini 486a1751b7SAlex Bligh struct AioContext { 49e3713e00SPaolo Bonzini GSource source; 50e3713e00SPaolo Bonzini 5198563fc3SStefan Hajnoczi /* Protects all fields from multi-threaded access */ 5298563fc3SStefan Hajnoczi RFifoLock lock; 5398563fc3SStefan Hajnoczi 54a915f4bcSPaolo Bonzini /* The list of registered AIO handlers */ 55a915f4bcSPaolo Bonzini QLIST_HEAD(, AioHandler) aio_handlers; 56a915f4bcSPaolo Bonzini 57a915f4bcSPaolo Bonzini /* This is a simple lock used to protect the aio_handlers list. 58a915f4bcSPaolo Bonzini * Specifically, it's used to ensure that no callbacks are removed while 59a915f4bcSPaolo Bonzini * we're walking and dispatching callbacks. 60a915f4bcSPaolo Bonzini */ 61a915f4bcSPaolo Bonzini int walking_handlers; 62a915f4bcSPaolo Bonzini 63*0ceb849bSPaolo Bonzini /* Used to avoid unnecessary event_notifier_set calls in aio_notify. 64*0ceb849bSPaolo Bonzini * Writes protected by lock or BQL, reads are lockless. 65*0ceb849bSPaolo Bonzini */ 66*0ceb849bSPaolo Bonzini bool dispatching; 67*0ceb849bSPaolo Bonzini 68dcc772e2SLiu Ping Fan /* lock to protect between bh's adders and deleter */ 69dcc772e2SLiu Ping Fan QemuMutex bh_lock; 70*0ceb849bSPaolo Bonzini 71f627aab1SPaolo Bonzini /* Anchor of the list of Bottom Halves belonging to the context */ 72f627aab1SPaolo Bonzini struct QEMUBH *first_bh; 73f627aab1SPaolo Bonzini 74f627aab1SPaolo Bonzini /* A simple lock used to protect the first_bh list, and ensure that 75f627aab1SPaolo Bonzini * no callbacks are removed while we're walking and dispatching callbacks. 76f627aab1SPaolo Bonzini */ 77f627aab1SPaolo Bonzini int walking_bh; 782f4dc3c1SPaolo Bonzini 792f4dc3c1SPaolo Bonzini /* Used for aio_notify. */ 802f4dc3c1SPaolo Bonzini EventNotifier notifier; 816b5f8762SStefan Hajnoczi 826b5f8762SStefan Hajnoczi /* GPollFDs for aio_poll() */ 836b5f8762SStefan Hajnoczi GArray *pollfds; 849b34277dSStefan Hajnoczi 859b34277dSStefan Hajnoczi /* Thread pool for performing work and receiving completion callbacks */ 869b34277dSStefan Hajnoczi struct ThreadPool *thread_pool; 87dae21b98SAlex Bligh 88dae21b98SAlex Bligh /* TimerLists for calling timers - one per clock type */ 89dae21b98SAlex Bligh QEMUTimerListGroup tlg; 906a1751b7SAlex Bligh }; 91f627aab1SPaolo Bonzini 92*0ceb849bSPaolo Bonzini /* Used internally to synchronize aio_poll against qemu_bh_schedule. */ 93*0ceb849bSPaolo Bonzini void aio_set_dispatching(AioContext *ctx, bool dispatching); 94*0ceb849bSPaolo Bonzini 95f627aab1SPaolo Bonzini /** 96f627aab1SPaolo Bonzini * aio_context_new: Allocate a new AioContext. 97f627aab1SPaolo Bonzini * 98f627aab1SPaolo Bonzini * AioContext provide a mini event-loop that can be waited on synchronously. 99f627aab1SPaolo Bonzini * They also provide bottom halves, a service to execute a piece of code 100f627aab1SPaolo Bonzini * as soon as possible. 101f627aab1SPaolo Bonzini */ 102f627aab1SPaolo Bonzini AioContext *aio_context_new(void); 103f627aab1SPaolo Bonzini 104f627aab1SPaolo Bonzini /** 105e3713e00SPaolo Bonzini * aio_context_ref: 106e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 107e3713e00SPaolo Bonzini * 108e3713e00SPaolo Bonzini * Add a reference to an AioContext. 109e3713e00SPaolo Bonzini */ 110e3713e00SPaolo Bonzini void aio_context_ref(AioContext *ctx); 111e3713e00SPaolo Bonzini 112e3713e00SPaolo Bonzini /** 113e3713e00SPaolo Bonzini * aio_context_unref: 114e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 115e3713e00SPaolo Bonzini * 116e3713e00SPaolo Bonzini * Drop a reference to an AioContext. 117e3713e00SPaolo Bonzini */ 118e3713e00SPaolo Bonzini void aio_context_unref(AioContext *ctx); 119e3713e00SPaolo Bonzini 12098563fc3SStefan Hajnoczi /* Take ownership of the AioContext. If the AioContext will be shared between 12198563fc3SStefan Hajnoczi * threads, a thread must have ownership when calling aio_poll(). 12298563fc3SStefan Hajnoczi * 12398563fc3SStefan Hajnoczi * Note that multiple threads calling aio_poll() means timers, BHs, and 12498563fc3SStefan Hajnoczi * callbacks may be invoked from a different thread than they were registered 12598563fc3SStefan Hajnoczi * from. Therefore, code must use AioContext acquire/release or use 12698563fc3SStefan Hajnoczi * fine-grained synchronization to protect shared state if other threads will 12798563fc3SStefan Hajnoczi * be accessing it simultaneously. 12898563fc3SStefan Hajnoczi */ 12998563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx); 13098563fc3SStefan Hajnoczi 13198563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */ 13298563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx); 13398563fc3SStefan Hajnoczi 134e3713e00SPaolo Bonzini /** 135f627aab1SPaolo Bonzini * aio_bh_new: Allocate a new bottom half structure. 136f627aab1SPaolo Bonzini * 137f627aab1SPaolo Bonzini * Bottom halves are lightweight callbacks whose invocation is guaranteed 138f627aab1SPaolo Bonzini * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure 139f627aab1SPaolo Bonzini * is opaque and must be allocated prior to its use. 140f627aab1SPaolo Bonzini */ 141f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 142f627aab1SPaolo Bonzini 143f627aab1SPaolo Bonzini /** 1442f4dc3c1SPaolo Bonzini * aio_notify: Force processing of pending events. 1452f4dc3c1SPaolo Bonzini * 1462f4dc3c1SPaolo Bonzini * Similar to signaling a condition variable, aio_notify forces 1472f4dc3c1SPaolo Bonzini * aio_wait to exit, so that the next call will re-examine pending events. 1482f4dc3c1SPaolo Bonzini * The caller of aio_notify will usually call aio_wait again very soon, 1492f4dc3c1SPaolo Bonzini * or go through another iteration of the GLib main loop. Hence, aio_notify 1502f4dc3c1SPaolo Bonzini * also has the side effect of recalculating the sets of file descriptors 1512f4dc3c1SPaolo Bonzini * that the main loop waits for. 1522f4dc3c1SPaolo Bonzini * 1532f4dc3c1SPaolo Bonzini * Calling aio_notify is rarely necessary, because for example scheduling 1542f4dc3c1SPaolo Bonzini * a bottom half calls it already. 1552f4dc3c1SPaolo Bonzini */ 1562f4dc3c1SPaolo Bonzini void aio_notify(AioContext *ctx); 1572f4dc3c1SPaolo Bonzini 1582f4dc3c1SPaolo Bonzini /** 159f627aab1SPaolo Bonzini * aio_bh_poll: Poll bottom halves for an AioContext. 160f627aab1SPaolo Bonzini * 161f627aab1SPaolo Bonzini * These are internal functions used by the QEMU main loop. 162dcc772e2SLiu Ping Fan * And notice that multiple occurrences of aio_bh_poll cannot 163dcc772e2SLiu Ping Fan * be called concurrently 164f627aab1SPaolo Bonzini */ 165f627aab1SPaolo Bonzini int aio_bh_poll(AioContext *ctx); 166f627aab1SPaolo Bonzini 167f627aab1SPaolo Bonzini /** 168f627aab1SPaolo Bonzini * qemu_bh_schedule: Schedule a bottom half. 169f627aab1SPaolo Bonzini * 170f627aab1SPaolo Bonzini * Scheduling a bottom half interrupts the main loop and causes the 171f627aab1SPaolo Bonzini * execution of the callback that was passed to qemu_bh_new. 172f627aab1SPaolo Bonzini * 173f627aab1SPaolo Bonzini * Bottom halves that are scheduled from a bottom half handler are instantly 174f627aab1SPaolo Bonzini * invoked. This can create an infinite loop if a bottom half handler 175f627aab1SPaolo Bonzini * schedules itself. 176f627aab1SPaolo Bonzini * 177f627aab1SPaolo Bonzini * @bh: The bottom half to be scheduled. 178f627aab1SPaolo Bonzini */ 179f627aab1SPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh); 180f627aab1SPaolo Bonzini 181f627aab1SPaolo Bonzini /** 182f627aab1SPaolo Bonzini * qemu_bh_cancel: Cancel execution of a bottom half. 183f627aab1SPaolo Bonzini * 184f627aab1SPaolo Bonzini * Canceling execution of a bottom half undoes the effect of calls to 185f627aab1SPaolo Bonzini * qemu_bh_schedule without freeing its resources yet. While cancellation 186f627aab1SPaolo Bonzini * itself is also wait-free and thread-safe, it can of course race with the 187f627aab1SPaolo Bonzini * loop that executes bottom halves unless you are holding the iothread 188f627aab1SPaolo Bonzini * mutex. This makes it mostly useless if you are not holding the mutex. 189f627aab1SPaolo Bonzini * 190f627aab1SPaolo Bonzini * @bh: The bottom half to be canceled. 191f627aab1SPaolo Bonzini */ 192f627aab1SPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh); 193f627aab1SPaolo Bonzini 194f627aab1SPaolo Bonzini /** 195f627aab1SPaolo Bonzini *qemu_bh_delete: Cancel execution of a bottom half and free its resources. 196f627aab1SPaolo Bonzini * 197f627aab1SPaolo Bonzini * Deleting a bottom half frees the memory that was allocated for it by 198f627aab1SPaolo Bonzini * qemu_bh_new. It also implies canceling the bottom half if it was 199f627aab1SPaolo Bonzini * scheduled. 200dcc772e2SLiu Ping Fan * This func is async. The bottom half will do the delete action at the finial 201dcc772e2SLiu Ping Fan * end. 202f627aab1SPaolo Bonzini * 203f627aab1SPaolo Bonzini * @bh: The bottom half to be deleted. 204f627aab1SPaolo Bonzini */ 205f627aab1SPaolo Bonzini void qemu_bh_delete(QEMUBH *bh); 206f627aab1SPaolo Bonzini 207cd9ba1ebSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 208cd9ba1ebSPaolo Bonzini * attached to the AioContext. 209cd9ba1ebSPaolo Bonzini * 210cd9ba1ebSPaolo Bonzini * This is used internally in the implementation of the GSource. 211cd9ba1ebSPaolo Bonzini */ 212cd9ba1ebSPaolo Bonzini bool aio_pending(AioContext *ctx); 213cd9ba1ebSPaolo Bonzini 2147c0628b2SPaolo Bonzini /* Progress in completing AIO work to occur. This can issue new pending 2157c0628b2SPaolo Bonzini * aio as a result of executing I/O completion or bh callbacks. 216bcdc1857SPaolo Bonzini * 2177c0628b2SPaolo Bonzini * If there is no pending AIO operation or completion (bottom half), 2182ea9b58fSKevin Wolf * return false. If there are pending AIO operations of bottom halves, 2192ea9b58fSKevin Wolf * return true. 2207c0628b2SPaolo Bonzini * 2217c0628b2SPaolo Bonzini * If there are no pending bottom halves, but there are pending AIO 2227c0628b2SPaolo Bonzini * operations, it may not be possible to make any progress without 2237c0628b2SPaolo Bonzini * blocking. If @blocking is true, this function will wait until one 2247c0628b2SPaolo Bonzini * or more AIO events have completed, to ensure something has moved 2257c0628b2SPaolo Bonzini * before returning. 2267c0628b2SPaolo Bonzini */ 2277c0628b2SPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking); 228a76bab49Saliguori 2299958c351SPaolo Bonzini #ifdef CONFIG_POSIX 230a76bab49Saliguori /* Register a file descriptor and associated callbacks. Behaves very similarly 231a76bab49Saliguori * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will 23287f68d31SPaolo Bonzini * be invoked when using aio_poll(). 233a76bab49Saliguori * 234a76bab49Saliguori * Code that invokes AIO completion functions should rely on this function 235a76bab49Saliguori * instead of qemu_set_fd_handler[2]. 236a76bab49Saliguori */ 237a915f4bcSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx, 238a915f4bcSPaolo Bonzini int fd, 239a76bab49Saliguori IOHandler *io_read, 240a76bab49Saliguori IOHandler *io_write, 241a76bab49Saliguori void *opaque); 2429958c351SPaolo Bonzini #endif 2439958c351SPaolo Bonzini 2449958c351SPaolo Bonzini /* Register an event notifier and associated callbacks. Behaves very similarly 2459958c351SPaolo Bonzini * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks 24687f68d31SPaolo Bonzini * will be invoked when using aio_poll(). 2479958c351SPaolo Bonzini * 2489958c351SPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 2499958c351SPaolo Bonzini * instead of event_notifier_set_handler. 2509958c351SPaolo Bonzini */ 251a915f4bcSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx, 252a915f4bcSPaolo Bonzini EventNotifier *notifier, 253f2e5dca4SStefan Hajnoczi EventNotifierHandler *io_read); 254a915f4bcSPaolo Bonzini 255e3713e00SPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached 256e3713e00SPaolo Bonzini * to this AioContext. 257e3713e00SPaolo Bonzini */ 258e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx); 259e3713e00SPaolo Bonzini 2609b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */ 2619b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx); 2629b34277dSStefan Hajnoczi 2634e29e831SAlex Bligh /** 2644e29e831SAlex Bligh * aio_timer_new: 2654e29e831SAlex Bligh * @ctx: the aio context 2664e29e831SAlex Bligh * @type: the clock type 2674e29e831SAlex Bligh * @scale: the scale 2684e29e831SAlex Bligh * @cb: the callback to call on timer expiry 2694e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 2704e29e831SAlex Bligh * 2714e29e831SAlex Bligh * Allocate a new timer attached to the context @ctx. 2724e29e831SAlex Bligh * The function is responsible for memory allocation. 2734e29e831SAlex Bligh * 2744e29e831SAlex Bligh * The preferred interface is aio_timer_init. Use that 2754e29e831SAlex Bligh * unless you really need dynamic memory allocation. 2764e29e831SAlex Bligh * 2774e29e831SAlex Bligh * Returns: a pointer to the new timer 2784e29e831SAlex Bligh */ 2794e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, 2804e29e831SAlex Bligh int scale, 2814e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 2824e29e831SAlex Bligh { 2834e29e831SAlex Bligh return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); 2844e29e831SAlex Bligh } 2854e29e831SAlex Bligh 2864e29e831SAlex Bligh /** 2874e29e831SAlex Bligh * aio_timer_init: 2884e29e831SAlex Bligh * @ctx: the aio context 2894e29e831SAlex Bligh * @ts: the timer 2904e29e831SAlex Bligh * @type: the clock type 2914e29e831SAlex Bligh * @scale: the scale 2924e29e831SAlex Bligh * @cb: the callback to call on timer expiry 2934e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 2944e29e831SAlex Bligh * 2954e29e831SAlex Bligh * Initialise a new timer attached to the context @ctx. 2964e29e831SAlex Bligh * The caller is responsible for memory allocation. 2974e29e831SAlex Bligh */ 2984e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx, 2994e29e831SAlex Bligh QEMUTimer *ts, QEMUClockType type, 3004e29e831SAlex Bligh int scale, 3014e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 3024e29e831SAlex Bligh { 3034e29e831SAlex Bligh timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque); 3044e29e831SAlex Bligh } 3054e29e831SAlex Bligh 306a76bab49Saliguori #endif 307