1a76bab49Saliguori /* 2a76bab49Saliguori * QEMU aio implementation 3a76bab49Saliguori * 4a76bab49Saliguori * Copyright IBM, Corp. 2008 5a76bab49Saliguori * 6a76bab49Saliguori * Authors: 7a76bab49Saliguori * Anthony Liguori <aliguori@us.ibm.com> 8a76bab49Saliguori * 9a76bab49Saliguori * This work is licensed under the terms of the GNU GPL, version 2. See 10a76bab49Saliguori * the COPYING file in the top-level directory. 11a76bab49Saliguori * 12a76bab49Saliguori */ 13a76bab49Saliguori 14a76bab49Saliguori #ifndef QEMU_AIO_H 15a76bab49Saliguori #define QEMU_AIO_H 16a76bab49Saliguori 176a1751b7SAlex Bligh #include "qemu/typedefs.h" 18a76bab49Saliguori #include "qemu-common.h" 191de7afc9SPaolo Bonzini #include "qemu/queue.h" 201de7afc9SPaolo Bonzini #include "qemu/event_notifier.h" 21dcc772e2SLiu Ping Fan #include "qemu/thread.h" 22dae21b98SAlex Bligh #include "qemu/timer.h" 23a76bab49Saliguori 2485e8dab1SPaolo Bonzini typedef struct BlockDriverAIOCB BlockDriverAIOCB; 2585e8dab1SPaolo Bonzini typedef void BlockDriverCompletionFunc(void *opaque, int ret); 2685e8dab1SPaolo Bonzini 27d7331bedSStefan Hajnoczi typedef struct AIOCBInfo { 2885e8dab1SPaolo Bonzini void (*cancel)(BlockDriverAIOCB *acb); 298c82e9a4SStefan Hajnoczi size_t aiocb_size; 30d7331bedSStefan Hajnoczi } AIOCBInfo; 3185e8dab1SPaolo Bonzini 3285e8dab1SPaolo Bonzini struct BlockDriverAIOCB { 33d7331bedSStefan Hajnoczi const AIOCBInfo *aiocb_info; 3485e8dab1SPaolo Bonzini BlockDriverState *bs; 3585e8dab1SPaolo Bonzini BlockDriverCompletionFunc *cb; 3685e8dab1SPaolo Bonzini void *opaque; 3785e8dab1SPaolo Bonzini }; 3885e8dab1SPaolo Bonzini 39d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 4085e8dab1SPaolo Bonzini BlockDriverCompletionFunc *cb, void *opaque); 4185e8dab1SPaolo Bonzini void qemu_aio_release(void *p); 4285e8dab1SPaolo Bonzini 43f627aab1SPaolo Bonzini typedef struct AioHandler AioHandler; 44f627aab1SPaolo Bonzini typedef void QEMUBHFunc(void *opaque); 45f627aab1SPaolo Bonzini typedef void IOHandler(void *opaque); 46f627aab1SPaolo Bonzini 476a1751b7SAlex Bligh struct AioContext { 48e3713e00SPaolo Bonzini GSource source; 49e3713e00SPaolo Bonzini 50a915f4bcSPaolo Bonzini /* The list of registered AIO handlers */ 51a915f4bcSPaolo Bonzini QLIST_HEAD(, AioHandler) aio_handlers; 52a915f4bcSPaolo Bonzini 53a915f4bcSPaolo Bonzini /* This is a simple lock used to protect the aio_handlers list. 54a915f4bcSPaolo Bonzini * Specifically, it's used to ensure that no callbacks are removed while 55a915f4bcSPaolo Bonzini * we're walking and dispatching callbacks. 56a915f4bcSPaolo Bonzini */ 57a915f4bcSPaolo Bonzini int walking_handlers; 58a915f4bcSPaolo Bonzini 59dcc772e2SLiu Ping Fan /* lock to protect between bh's adders and deleter */ 60dcc772e2SLiu Ping Fan QemuMutex bh_lock; 61f627aab1SPaolo Bonzini /* Anchor of the list of Bottom Halves belonging to the context */ 62f627aab1SPaolo Bonzini struct QEMUBH *first_bh; 63f627aab1SPaolo Bonzini 64f627aab1SPaolo Bonzini /* A simple lock used to protect the first_bh list, and ensure that 65f627aab1SPaolo Bonzini * no callbacks are removed while we're walking and dispatching callbacks. 66f627aab1SPaolo Bonzini */ 67f627aab1SPaolo Bonzini int walking_bh; 682f4dc3c1SPaolo Bonzini 692f4dc3c1SPaolo Bonzini /* Used for aio_notify. */ 702f4dc3c1SPaolo Bonzini EventNotifier notifier; 716b5f8762SStefan Hajnoczi 726b5f8762SStefan Hajnoczi /* GPollFDs for aio_poll() */ 736b5f8762SStefan Hajnoczi GArray *pollfds; 749b34277dSStefan Hajnoczi 759b34277dSStefan Hajnoczi /* Thread pool for performing work and receiving completion callbacks */ 769b34277dSStefan Hajnoczi struct ThreadPool *thread_pool; 77dae21b98SAlex Bligh 78dae21b98SAlex Bligh /* TimerLists for calling timers - one per clock type */ 79dae21b98SAlex Bligh QEMUTimerListGroup tlg; 806a1751b7SAlex Bligh }; 81f627aab1SPaolo Bonzini 82f627aab1SPaolo Bonzini /** 83f627aab1SPaolo Bonzini * aio_context_new: Allocate a new AioContext. 84f627aab1SPaolo Bonzini * 85f627aab1SPaolo Bonzini * AioContext provide a mini event-loop that can be waited on synchronously. 86f627aab1SPaolo Bonzini * They also provide bottom halves, a service to execute a piece of code 87f627aab1SPaolo Bonzini * as soon as possible. 88f627aab1SPaolo Bonzini */ 89f627aab1SPaolo Bonzini AioContext *aio_context_new(void); 90f627aab1SPaolo Bonzini 91f627aab1SPaolo Bonzini /** 92e3713e00SPaolo Bonzini * aio_context_ref: 93e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 94e3713e00SPaolo Bonzini * 95e3713e00SPaolo Bonzini * Add a reference to an AioContext. 96e3713e00SPaolo Bonzini */ 97e3713e00SPaolo Bonzini void aio_context_ref(AioContext *ctx); 98e3713e00SPaolo Bonzini 99e3713e00SPaolo Bonzini /** 100e3713e00SPaolo Bonzini * aio_context_unref: 101e3713e00SPaolo Bonzini * @ctx: The AioContext to operate on. 102e3713e00SPaolo Bonzini * 103e3713e00SPaolo Bonzini * Drop a reference to an AioContext. 104e3713e00SPaolo Bonzini */ 105e3713e00SPaolo Bonzini void aio_context_unref(AioContext *ctx); 106e3713e00SPaolo Bonzini 107e3713e00SPaolo Bonzini /** 108f627aab1SPaolo Bonzini * aio_bh_new: Allocate a new bottom half structure. 109f627aab1SPaolo Bonzini * 110f627aab1SPaolo Bonzini * Bottom halves are lightweight callbacks whose invocation is guaranteed 111f627aab1SPaolo Bonzini * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure 112f627aab1SPaolo Bonzini * is opaque and must be allocated prior to its use. 113f627aab1SPaolo Bonzini */ 114f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 115f627aab1SPaolo Bonzini 116f627aab1SPaolo Bonzini /** 1172f4dc3c1SPaolo Bonzini * aio_notify: Force processing of pending events. 1182f4dc3c1SPaolo Bonzini * 1192f4dc3c1SPaolo Bonzini * Similar to signaling a condition variable, aio_notify forces 1202f4dc3c1SPaolo Bonzini * aio_wait to exit, so that the next call will re-examine pending events. 1212f4dc3c1SPaolo Bonzini * The caller of aio_notify will usually call aio_wait again very soon, 1222f4dc3c1SPaolo Bonzini * or go through another iteration of the GLib main loop. Hence, aio_notify 1232f4dc3c1SPaolo Bonzini * also has the side effect of recalculating the sets of file descriptors 1242f4dc3c1SPaolo Bonzini * that the main loop waits for. 1252f4dc3c1SPaolo Bonzini * 1262f4dc3c1SPaolo Bonzini * Calling aio_notify is rarely necessary, because for example scheduling 1272f4dc3c1SPaolo Bonzini * a bottom half calls it already. 1282f4dc3c1SPaolo Bonzini */ 1292f4dc3c1SPaolo Bonzini void aio_notify(AioContext *ctx); 1302f4dc3c1SPaolo Bonzini 1312f4dc3c1SPaolo Bonzini /** 132f627aab1SPaolo Bonzini * aio_bh_poll: Poll bottom halves for an AioContext. 133f627aab1SPaolo Bonzini * 134f627aab1SPaolo Bonzini * These are internal functions used by the QEMU main loop. 135dcc772e2SLiu Ping Fan * And notice that multiple occurrences of aio_bh_poll cannot 136dcc772e2SLiu Ping Fan * be called concurrently 137f627aab1SPaolo Bonzini */ 138f627aab1SPaolo Bonzini int aio_bh_poll(AioContext *ctx); 139f627aab1SPaolo Bonzini 140f627aab1SPaolo Bonzini /** 141f627aab1SPaolo Bonzini * qemu_bh_schedule: Schedule a bottom half. 142f627aab1SPaolo Bonzini * 143f627aab1SPaolo Bonzini * Scheduling a bottom half interrupts the main loop and causes the 144f627aab1SPaolo Bonzini * execution of the callback that was passed to qemu_bh_new. 145f627aab1SPaolo Bonzini * 146f627aab1SPaolo Bonzini * Bottom halves that are scheduled from a bottom half handler are instantly 147f627aab1SPaolo Bonzini * invoked. This can create an infinite loop if a bottom half handler 148f627aab1SPaolo Bonzini * schedules itself. 149f627aab1SPaolo Bonzini * 150f627aab1SPaolo Bonzini * @bh: The bottom half to be scheduled. 151f627aab1SPaolo Bonzini */ 152f627aab1SPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh); 153f627aab1SPaolo Bonzini 154f627aab1SPaolo Bonzini /** 155f627aab1SPaolo Bonzini * qemu_bh_cancel: Cancel execution of a bottom half. 156f627aab1SPaolo Bonzini * 157f627aab1SPaolo Bonzini * Canceling execution of a bottom half undoes the effect of calls to 158f627aab1SPaolo Bonzini * qemu_bh_schedule without freeing its resources yet. While cancellation 159f627aab1SPaolo Bonzini * itself is also wait-free and thread-safe, it can of course race with the 160f627aab1SPaolo Bonzini * loop that executes bottom halves unless you are holding the iothread 161f627aab1SPaolo Bonzini * mutex. This makes it mostly useless if you are not holding the mutex. 162f627aab1SPaolo Bonzini * 163f627aab1SPaolo Bonzini * @bh: The bottom half to be canceled. 164f627aab1SPaolo Bonzini */ 165f627aab1SPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh); 166f627aab1SPaolo Bonzini 167f627aab1SPaolo Bonzini /** 168f627aab1SPaolo Bonzini *qemu_bh_delete: Cancel execution of a bottom half and free its resources. 169f627aab1SPaolo Bonzini * 170f627aab1SPaolo Bonzini * Deleting a bottom half frees the memory that was allocated for it by 171f627aab1SPaolo Bonzini * qemu_bh_new. It also implies canceling the bottom half if it was 172f627aab1SPaolo Bonzini * scheduled. 173dcc772e2SLiu Ping Fan * This func is async. The bottom half will do the delete action at the finial 174dcc772e2SLiu Ping Fan * end. 175f627aab1SPaolo Bonzini * 176f627aab1SPaolo Bonzini * @bh: The bottom half to be deleted. 177f627aab1SPaolo Bonzini */ 178f627aab1SPaolo Bonzini void qemu_bh_delete(QEMUBH *bh); 179f627aab1SPaolo Bonzini 180cd9ba1ebSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 181cd9ba1ebSPaolo Bonzini * attached to the AioContext. 182cd9ba1ebSPaolo Bonzini * 183cd9ba1ebSPaolo Bonzini * This is used internally in the implementation of the GSource. 184cd9ba1ebSPaolo Bonzini */ 185cd9ba1ebSPaolo Bonzini bool aio_pending(AioContext *ctx); 186cd9ba1ebSPaolo Bonzini 1877c0628b2SPaolo Bonzini /* Progress in completing AIO work to occur. This can issue new pending 1887c0628b2SPaolo Bonzini * aio as a result of executing I/O completion or bh callbacks. 189bcdc1857SPaolo Bonzini * 1907c0628b2SPaolo Bonzini * If there is no pending AIO operation or completion (bottom half), 1912ea9b58fSKevin Wolf * return false. If there are pending AIO operations of bottom halves, 1922ea9b58fSKevin Wolf * return true. 1937c0628b2SPaolo Bonzini * 1947c0628b2SPaolo Bonzini * If there are no pending bottom halves, but there are pending AIO 1957c0628b2SPaolo Bonzini * operations, it may not be possible to make any progress without 1967c0628b2SPaolo Bonzini * blocking. If @blocking is true, this function will wait until one 1977c0628b2SPaolo Bonzini * or more AIO events have completed, to ensure something has moved 1987c0628b2SPaolo Bonzini * before returning. 1997c0628b2SPaolo Bonzini */ 2007c0628b2SPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking); 201a76bab49Saliguori 2029958c351SPaolo Bonzini #ifdef CONFIG_POSIX 203a76bab49Saliguori /* Register a file descriptor and associated callbacks. Behaves very similarly 204a76bab49Saliguori * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will 205c57b6656SKevin Wolf * be invoked when using qemu_aio_wait(). 206a76bab49Saliguori * 207a76bab49Saliguori * Code that invokes AIO completion functions should rely on this function 208a76bab49Saliguori * instead of qemu_set_fd_handler[2]. 209a76bab49Saliguori */ 210a915f4bcSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx, 211a915f4bcSPaolo Bonzini int fd, 212a76bab49Saliguori IOHandler *io_read, 213a76bab49Saliguori IOHandler *io_write, 214a76bab49Saliguori void *opaque); 2159958c351SPaolo Bonzini #endif 2169958c351SPaolo Bonzini 2179958c351SPaolo Bonzini /* Register an event notifier and associated callbacks. Behaves very similarly 2189958c351SPaolo Bonzini * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks 219c57b6656SKevin Wolf * will be invoked when using qemu_aio_wait(). 2209958c351SPaolo Bonzini * 2219958c351SPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 2229958c351SPaolo Bonzini * instead of event_notifier_set_handler. 2239958c351SPaolo Bonzini */ 224a915f4bcSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx, 225a915f4bcSPaolo Bonzini EventNotifier *notifier, 226f2e5dca4SStefan Hajnoczi EventNotifierHandler *io_read); 227a915f4bcSPaolo Bonzini 228e3713e00SPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached 229e3713e00SPaolo Bonzini * to this AioContext. 230e3713e00SPaolo Bonzini */ 231e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx); 232e3713e00SPaolo Bonzini 2339b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */ 2349b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx); 2359b34277dSStefan Hajnoczi 236a915f4bcSPaolo Bonzini /* Functions to operate on the main QEMU AioContext. */ 237a915f4bcSPaolo Bonzini 238a915f4bcSPaolo Bonzini bool qemu_aio_wait(void); 2399958c351SPaolo Bonzini void qemu_aio_set_event_notifier(EventNotifier *notifier, 240f2e5dca4SStefan Hajnoczi EventNotifierHandler *io_read); 241a76bab49Saliguori 242a915f4bcSPaolo Bonzini #ifdef CONFIG_POSIX 243a915f4bcSPaolo Bonzini void qemu_aio_set_fd_handler(int fd, 244a915f4bcSPaolo Bonzini IOHandler *io_read, 245a915f4bcSPaolo Bonzini IOHandler *io_write, 246a915f4bcSPaolo Bonzini void *opaque); 247a915f4bcSPaolo Bonzini #endif 248a915f4bcSPaolo Bonzini 249*4e29e831SAlex Bligh /** 250*4e29e831SAlex Bligh * aio_timer_new: 251*4e29e831SAlex Bligh * @ctx: the aio context 252*4e29e831SAlex Bligh * @type: the clock type 253*4e29e831SAlex Bligh * @scale: the scale 254*4e29e831SAlex Bligh * @cb: the callback to call on timer expiry 255*4e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 256*4e29e831SAlex Bligh * 257*4e29e831SAlex Bligh * Allocate a new timer attached to the context @ctx. 258*4e29e831SAlex Bligh * The function is responsible for memory allocation. 259*4e29e831SAlex Bligh * 260*4e29e831SAlex Bligh * The preferred interface is aio_timer_init. Use that 261*4e29e831SAlex Bligh * unless you really need dynamic memory allocation. 262*4e29e831SAlex Bligh * 263*4e29e831SAlex Bligh * Returns: a pointer to the new timer 264*4e29e831SAlex Bligh */ 265*4e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, 266*4e29e831SAlex Bligh int scale, 267*4e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 268*4e29e831SAlex Bligh { 269*4e29e831SAlex Bligh return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); 270*4e29e831SAlex Bligh } 271*4e29e831SAlex Bligh 272*4e29e831SAlex Bligh /** 273*4e29e831SAlex Bligh * aio_timer_init: 274*4e29e831SAlex Bligh * @ctx: the aio context 275*4e29e831SAlex Bligh * @ts: the timer 276*4e29e831SAlex Bligh * @type: the clock type 277*4e29e831SAlex Bligh * @scale: the scale 278*4e29e831SAlex Bligh * @cb: the callback to call on timer expiry 279*4e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 280*4e29e831SAlex Bligh * 281*4e29e831SAlex Bligh * Initialise a new timer attached to the context @ctx. 282*4e29e831SAlex Bligh * The caller is responsible for memory allocation. 283*4e29e831SAlex Bligh */ 284*4e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx, 285*4e29e831SAlex Bligh QEMUTimer *ts, QEMUClockType type, 286*4e29e831SAlex Bligh int scale, 287*4e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 288*4e29e831SAlex Bligh { 289*4e29e831SAlex Bligh timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque); 290*4e29e831SAlex Bligh } 291*4e29e831SAlex Bligh 292a76bab49Saliguori #endif 293