xref: /qemu/include/block/aio.h (revision 6484e422479c93f28e3f8a68258b0eacd3b31e6d)
1a76bab49Saliguori /*
2a76bab49Saliguori  * QEMU aio implementation
3a76bab49Saliguori  *
4a76bab49Saliguori  * Copyright IBM, Corp. 2008
5a76bab49Saliguori  *
6a76bab49Saliguori  * Authors:
7a76bab49Saliguori  *  Anthony Liguori   <aliguori@us.ibm.com>
8a76bab49Saliguori  *
9a76bab49Saliguori  * This work is licensed under the terms of the GNU GPL, version 2.  See
10a76bab49Saliguori  * the COPYING file in the top-level directory.
11a76bab49Saliguori  *
12a76bab49Saliguori  */
13a76bab49Saliguori 
14a76bab49Saliguori #ifndef QEMU_AIO_H
15a76bab49Saliguori #define QEMU_AIO_H
16a76bab49Saliguori 
176a1751b7SAlex Bligh #include "qemu/typedefs.h"
18a76bab49Saliguori #include "qemu-common.h"
191de7afc9SPaolo Bonzini #include "qemu/queue.h"
201de7afc9SPaolo Bonzini #include "qemu/event_notifier.h"
21dcc772e2SLiu Ping Fan #include "qemu/thread.h"
2298563fc3SStefan Hajnoczi #include "qemu/rfifolock.h"
23dae21b98SAlex Bligh #include "qemu/timer.h"
24a76bab49Saliguori 
257c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB;
26097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret);
2785e8dab1SPaolo Bonzini 
28d7331bedSStefan Hajnoczi typedef struct AIOCBInfo {
297c84b1b8SMarkus Armbruster     void (*cancel_async)(BlockAIOCB *acb);
307c84b1b8SMarkus Armbruster     AioContext *(*get_aio_context)(BlockAIOCB *acb);
318c82e9a4SStefan Hajnoczi     size_t aiocb_size;
32d7331bedSStefan Hajnoczi } AIOCBInfo;
3385e8dab1SPaolo Bonzini 
347c84b1b8SMarkus Armbruster struct BlockAIOCB {
35d7331bedSStefan Hajnoczi     const AIOCBInfo *aiocb_info;
3685e8dab1SPaolo Bonzini     BlockDriverState *bs;
37097310b5SMarkus Armbruster     BlockCompletionFunc *cb;
3885e8dab1SPaolo Bonzini     void *opaque;
39f197fe2bSFam Zheng     int refcnt;
4085e8dab1SPaolo Bonzini };
4185e8dab1SPaolo Bonzini 
42d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
43097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque);
448007429aSFam Zheng void qemu_aio_unref(void *p);
45f197fe2bSFam Zheng void qemu_aio_ref(void *p);
4685e8dab1SPaolo Bonzini 
47f627aab1SPaolo Bonzini typedef struct AioHandler AioHandler;
48f627aab1SPaolo Bonzini typedef void QEMUBHFunc(void *opaque);
49f627aab1SPaolo Bonzini typedef void IOHandler(void *opaque);
50f627aab1SPaolo Bonzini 
516a1751b7SAlex Bligh struct AioContext {
52e3713e00SPaolo Bonzini     GSource source;
53e3713e00SPaolo Bonzini 
5498563fc3SStefan Hajnoczi     /* Protects all fields from multi-threaded access */
5598563fc3SStefan Hajnoczi     RFifoLock lock;
5698563fc3SStefan Hajnoczi 
57a915f4bcSPaolo Bonzini     /* The list of registered AIO handlers */
58a915f4bcSPaolo Bonzini     QLIST_HEAD(, AioHandler) aio_handlers;
59a915f4bcSPaolo Bonzini 
60a915f4bcSPaolo Bonzini     /* This is a simple lock used to protect the aio_handlers list.
61a915f4bcSPaolo Bonzini      * Specifically, it's used to ensure that no callbacks are removed while
62a915f4bcSPaolo Bonzini      * we're walking and dispatching callbacks.
63a915f4bcSPaolo Bonzini      */
64a915f4bcSPaolo Bonzini     int walking_handlers;
65a915f4bcSPaolo Bonzini 
660ceb849bSPaolo Bonzini     /* Used to avoid unnecessary event_notifier_set calls in aio_notify.
670ceb849bSPaolo Bonzini      * Writes protected by lock or BQL, reads are lockless.
680ceb849bSPaolo Bonzini      */
690ceb849bSPaolo Bonzini     bool dispatching;
700ceb849bSPaolo Bonzini 
71dcc772e2SLiu Ping Fan     /* lock to protect between bh's adders and deleter */
72dcc772e2SLiu Ping Fan     QemuMutex bh_lock;
730ceb849bSPaolo Bonzini 
74f627aab1SPaolo Bonzini     /* Anchor of the list of Bottom Halves belonging to the context */
75f627aab1SPaolo Bonzini     struct QEMUBH *first_bh;
76f627aab1SPaolo Bonzini 
77f627aab1SPaolo Bonzini     /* A simple lock used to protect the first_bh list, and ensure that
78f627aab1SPaolo Bonzini      * no callbacks are removed while we're walking and dispatching callbacks.
79f627aab1SPaolo Bonzini      */
80f627aab1SPaolo Bonzini     int walking_bh;
812f4dc3c1SPaolo Bonzini 
822f4dc3c1SPaolo Bonzini     /* Used for aio_notify.  */
832f4dc3c1SPaolo Bonzini     EventNotifier notifier;
846b5f8762SStefan Hajnoczi 
859b34277dSStefan Hajnoczi     /* Thread pool for performing work and receiving completion callbacks */
869b34277dSStefan Hajnoczi     struct ThreadPool *thread_pool;
87dae21b98SAlex Bligh 
88dae21b98SAlex Bligh     /* TimerLists for calling timers - one per clock type */
89dae21b98SAlex Bligh     QEMUTimerListGroup tlg;
906a1751b7SAlex Bligh };
91f627aab1SPaolo Bonzini 
920ceb849bSPaolo Bonzini /* Used internally to synchronize aio_poll against qemu_bh_schedule.  */
930ceb849bSPaolo Bonzini void aio_set_dispatching(AioContext *ctx, bool dispatching);
940ceb849bSPaolo Bonzini 
95f627aab1SPaolo Bonzini /**
96f627aab1SPaolo Bonzini  * aio_context_new: Allocate a new AioContext.
97f627aab1SPaolo Bonzini  *
98f627aab1SPaolo Bonzini  * AioContext provide a mini event-loop that can be waited on synchronously.
99f627aab1SPaolo Bonzini  * They also provide bottom halves, a service to execute a piece of code
100f627aab1SPaolo Bonzini  * as soon as possible.
101f627aab1SPaolo Bonzini  */
1022f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp);
103f627aab1SPaolo Bonzini 
104f627aab1SPaolo Bonzini /**
105e3713e00SPaolo Bonzini  * aio_context_ref:
106e3713e00SPaolo Bonzini  * @ctx: The AioContext to operate on.
107e3713e00SPaolo Bonzini  *
108e3713e00SPaolo Bonzini  * Add a reference to an AioContext.
109e3713e00SPaolo Bonzini  */
110e3713e00SPaolo Bonzini void aio_context_ref(AioContext *ctx);
111e3713e00SPaolo Bonzini 
112e3713e00SPaolo Bonzini /**
113e3713e00SPaolo Bonzini  * aio_context_unref:
114e3713e00SPaolo Bonzini  * @ctx: The AioContext to operate on.
115e3713e00SPaolo Bonzini  *
116e3713e00SPaolo Bonzini  * Drop a reference to an AioContext.
117e3713e00SPaolo Bonzini  */
118e3713e00SPaolo Bonzini void aio_context_unref(AioContext *ctx);
119e3713e00SPaolo Bonzini 
12098563fc3SStefan Hajnoczi /* Take ownership of the AioContext.  If the AioContext will be shared between
12149110174SPaolo Bonzini  * threads, and a thread does not want to be interrupted, it will have to
12249110174SPaolo Bonzini  * take ownership around calls to aio_poll().  Otherwise, aio_poll()
12349110174SPaolo Bonzini  * automatically takes care of calling aio_context_acquire and
12449110174SPaolo Bonzini  * aio_context_release.
12598563fc3SStefan Hajnoczi  *
12649110174SPaolo Bonzini  * Access to timers and BHs from a thread that has not acquired AioContext
12749110174SPaolo Bonzini  * is possible.  Access to callbacks for now must be done while the AioContext
12849110174SPaolo Bonzini  * is owned by the thread (FIXME).
12998563fc3SStefan Hajnoczi  */
13098563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx);
13198563fc3SStefan Hajnoczi 
13298563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */
13398563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx);
13498563fc3SStefan Hajnoczi 
135e3713e00SPaolo Bonzini /**
136f627aab1SPaolo Bonzini  * aio_bh_new: Allocate a new bottom half structure.
137f627aab1SPaolo Bonzini  *
138f627aab1SPaolo Bonzini  * Bottom halves are lightweight callbacks whose invocation is guaranteed
139f627aab1SPaolo Bonzini  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
140f627aab1SPaolo Bonzini  * is opaque and must be allocated prior to its use.
141f627aab1SPaolo Bonzini  */
142f627aab1SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
143f627aab1SPaolo Bonzini 
144f627aab1SPaolo Bonzini /**
1452f4dc3c1SPaolo Bonzini  * aio_notify: Force processing of pending events.
1462f4dc3c1SPaolo Bonzini  *
1472f4dc3c1SPaolo Bonzini  * Similar to signaling a condition variable, aio_notify forces
 * aio_poll to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_poll again very soon,
1502f4dc3c1SPaolo Bonzini  * or go through another iteration of the GLib main loop.  Hence, aio_notify
1512f4dc3c1SPaolo Bonzini  * also has the side effect of recalculating the sets of file descriptors
1522f4dc3c1SPaolo Bonzini  * that the main loop waits for.
1532f4dc3c1SPaolo Bonzini  *
1542f4dc3c1SPaolo Bonzini  * Calling aio_notify is rarely necessary, because for example scheduling
1552f4dc3c1SPaolo Bonzini  * a bottom half calls it already.
1562f4dc3c1SPaolo Bonzini  */
1572f4dc3c1SPaolo Bonzini void aio_notify(AioContext *ctx);
1582f4dc3c1SPaolo Bonzini 
1592f4dc3c1SPaolo Bonzini /**
160f627aab1SPaolo Bonzini  * aio_bh_poll: Poll bottom halves for an AioContext.
161f627aab1SPaolo Bonzini  *
162f627aab1SPaolo Bonzini  * These are internal functions used by the QEMU main loop.
 * Note that multiple invocations of aio_bh_poll must not run
 * concurrently.
165f627aab1SPaolo Bonzini  */
166f627aab1SPaolo Bonzini int aio_bh_poll(AioContext *ctx);
167f627aab1SPaolo Bonzini 
168f627aab1SPaolo Bonzini /**
169f627aab1SPaolo Bonzini  * qemu_bh_schedule: Schedule a bottom half.
170f627aab1SPaolo Bonzini  *
171f627aab1SPaolo Bonzini  * Scheduling a bottom half interrupts the main loop and causes the
172f627aab1SPaolo Bonzini  * execution of the callback that was passed to qemu_bh_new.
173f627aab1SPaolo Bonzini  *
174f627aab1SPaolo Bonzini  * Bottom halves that are scheduled from a bottom half handler are instantly
175f627aab1SPaolo Bonzini  * invoked.  This can create an infinite loop if a bottom half handler
176f627aab1SPaolo Bonzini  * schedules itself.
177f627aab1SPaolo Bonzini  *
178f627aab1SPaolo Bonzini  * @bh: The bottom half to be scheduled.
179f627aab1SPaolo Bonzini  */
180f627aab1SPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh);
181f627aab1SPaolo Bonzini 
182f627aab1SPaolo Bonzini /**
183f627aab1SPaolo Bonzini  * qemu_bh_cancel: Cancel execution of a bottom half.
184f627aab1SPaolo Bonzini  *
185f627aab1SPaolo Bonzini  * Canceling execution of a bottom half undoes the effect of calls to
186f627aab1SPaolo Bonzini  * qemu_bh_schedule without freeing its resources yet.  While cancellation
187f627aab1SPaolo Bonzini  * itself is also wait-free and thread-safe, it can of course race with the
188f627aab1SPaolo Bonzini  * loop that executes bottom halves unless you are holding the iothread
189f627aab1SPaolo Bonzini  * mutex.  This makes it mostly useless if you are not holding the mutex.
190f627aab1SPaolo Bonzini  *
191f627aab1SPaolo Bonzini  * @bh: The bottom half to be canceled.
192f627aab1SPaolo Bonzini  */
193f627aab1SPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh);
194f627aab1SPaolo Bonzini 
195f627aab1SPaolo Bonzini /**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
197f627aab1SPaolo Bonzini  *
198f627aab1SPaolo Bonzini  * Deleting a bottom half frees the memory that was allocated for it by
199f627aab1SPaolo Bonzini  * qemu_bh_new.  It also implies canceling the bottom half if it was
200f627aab1SPaolo Bonzini  * scheduled.
 * This function is asynchronous: the actual deletion of the bottom half
 * is deferred and carried out later.
203f627aab1SPaolo Bonzini  *
204f627aab1SPaolo Bonzini  * @bh: The bottom half to be deleted.
205f627aab1SPaolo Bonzini  */
206f627aab1SPaolo Bonzini void qemu_bh_delete(QEMUBH *bh);
207f627aab1SPaolo Bonzini 
208cd9ba1ebSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
209a3462c65SPaolo Bonzini  * attached to the AioContext, before g_poll is invoked.
210a3462c65SPaolo Bonzini  *
211a3462c65SPaolo Bonzini  * This is used internally in the implementation of the GSource.
212a3462c65SPaolo Bonzini  */
213a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx);
214a3462c65SPaolo Bonzini 
215a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
216a3462c65SPaolo Bonzini  * attached to the AioContext, after g_poll is invoked.
217cd9ba1ebSPaolo Bonzini  *
218cd9ba1ebSPaolo Bonzini  * This is used internally in the implementation of the GSource.
219cd9ba1ebSPaolo Bonzini  */
220cd9ba1ebSPaolo Bonzini bool aio_pending(AioContext *ctx);
221cd9ba1ebSPaolo Bonzini 
222e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext.
223e4c7e2d1SPaolo Bonzini  *
224e4c7e2d1SPaolo Bonzini  * This is used internally in the implementation of the GSource.
225e4c7e2d1SPaolo Bonzini  */
226e4c7e2d1SPaolo Bonzini bool aio_dispatch(AioContext *ctx);
227e4c7e2d1SPaolo Bonzini 
/* Make progress in completing AIO work.  This can issue new pending
2297c0628b2SPaolo Bonzini  * aio as a result of executing I/O completion or bh callbacks.
230bcdc1857SPaolo Bonzini  *
231acfb23adSPaolo Bonzini  * Return whether any progress was made by executing AIO or bottom half
232acfb23adSPaolo Bonzini  * handlers.  If @blocking == true, this should always be true except
233acfb23adSPaolo Bonzini  * if someone called aio_notify.
2347c0628b2SPaolo Bonzini  *
2357c0628b2SPaolo Bonzini  * If there are no pending bottom halves, but there are pending AIO
2367c0628b2SPaolo Bonzini  * operations, it may not be possible to make any progress without
2377c0628b2SPaolo Bonzini  * blocking.  If @blocking is true, this function will wait until one
2387c0628b2SPaolo Bonzini  * or more AIO events have completed, to ensure something has moved
2397c0628b2SPaolo Bonzini  * before returning.
2407c0628b2SPaolo Bonzini  */
2417c0628b2SPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking);
242a76bab49Saliguori 
243a76bab49Saliguori /* Register a file descriptor and associated callbacks.  Behaves very similarly
244*6484e422SFam Zheng  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
24587f68d31SPaolo Bonzini  * be invoked when using aio_poll().
246a76bab49Saliguori  *
247a76bab49Saliguori  * Code that invokes AIO completion functions should rely on this function
248a76bab49Saliguori  * instead of qemu_set_fd_handler[2].
249a76bab49Saliguori  */
250a915f4bcSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx,
251a915f4bcSPaolo Bonzini                         int fd,
252a76bab49Saliguori                         IOHandler *io_read,
253a76bab49Saliguori                         IOHandler *io_write,
254a76bab49Saliguori                         void *opaque);
2559958c351SPaolo Bonzini 
2569958c351SPaolo Bonzini /* Register an event notifier and associated callbacks.  Behaves very similarly
2579958c351SPaolo Bonzini  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
25887f68d31SPaolo Bonzini  * will be invoked when using aio_poll().
2599958c351SPaolo Bonzini  *
2609958c351SPaolo Bonzini  * Code that invokes AIO completion functions should rely on this function
2619958c351SPaolo Bonzini  * instead of event_notifier_set_handler.
2629958c351SPaolo Bonzini  */
263a915f4bcSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx,
264a915f4bcSPaolo Bonzini                             EventNotifier *notifier,
265f2e5dca4SStefan Hajnoczi                             EventNotifierHandler *io_read);
266a915f4bcSPaolo Bonzini 
267e3713e00SPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached
268e3713e00SPaolo Bonzini  * to this AioContext.
269e3713e00SPaolo Bonzini  */
270e3713e00SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx);
271e3713e00SPaolo Bonzini 
2729b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */
2739b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
2749b34277dSStefan Hajnoczi 
2754e29e831SAlex Bligh /**
2764e29e831SAlex Bligh  * aio_timer_new:
2774e29e831SAlex Bligh  * @ctx: the aio context
2784e29e831SAlex Bligh  * @type: the clock type
2794e29e831SAlex Bligh  * @scale: the scale
2804e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
2814e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
2824e29e831SAlex Bligh  *
2834e29e831SAlex Bligh  * Allocate a new timer attached to the context @ctx.
2844e29e831SAlex Bligh  * The function is responsible for memory allocation.
2854e29e831SAlex Bligh  *
2864e29e831SAlex Bligh  * The preferred interface is aio_timer_init. Use that
2874e29e831SAlex Bligh  * unless you really need dynamic memory allocation.
2884e29e831SAlex Bligh  *
2894e29e831SAlex Bligh  * Returns: a pointer to the new timer
2904e29e831SAlex Bligh  */
2914e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
2924e29e831SAlex Bligh                                        int scale,
2934e29e831SAlex Bligh                                        QEMUTimerCB *cb, void *opaque)
2944e29e831SAlex Bligh {
2954e29e831SAlex Bligh     return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
2964e29e831SAlex Bligh }
2974e29e831SAlex Bligh 
2984e29e831SAlex Bligh /**
2994e29e831SAlex Bligh  * aio_timer_init:
3004e29e831SAlex Bligh  * @ctx: the aio context
3014e29e831SAlex Bligh  * @ts: the timer
3024e29e831SAlex Bligh  * @type: the clock type
3034e29e831SAlex Bligh  * @scale: the scale
3044e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
3054e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
3064e29e831SAlex Bligh  *
3074e29e831SAlex Bligh  * Initialise a new timer attached to the context @ctx.
3084e29e831SAlex Bligh  * The caller is responsible for memory allocation.
3094e29e831SAlex Bligh  */
3104e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx,
3114e29e831SAlex Bligh                                   QEMUTimer *ts, QEMUClockType type,
3124e29e831SAlex Bligh                                   int scale,
3134e29e831SAlex Bligh                                   QEMUTimerCB *cb, void *opaque)
3144e29e831SAlex Bligh {
315f186aa97SPaolo Bonzini     timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
3164e29e831SAlex Bligh }
3174e29e831SAlex Bligh 
318845ca10dSPaolo Bonzini /**
319845ca10dSPaolo Bonzini  * aio_compute_timeout:
320845ca10dSPaolo Bonzini  * @ctx: the aio context
321845ca10dSPaolo Bonzini  *
322845ca10dSPaolo Bonzini  * Compute the timeout that a blocking aio_poll should use.
323845ca10dSPaolo Bonzini  */
324845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx);
325845ca10dSPaolo Bonzini 
326a76bab49Saliguori #endif
327