/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#include "qemu/typedefs.h"
#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
#include "qemu/rfifolock.h"
#include "qemu/timer.h"

typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);

typedef struct AIOCBInfo {
    void (*cancel)(BlockDriverAIOCB *acb);
    size_t aiocb_size;
} AIOCBInfo;

struct BlockDriverAIOCB {
    const AIOCBInfo *aiocb_info;
    BlockDriverState *bs;
    BlockDriverCompletionFunc *cb;
    void *opaque;
};

void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque);
void qemu_aio_release(void *p);
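
/* Example (illustrative sketch, not part of the original header): a driver
 * typically embeds BlockDriverAIOCB as the first member of its own AIOCB
 * struct and allocates it with qemu_aio_get.  MyAIOCB and my_aio_cancel
 * below are hypothetical names.
 *
 *     typedef struct MyAIOCB {
 *         BlockDriverAIOCB common;    // must be the first member
 *         int my_state;
 *     } MyAIOCB;
 *
 *     static void my_aio_cancel(BlockDriverAIOCB *acb) { ... }
 *
 *     static const AIOCBInfo my_aiocb_info = {
 *         .cancel     = my_aio_cancel,
 *         .aiocb_size = sizeof(MyAIOCB),
 *     };
 *
 *     MyAIOCB *acb = qemu_aio_get(&my_aiocb_info, bs, cb, opaque);
 *     // ... on completion:
 *     acb->common.cb(acb->common.opaque, 0);
 *     qemu_aio_release(acb);
 */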

typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
typedef void IOHandler(void *opaque);

struct AioContext {
    GSource source;

    /* Protects all fields from multi-threaded access */
    RFifoLock lock;

    /* The list of registered AIO handlers */
    QLIST_HEAD(, AioHandler) aio_handlers;

    /* This is a simple lock used to protect the aio_handlers list.
     * Specifically, it's used to ensure that no callbacks are removed while
     * we're walking and dispatching callbacks.
     */
    int walking_handlers;

    /* Used to avoid unnecessary event_notifier_set calls in aio_notify.
     * Writes protected by lock or BQL, reads are lockless.
     */
    bool dispatching;
    /* Lock protecting the bottom-half list against concurrent adders and
     * deleters */
    QemuMutex bh_lock;

    /* Anchor of the list of Bottom Halves belonging to the context */
    struct QEMUBH *first_bh;

    /* A simple lock used to protect the first_bh list, and ensure that
     * no callbacks are removed while we're walking and dispatching callbacks.
     */
    int walking_bh;

    /* Used for aio_notify.  */
    EventNotifier notifier;

    /* GPollFDs for aio_poll() */
    GArray *pollfds;

    /* Thread pool for performing work and receiving completion callbacks */
    struct ThreadPool *thread_pool;

    /* TimerLists for calling timers - one per clock type */
    QEMUTimerListGroup tlg;
};

/* Used internally to synchronize aio_poll against qemu_bh_schedule.  */
void aio_set_dispatching(AioContext *ctx, bool dispatching);

/**
 * aio_context_new: Allocate a new AioContext.
 *
 * An AioContext provides a mini event loop that can be waited on
 * synchronously.  It also provides bottom halves, a service to execute
 * a piece of code as soon as possible.
 */
AioContext *aio_context_new(void);
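
/* Example (illustrative sketch, not part of the original header): creating a
 * context, attaching its GSource to the GLib main loop, and dropping the
 * initial reference when done.
 *
 *     AioContext *ctx = aio_context_new();
 *     GSource *src = aio_get_g_source(ctx);
 *     g_source_attach(src, NULL);     // poll it from the default main loop
 *     g_source_unref(src);
 *     // ... later, when the context is no longer needed:
 *     aio_context_unref(ctx);
 */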

/**
 * aio_context_ref:
 * @ctx: The AioContext to operate on.
 *
 * Add a reference to an AioContext.
 */
void aio_context_ref(AioContext *ctx);

/**
 * aio_context_unref:
 * @ctx: The AioContext to operate on.
 *
 * Drop a reference to an AioContext.
 */
void aio_context_unref(AioContext *ctx);

/* Take ownership of the AioContext.  If the AioContext will be shared between
 * threads, a thread must have ownership when calling aio_poll().
 *
 * Note that multiple threads calling aio_poll() means timers, BHs, and
 * callbacks may be invoked from a different thread than they were registered
 * from.  Therefore, code must use AioContext acquire/release or use
 * fine-grained synchronization to protect shared state if other threads will
 * be accessing it simultaneously.
 */
void aio_context_acquire(AioContext *ctx);

/* Relinquish ownership of the AioContext. */
void aio_context_release(AioContext *ctx);
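
/* Example (illustrative sketch, not part of the original header): a thread
 * that shares a context with others takes ownership around aio_poll().
 * The 'done' condition is hypothetical.
 *
 *     aio_context_acquire(ctx);
 *     while (!done) {
 *         aio_poll(ctx, true);        // block until at least one event
 *     }
 *     aio_context_release(ctx);
 */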

/**
 * aio_bh_new: Allocate a new bottom half structure.
 *
 * Bottom halves are lightweight callbacks whose invocation is guaranteed
 * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
 * is opaque and must be allocated prior to its use.
 */
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
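
/* Example (illustrative sketch, not part of the original header): the usual
 * bottom-half life cycle.  my_bh_cb and the MyState type are hypothetical.
 *
 *     static void my_bh_cb(void *opaque)
 *     {
 *         struct MyState *s = opaque;
 *         // ... runs from the context's event loop, not the scheduler
 *     }
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_bh_cb, s);
 *     qemu_bh_schedule(bh);           // safe even from a signal handler
 *     // ... once the bottom half is no longer needed:
 *     qemu_bh_delete(bh);
 */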

/**
 * aio_notify: Force processing of pending events.
 *
 * Similar to signaling a condition variable, aio_notify forces
 * aio_wait to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_wait again very soon,
 * or go through another iteration of the GLib main loop.  Hence, aio_notify
 * also has the side effect of recalculating the sets of file descriptors
 * that the main loop waits for.
 *
 * Calling aio_notify is rarely necessary, because for example scheduling
 * a bottom half calls it already.
 */
void aio_notify(AioContext *ctx);

/**
 * aio_bh_poll: Poll bottom halves for an AioContext.
 *
 * These are internal functions used by the QEMU main loop.
 * Note that aio_bh_poll must not be called concurrently from
 * multiple threads.
 */
int aio_bh_poll(AioContext *ctx);

/**
 * qemu_bh_schedule: Schedule a bottom half.
 *
 * Scheduling a bottom half interrupts the main loop and causes the
 * execution of the callback that was passed to qemu_bh_new.
 *
 * Bottom halves that are scheduled from a bottom half handler are instantly
 * invoked.  This can create an infinite loop if a bottom half handler
 * schedules itself.
 *
 * @bh: The bottom half to be scheduled.
 */
void qemu_bh_schedule(QEMUBH *bh);

/**
 * qemu_bh_cancel: Cancel execution of a bottom half.
 *
 * Canceling execution of a bottom half undoes the effect of calls to
 * qemu_bh_schedule without freeing its resources yet.  While cancellation
 * itself is also wait-free and thread-safe, it can of course race with the
 * loop that executes bottom halves unless you are holding the iothread
 * mutex.  This makes it mostly useless if you are not holding the mutex.
 *
 * @bh: The bottom half to be canceled.
 */
void qemu_bh_cancel(QEMUBH *bh);

/**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
 *
 * Deleting a bottom half frees the memory that was allocated for it by
 * qemu_bh_new.  It also implies canceling the bottom half if it was
 * scheduled.
 * This function is asynchronous: the bottom half is only unlinked and
 * freed during a later pass over the bottom-half list.
 *
 * @bh: The bottom half to be deleted.
 */
void qemu_bh_delete(QEMUBH *bh);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_pending(AioContext *ctx);

/* Make progress in completing AIO work.  This can issue new pending
 * aio as a result of executing I/O completion or bh callbacks.
 *
 * If there is no pending AIO operation or completion (bottom half),
 * return false.  If there are pending AIO operations or bottom halves,
 * return true.
 *
 * If there are no pending bottom halves, but there are pending AIO
 * operations, it may not be possible to make any progress without
 * blocking.  If @blocking is true, this function will wait until one
 * or more AIO events have completed, to ensure something has moved
 * before returning.
 */
bool aio_poll(AioContext *ctx, bool blocking);
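
/* Example (illustrative sketch, not part of the original header): draining
 * all currently pending work on a context without blocking.
 *
 *     aio_context_acquire(ctx);
 *     while (aio_poll(ctx, false)) {
 *         // each iteration dispatched at least one callback
 *     }
 *     aio_context_release(ctx);
 */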

#ifdef CONFIG_POSIX
/* Register a file descriptor and associated callbacks.  Behaves very similarly
 * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
 * be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of qemu_set_fd_handler[2].
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque);
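
/* Example (illustrative sketch, not part of the original header): watching a
 * socket for readability.  my_read_cb and the MyState type are hypothetical.
 *
 *     static void my_read_cb(void *opaque)
 *     {
 *         struct MyState *s = opaque;
 *         // ... read from s->fd until it would block
 *     }
 *
 *     aio_set_fd_handler(ctx, s->fd, my_read_cb, NULL, s);
 *     // ... to unregister later, pass NULL handlers:
 *     aio_set_fd_handler(ctx, s->fd, NULL, NULL, NULL);
 */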
#endif

/* Register an event notifier and associated callbacks.  Behaves very similarly
 * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
 * will be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of event_notifier_set_handler.
 */
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read);
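
/* Example (illustrative sketch, not part of the original header): wiring an
 * EventNotifier into a context.  my_notifier_cb is a hypothetical name.
 *
 *     static void my_notifier_cb(EventNotifier *e)
 *     {
 *         event_notifier_test_and_clear(e);
 *         // ... handle the event
 *     }
 *
 *     EventNotifier notifier;
 *     event_notifier_init(&notifier, 0);
 *     aio_set_event_notifier(ctx, &notifier, my_notifier_cb);
 *     // another thread can now wake the context:
 *     event_notifier_set(&notifier);
 */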

/* Return a GSource that lets the main loop poll the file descriptors attached
 * to this AioContext.
 */
GSource *aio_get_g_source(AioContext *ctx);

/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);

/**
 * aio_timer_new:
 * @ctx: the aio context
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Allocate a new timer attached to the context @ctx.
 * The function is responsible for memory allocation.
 *
 * The preferred interface is aio_timer_init. Use that
 * unless you really need dynamic memory allocation.
 *
 * Returns: a pointer to the new timer
 */
static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
                                       int scale,
                                       QEMUTimerCB *cb, void *opaque)
{
    return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
}
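
/* Example (illustrative sketch, not part of the original header): a one-shot
 * timer on the context's realtime clock.  my_timer_cb is hypothetical.
 *
 *     static void my_timer_cb(void *opaque)
 *     {
 *         // ... fires from the context's event loop
 *     }
 *
 *     QEMUTimer *t = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_MS,
 *                                  my_timer_cb, s);
 *     timer_mod(t, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 100);
 *     // ... when no longer needed:
 *     timer_del(t);
 *     timer_free(t);
 */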

/**
 * aio_timer_init:
 * @ctx: the aio context
 * @ts: the timer
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Initialise a new timer attached to the context @ctx.
 * The caller is responsible for memory allocation.
 */
static inline void aio_timer_init(AioContext *ctx,
                                  QEMUTimer *ts, QEMUClockType type,
                                  int scale,
                                  QEMUTimerCB *cb, void *opaque)
{
    timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
}
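
/* Example (illustrative sketch, not part of the original header): embedding
 * the timer in a larger structure to avoid a separate allocation.  MyState
 * and my_timer_cb are hypothetical.
 *
 *     struct MyState {
 *         QEMUTimer timer;
 *         // ... other fields
 *     };
 *
 *     aio_timer_init(ctx, &s->timer, QEMU_CLOCK_VIRTUAL, SCALE_NS,
 *                    my_timer_cb, s);
 *     timer_mod(&s->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1000000);
 */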

#endif