130a9aa69SJean-Philippe Brucker #include <libaio.h>
230a9aa69SJean-Philippe Brucker #include <pthread.h>
330a9aa69SJean-Philippe Brucker #include <sys/eventfd.h>
430a9aa69SJean-Philippe Brucker
5*2790307cSJean-Philippe Brucker #include "kvm/brlock.h"
630a9aa69SJean-Philippe Brucker #include "kvm/disk-image.h"
730a9aa69SJean-Philippe Brucker #include "kvm/kvm.h"
830a9aa69SJean-Philippe Brucker #include "linux/list.h"
930a9aa69SJean-Philippe Brucker
1030a9aa69SJean-Philippe Brucker #define AIO_MAX 256
1130a9aa69SJean-Philippe Brucker
aio_submit(struct disk_image * disk,int nr,struct iocb ** ios)12*2790307cSJean-Philippe Brucker static int aio_submit(struct disk_image *disk, int nr, struct iocb **ios)
1330a9aa69SJean-Philippe Brucker {
1430a9aa69SJean-Philippe Brucker int ret;
1530a9aa69SJean-Philippe Brucker
16*2790307cSJean-Philippe Brucker __sync_fetch_and_add(&disk->aio_inflight, nr);
17*2790307cSJean-Philippe Brucker /*
18*2790307cSJean-Philippe Brucker * A wmb() is needed here, to ensure disk_aio_thread() sees this
19*2790307cSJean-Philippe Brucker * increase after receiving the events. It is included in the
20*2790307cSJean-Philippe Brucker * __sync_fetch_and_add (as a full barrier).
21*2790307cSJean-Philippe Brucker */
2230a9aa69SJean-Philippe Brucker restart:
23*2790307cSJean-Philippe Brucker ret = io_submit(disk->ctx, nr, ios);
2430a9aa69SJean-Philippe Brucker if (ret == -EAGAIN)
2530a9aa69SJean-Philippe Brucker goto restart;
26*2790307cSJean-Philippe Brucker else if (ret <= 0)
27*2790307cSJean-Philippe Brucker /* disk_aio_thread() is never going to see those */
28*2790307cSJean-Philippe Brucker __sync_fetch_and_sub(&disk->aio_inflight, nr);
2930a9aa69SJean-Philippe Brucker
3030a9aa69SJean-Philippe Brucker return ret;
3130a9aa69SJean-Philippe Brucker }
3230a9aa69SJean-Philippe Brucker
/*
 * Queue an asynchronous vectored read from the disk's file descriptor.
 *
 * @sector is converted to a byte offset with SECTOR_SHIFT. The request is
 * bound to disk->evt with io_set_eventfd(), and @param is stored in the
 * request's data field.
 *
 * Returns whatever aio_submit() returns for the single request.
 */
ssize_t raw_image__read_async(struct disk_image *disk, u64 sector,
			      const struct iovec *iov, int iovcount,
			      void *param)
{
	struct iocb req;
	struct iocb *batch[1] = { &req };

	io_prep_preadv(&req, disk->fd, iov, iovcount, sector << SECTOR_SHIFT);
	io_set_eventfd(&req, disk->evt);
	req.data = param;

	return aio_submit(disk, 1, batch);
}
4730a9aa69SJean-Philippe Brucker
/*
 * Queue an asynchronous vectored write to the disk's file descriptor.
 *
 * @sector is converted to a byte offset with SECTOR_SHIFT. The request is
 * bound to disk->evt with io_set_eventfd(), and @param is stored in the
 * request's data field.
 *
 * Returns whatever aio_submit() returns for the single request.
 */
ssize_t raw_image__write_async(struct disk_image *disk, u64 sector,
			       const struct iovec *iov, int iovcount,
			       void *param)
{
	struct iocb req;
	struct iocb *batch[1] = { &req };

	io_prep_pwritev(&req, disk->fd, iov, iovcount, sector << SECTOR_SHIFT);
	io_set_eventfd(&req, disk->evt);
	req.data = param;

	return aio_submit(disk, 1, batch);
}
62*2790307cSJean-Philippe Brucker
63*2790307cSJean-Philippe Brucker /*
64*2790307cSJean-Philippe Brucker * When this function returns there are no in-flight I/O. Caller ensures that
65*2790307cSJean-Philippe Brucker * io_submit() isn't called concurrently.
66*2790307cSJean-Philippe Brucker *
67*2790307cSJean-Philippe Brucker * Returns an inaccurate number of I/O that was in-flight when the function was
68*2790307cSJean-Philippe Brucker * called.
69*2790307cSJean-Philippe Brucker */
raw_image__wait(struct disk_image * disk)70*2790307cSJean-Philippe Brucker int raw_image__wait(struct disk_image *disk)
71*2790307cSJean-Philippe Brucker {
72*2790307cSJean-Philippe Brucker u64 inflight = disk->aio_inflight;
73*2790307cSJean-Philippe Brucker
74*2790307cSJean-Philippe Brucker while (disk->aio_inflight) {
75*2790307cSJean-Philippe Brucker usleep(100);
76*2790307cSJean-Philippe Brucker barrier();
77*2790307cSJean-Philippe Brucker }
78*2790307cSJean-Philippe Brucker
79*2790307cSJean-Philippe Brucker return inflight;
8030a9aa69SJean-Philippe Brucker }
8130a9aa69SJean-Philippe Brucker
disk_aio_get_events(struct disk_image * disk)82200cb823SJean-Philippe Brucker static int disk_aio_get_events(struct disk_image *disk)
8330a9aa69SJean-Philippe Brucker {
8430a9aa69SJean-Philippe Brucker struct io_event event[AIO_MAX];
8530a9aa69SJean-Philippe Brucker struct timespec notime = {0};
8630a9aa69SJean-Philippe Brucker int nr, i;
87200cb823SJean-Philippe Brucker
88200cb823SJean-Philippe Brucker do {
89200cb823SJean-Philippe Brucker nr = io_getevents(disk->ctx, 1, ARRAY_SIZE(event), event, ¬ime);
90200cb823SJean-Philippe Brucker for (i = 0; i < nr; i++)
91200cb823SJean-Philippe Brucker disk->disk_req_cb(event[i].data, event[i].res);
92*2790307cSJean-Philippe Brucker
93*2790307cSJean-Philippe Brucker /* Pairs with wmb() in aio_submit() */
94*2790307cSJean-Philippe Brucker rmb();
95*2790307cSJean-Philippe Brucker __sync_fetch_and_sub(&disk->aio_inflight, nr);
96*2790307cSJean-Philippe Brucker
97200cb823SJean-Philippe Brucker } while (nr > 0);
98200cb823SJean-Philippe Brucker
99200cb823SJean-Philippe Brucker return 0;
100200cb823SJean-Philippe Brucker }
101200cb823SJean-Philippe Brucker
disk_aio_thread(void * param)102200cb823SJean-Philippe Brucker static void *disk_aio_thread(void *param)
103200cb823SJean-Philippe Brucker {
104200cb823SJean-Philippe Brucker struct disk_image *disk = param;
10530a9aa69SJean-Philippe Brucker u64 dummy;
10630a9aa69SJean-Philippe Brucker
10730a9aa69SJean-Philippe Brucker kvm__set_thread_name("disk-image-io");
10830a9aa69SJean-Philippe Brucker
10930a9aa69SJean-Philippe Brucker while (read(disk->evt, &dummy, sizeof(dummy)) > 0) {
110200cb823SJean-Philippe Brucker if (disk_aio_get_events(disk))
111200cb823SJean-Philippe Brucker break;
11230a9aa69SJean-Philippe Brucker }
11330a9aa69SJean-Philippe Brucker
11430a9aa69SJean-Philippe Brucker return NULL;
11530a9aa69SJean-Philippe Brucker }
11630a9aa69SJean-Philippe Brucker
disk_aio_setup(struct disk_image * disk)11730a9aa69SJean-Philippe Brucker int disk_aio_setup(struct disk_image *disk)
11830a9aa69SJean-Philippe Brucker {
11930a9aa69SJean-Philippe Brucker int r;
12030a9aa69SJean-Philippe Brucker
121d62e8ee0SJean-Philippe Brucker /* No need to setup AIO if the disk ops won't make use of it */
122d62e8ee0SJean-Philippe Brucker if (!disk->ops->async)
123d62e8ee0SJean-Philippe Brucker return 0;
124d62e8ee0SJean-Philippe Brucker
12530a9aa69SJean-Philippe Brucker disk->evt = eventfd(0, 0);
12630a9aa69SJean-Philippe Brucker if (disk->evt < 0)
12730a9aa69SJean-Philippe Brucker return -errno;
12830a9aa69SJean-Philippe Brucker
12930a9aa69SJean-Philippe Brucker io_setup(AIO_MAX, &disk->ctx);
130a839180eSJean-Philippe Brucker r = pthread_create(&disk->thread, NULL, disk_aio_thread, disk);
13130a9aa69SJean-Philippe Brucker if (r) {
13230a9aa69SJean-Philippe Brucker r = -errno;
13330a9aa69SJean-Philippe Brucker close(disk->evt);
13430a9aa69SJean-Philippe Brucker return r;
13530a9aa69SJean-Philippe Brucker }
136d62e8ee0SJean-Philippe Brucker
137d62e8ee0SJean-Philippe Brucker disk->async = true;
13830a9aa69SJean-Philippe Brucker return 0;
13930a9aa69SJean-Philippe Brucker }
14030a9aa69SJean-Philippe Brucker
disk_aio_destroy(struct disk_image * disk)14130a9aa69SJean-Philippe Brucker void disk_aio_destroy(struct disk_image *disk)
14230a9aa69SJean-Philippe Brucker {
143d62e8ee0SJean-Philippe Brucker if (!disk->async)
144d62e8ee0SJean-Philippe Brucker return;
145d62e8ee0SJean-Philippe Brucker
146a839180eSJean-Philippe Brucker pthread_cancel(disk->thread);
147a839180eSJean-Philippe Brucker pthread_join(disk->thread, NULL);
14830a9aa69SJean-Philippe Brucker close(disk->evt);
14930a9aa69SJean-Philippe Brucker io_destroy(disk->ctx);
15030a9aa69SJean-Philippe Brucker }
151