xref: /kvmtool/disk/aio.c (revision 2790307c3d6d2aad838e1eef76ec0aa21b110698)
130a9aa69SJean-Philippe Brucker #include <libaio.h>
230a9aa69SJean-Philippe Brucker #include <pthread.h>
330a9aa69SJean-Philippe Brucker #include <sys/eventfd.h>
430a9aa69SJean-Philippe Brucker 
5*2790307cSJean-Philippe Brucker #include "kvm/brlock.h"
630a9aa69SJean-Philippe Brucker #include "kvm/disk-image.h"
730a9aa69SJean-Philippe Brucker #include "kvm/kvm.h"
830a9aa69SJean-Philippe Brucker #include "linux/list.h"
930a9aa69SJean-Philippe Brucker 
1030a9aa69SJean-Philippe Brucker #define AIO_MAX 256
1130a9aa69SJean-Philippe Brucker 
aio_submit(struct disk_image * disk,int nr,struct iocb ** ios)12*2790307cSJean-Philippe Brucker static int aio_submit(struct disk_image *disk, int nr, struct iocb **ios)
1330a9aa69SJean-Philippe Brucker {
1430a9aa69SJean-Philippe Brucker 	int ret;
1530a9aa69SJean-Philippe Brucker 
16*2790307cSJean-Philippe Brucker 	__sync_fetch_and_add(&disk->aio_inflight, nr);
17*2790307cSJean-Philippe Brucker 	/*
18*2790307cSJean-Philippe Brucker 	 * A wmb() is needed here, to ensure disk_aio_thread() sees this
19*2790307cSJean-Philippe Brucker 	 * increase after receiving the events. It is included in the
20*2790307cSJean-Philippe Brucker 	 * __sync_fetch_and_add (as a full barrier).
21*2790307cSJean-Philippe Brucker 	 */
2230a9aa69SJean-Philippe Brucker restart:
23*2790307cSJean-Philippe Brucker 	ret = io_submit(disk->ctx, nr, ios);
2430a9aa69SJean-Philippe Brucker 	if (ret == -EAGAIN)
2530a9aa69SJean-Philippe Brucker 		goto restart;
26*2790307cSJean-Philippe Brucker 	else if (ret <= 0)
27*2790307cSJean-Philippe Brucker 		/* disk_aio_thread() is never going to see those */
28*2790307cSJean-Philippe Brucker 		__sync_fetch_and_sub(&disk->aio_inflight, nr);
2930a9aa69SJean-Philippe Brucker 
3030a9aa69SJean-Philippe Brucker 	return ret;
3130a9aa69SJean-Philippe Brucker }
3230a9aa69SJean-Philippe Brucker 
/*
 * Queue an asynchronous vectored read of @iovcount buffers from @disk,
 * starting at @sector. Completion is signalled through disk->evt and @param
 * is attached to the request as its data pointer.
 *
 * Returns the result of aio_submit() for this single request.
 */
ssize_t raw_image__read_async(struct disk_image *disk, u64 sector,
			      const struct iovec *iov, int iovcount,
			      void *param)
{
	struct iocb request;
	struct iocb *batch[1];
	u64 byte_offset;

	/* Sector number to byte offset within the image */
	byte_offset = sector << SECTOR_SHIFT;
	batch[0] = &request;

	io_prep_preadv(&request, disk->fd, iov, iovcount, byte_offset);
	io_set_eventfd(&request, disk->evt);
	request.data = param;

	return aio_submit(disk, 1, batch);
}
4730a9aa69SJean-Philippe Brucker 
/*
 * Queue an asynchronous vectored write of @iovcount buffers to @disk,
 * starting at @sector. Completion is signalled through disk->evt and @param
 * is attached to the request as its data pointer.
 *
 * Returns the result of aio_submit() for this single request.
 */
ssize_t raw_image__write_async(struct disk_image *disk, u64 sector,
			       const struct iovec *iov, int iovcount,
			       void *param)
{
	struct iocb request;
	struct iocb *batch[1];
	u64 byte_offset;

	/* Sector number to byte offset within the image */
	byte_offset = sector << SECTOR_SHIFT;
	batch[0] = &request;

	io_prep_pwritev(&request, disk->fd, iov, iovcount, byte_offset);
	io_set_eventfd(&request, disk->evt);
	request.data = param;

	return aio_submit(disk, 1, batch);
}
62*2790307cSJean-Philippe Brucker 
63*2790307cSJean-Philippe Brucker /*
64*2790307cSJean-Philippe Brucker  * When this function returns there are no in-flight I/O. Caller ensures that
65*2790307cSJean-Philippe Brucker  * io_submit() isn't called concurrently.
66*2790307cSJean-Philippe Brucker  *
67*2790307cSJean-Philippe Brucker  * Returns an inaccurate number of I/O that was in-flight when the function was
68*2790307cSJean-Philippe Brucker  * called.
69*2790307cSJean-Philippe Brucker  */
raw_image__wait(struct disk_image * disk)70*2790307cSJean-Philippe Brucker int raw_image__wait(struct disk_image *disk)
71*2790307cSJean-Philippe Brucker {
72*2790307cSJean-Philippe Brucker 	u64 inflight = disk->aio_inflight;
73*2790307cSJean-Philippe Brucker 
74*2790307cSJean-Philippe Brucker 	while (disk->aio_inflight) {
75*2790307cSJean-Philippe Brucker 		usleep(100);
76*2790307cSJean-Philippe Brucker 		barrier();
77*2790307cSJean-Philippe Brucker 	}
78*2790307cSJean-Philippe Brucker 
79*2790307cSJean-Philippe Brucker 	return inflight;
8030a9aa69SJean-Philippe Brucker }
8130a9aa69SJean-Philippe Brucker 
disk_aio_get_events(struct disk_image * disk)82200cb823SJean-Philippe Brucker static int disk_aio_get_events(struct disk_image *disk)
8330a9aa69SJean-Philippe Brucker {
8430a9aa69SJean-Philippe Brucker 	struct io_event event[AIO_MAX];
8530a9aa69SJean-Philippe Brucker 	struct timespec notime = {0};
8630a9aa69SJean-Philippe Brucker 	int nr, i;
87200cb823SJean-Philippe Brucker 
88200cb823SJean-Philippe Brucker 	do {
89200cb823SJean-Philippe Brucker 		nr = io_getevents(disk->ctx, 1, ARRAY_SIZE(event), event, &notime);
90200cb823SJean-Philippe Brucker 		for (i = 0; i < nr; i++)
91200cb823SJean-Philippe Brucker 			disk->disk_req_cb(event[i].data, event[i].res);
92*2790307cSJean-Philippe Brucker 
93*2790307cSJean-Philippe Brucker 		/* Pairs with wmb() in aio_submit() */
94*2790307cSJean-Philippe Brucker 		rmb();
95*2790307cSJean-Philippe Brucker 		__sync_fetch_and_sub(&disk->aio_inflight, nr);
96*2790307cSJean-Philippe Brucker 
97200cb823SJean-Philippe Brucker 	} while (nr > 0);
98200cb823SJean-Philippe Brucker 
99200cb823SJean-Philippe Brucker 	return 0;
100200cb823SJean-Philippe Brucker }
101200cb823SJean-Philippe Brucker 
disk_aio_thread(void * param)102200cb823SJean-Philippe Brucker static void *disk_aio_thread(void *param)
103200cb823SJean-Philippe Brucker {
104200cb823SJean-Philippe Brucker 	struct disk_image *disk = param;
10530a9aa69SJean-Philippe Brucker 	u64 dummy;
10630a9aa69SJean-Philippe Brucker 
10730a9aa69SJean-Philippe Brucker 	kvm__set_thread_name("disk-image-io");
10830a9aa69SJean-Philippe Brucker 
10930a9aa69SJean-Philippe Brucker 	while (read(disk->evt, &dummy, sizeof(dummy)) > 0) {
110200cb823SJean-Philippe Brucker 		if (disk_aio_get_events(disk))
111200cb823SJean-Philippe Brucker 			break;
11230a9aa69SJean-Philippe Brucker 	}
11330a9aa69SJean-Philippe Brucker 
11430a9aa69SJean-Philippe Brucker 	return NULL;
11530a9aa69SJean-Philippe Brucker }
11630a9aa69SJean-Philippe Brucker 
disk_aio_setup(struct disk_image * disk)11730a9aa69SJean-Philippe Brucker int disk_aio_setup(struct disk_image *disk)
11830a9aa69SJean-Philippe Brucker {
11930a9aa69SJean-Philippe Brucker 	int r;
12030a9aa69SJean-Philippe Brucker 
121d62e8ee0SJean-Philippe Brucker 	/* No need to setup AIO if the disk ops won't make use of it */
122d62e8ee0SJean-Philippe Brucker 	if (!disk->ops->async)
123d62e8ee0SJean-Philippe Brucker 		return 0;
124d62e8ee0SJean-Philippe Brucker 
12530a9aa69SJean-Philippe Brucker 	disk->evt = eventfd(0, 0);
12630a9aa69SJean-Philippe Brucker 	if (disk->evt < 0)
12730a9aa69SJean-Philippe Brucker 		return -errno;
12830a9aa69SJean-Philippe Brucker 
12930a9aa69SJean-Philippe Brucker 	io_setup(AIO_MAX, &disk->ctx);
130a839180eSJean-Philippe Brucker 	r = pthread_create(&disk->thread, NULL, disk_aio_thread, disk);
13130a9aa69SJean-Philippe Brucker 	if (r) {
13230a9aa69SJean-Philippe Brucker 		r = -errno;
13330a9aa69SJean-Philippe Brucker 		close(disk->evt);
13430a9aa69SJean-Philippe Brucker 		return r;
13530a9aa69SJean-Philippe Brucker 	}
136d62e8ee0SJean-Philippe Brucker 
137d62e8ee0SJean-Philippe Brucker 	disk->async = true;
13830a9aa69SJean-Philippe Brucker 	return 0;
13930a9aa69SJean-Philippe Brucker }
14030a9aa69SJean-Philippe Brucker 
disk_aio_destroy(struct disk_image * disk)14130a9aa69SJean-Philippe Brucker void disk_aio_destroy(struct disk_image *disk)
14230a9aa69SJean-Philippe Brucker {
143d62e8ee0SJean-Philippe Brucker 	if (!disk->async)
144d62e8ee0SJean-Philippe Brucker 		return;
145d62e8ee0SJean-Philippe Brucker 
146a839180eSJean-Philippe Brucker 	pthread_cancel(disk->thread);
147a839180eSJean-Philippe Brucker 	pthread_join(disk->thread, NULL);
14830a9aa69SJean-Philippe Brucker 	close(disk->evt);
14930a9aa69SJean-Philippe Brucker 	io_destroy(disk->ctx);
15030a9aa69SJean-Philippe Brucker }
151